]> git.eshelyaron.com Git - emacs.git/commitdiff
* xml.el: Implement XML parameter entities.
authorChong Yidong <cyd@gnu.org>
Sat, 30 Jun 2012 11:33:22 +0000 (19:33 +0800)
committerChong Yidong <cyd@gnu.org>
Sat, 30 Jun 2012 11:33:22 +0000 (19:33 +0800)
(xml-parameter-entity-alist): New variable.
(xml-parse-region, xml-parse-fragment): Preserve previous values
of xml-entity-alist and xml-parameter-entity-alist, so that
repeated calls on different documents do not change them.
(xml-parse-tag): Fix doctype regexp.
(xml--entity-replacement-text): New function.
(xml-parse-dtd): Use it.  Don't handle system entities; doing that
properly requires url retrieval which is unimplemented.
(xml-escape-string): Doc fix.

lisp/ChangeLog
lisp/xml.el

index f2fa5a37ac7ae227326260028367b6d5ec290e14..dddfce0414ca7f70abed7b4b936744746bde9ea2 100644 (file)
@@ -1,3 +1,16 @@
+2012-06-30  Chong Yidong  <cyd@gnu.org>
+
+       * xml.el: Implement XML parameter entities.
+       (xml-parameter-entity-alist): New variable.
+       (xml-parse-region, xml-parse-fragment): Preserve previous values
+       of xml-entity-alist and xml-parameter-entity-alist, so that
+       repeated calls on different documents do not change them.
+       (xml-parse-tag): Fix doctype regexp.
+       (xml--entity-replacement-text): New function.
+       (xml-parse-dtd): Use it.  Don't handle system entities; doing that
+       properly requires url retrieval which is unimplemented.
+       (xml-escape-string): Doc fix.
+
 2012-06-30  Stefan Monnier  <monnier@iro.umontreal.ca>
 
        * emacs-lisp/cl-lib.el (cl-pushnew): Use macroexp-let2.
index d1e824c4ece0f6dc8ebfd13c8b9065f915016994..f135bdfabe42c0615b71ff0d9c4297deef6337ff 100644 (file)
     ("apos" . "'")
     ("quot" . "\"")
     ("amp"  . "&"))
-  "The defined entities.  Entities are added to this when the DTD is parsed.")
+  "Alist of defined XML entities.")
+
+(defvar xml-parameter-entity-alist nil
+  "Alist of defined XML parametric entities.")
 
 (defvar xml-sub-parser nil
-  "Dynamically set this to a non-nil value if you want to parse an XML fragment.")
+  "Non-nil when the XML parser is parsing an XML fragment.")
 
 (defvar xml-validating-parser nil
   "Set to non-nil to get validity checking.")
@@ -308,6 +311,9 @@ If PARSE-NS is non-nil, then QNAMES are expanded."
   ;; specs DTRT.
   (with-syntax-table (standard-syntax-table)
     (let ((case-fold-search nil)       ; XML is case-sensitive.
+         ;; Prevent entity definitions from changing the defaults
+         (xml-entity-alist xml-entity-alist)
+         (xml-parameter-entity-alist xml-parameter-entity-alist)
          xml result dtd)
       (save-excursion
        (if buffer
@@ -366,6 +372,9 @@ specify that the name shouldn't be given a namespace."
 (defun xml-parse-fragment (&optional parse-dtd parse-ns)
   "Parse xml-like fragments."
   (let ((xml-sub-parser t)
+       ;; Prevent entity definitions from changing the defaults
+       (xml-entity-alist xml-entity-alist)
+       (xml-parameter-entity-alist xml-parameter-entity-alist)
        children)
     (while (not (eobp))
       (let ((bit (xml-parse-tag
@@ -413,7 +422,7 @@ Returns one of:
         (buffer-substring-no-properties pos (match-beginning 0))
         (xml-parse-string))))
      ;;  DTD for the document
-     ((looking-at "<!DOCTYPE")
+     ((looking-at "<!DOCTYPE[ \t\n\r]")
       (let ((dtd (xml-parse-dtd parse-ns)))
        (skip-syntax-forward " ")
        (if xml-validating-parser
@@ -580,11 +589,11 @@ This follows the rule [28] in the XML specifications."
   ;;  Get the name of the document
   (looking-at xml-name-regexp)
   (let ((dtd (list (match-string-no-properties 0) 'dtd))
-       type element end-pos)
+       (xml-parameter-entity-alist xml-parameter-entity-alist))
     (goto-char (match-end 0))
-
     (skip-syntax-forward " ")
-    ;; XML [75]
+
+    ;; External subset (XML [75])
     (cond ((looking-at "PUBLIC\\s-+")
           (goto-char (match-end 0))
           (unless (or (re-search-forward
@@ -607,119 +616,137 @@ This follows the rule [28] in the XML specifications."
             (error "XML: Missing System ID"))
           (push (list (match-string-no-properties 1) 'system) dtd)))
     (skip-syntax-forward " ")
-    (if (eq ?> (char-after))
-       (forward-char)
-      (if (not (eq (char-after) ?\[))
-         (error "XML: Bad DTD")
+
+    (if (eq (char-after) ?>)
+
+       ;; No internal subset
        (forward-char)
-       ;;  Parse the rest of the DTD
-       ;;  Fixme: Deal with NOTATION, PIs.
-       (while (not (looking-at "\\s-*\\]"))
-         (skip-syntax-forward " ")
-         (cond
-
-          ;;  Translation of rule [45] of XML specifications
-          ((looking-at
-            "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
-
-           (setq element (match-string-no-properties 1)
-                 type    (match-string-no-properties 2))
-           (setq end-pos (match-end 0))
 
-           ;;  Translation of rule [46] of XML specifications
+      ;; Internal subset (XML [28b])
+      (unless (eq (char-after) ?\[)
+       (error "XML: Bad DTD"))
+      (forward-char)
+
+      ;; Parse the rest of the DTD
+      ;; Fixme: Deal with NOTATION, PIs.
+      (while (not (looking-at "\\s-*\\]"))
+       (skip-syntax-forward " ")
+       (cond
+        ;; Element declaration [45]:
+        ((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
+         (let ((element (match-string-no-properties 1))
+               (type    (match-string-no-properties 2))
+               (end-pos (match-end 0)))
+           ;; Translation of rule [46] of XML specifications
            (cond
-            ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration
+            ((string-match "^EMPTY[ \t\n\r]*$" type)       ; empty declaration
              (setq type 'empty))
-            ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
+            ((string-match "^ANY[ \t\n\r]*$" type)         ; any type of contents
              (setq type 'any))
-            ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
+            ((string-match "^(\\(.*\\))[ \t\n\r]*$" type)  ; children ([47])
              (setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
-            ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
+            ((string-match "^%[^;]+;[ \t\n\r]*$" type)   ; substitution
              nil)
-            (t
-             (if xml-validating-parser
-                 (error "XML: (Validity) Invalid element type in the DTD"))))
+            (xml-validating-parser
+             (error "XML: (Validity) Invalid element type in the DTD")))
 
-           ;;  rule [45]: the element declaration must be unique
-           (if (and (assoc element dtd)
-                    xml-validating-parser)
-               (error "XML: (Validity) Element declarations must be unique in a DTD (<%s>)"
-                      element))
+           ;; rule [45]: the element declaration must be unique
+           (and (assoc element dtd)
+                xml-validating-parser
+                (error "XML: (Validity) DTD element declarations must be unique (<%s>)"
+                       element))
 
            ;;  Store the element in the DTD
            (push (list element type) dtd)
-           (goto-char end-pos))
-
-          ;; Translation of rule [52] of XML specifications
-          ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
-                               "\\)[ \t\n\r]*\\(" xml-att-def-re
-                               "\\)*[ \t\n\r]*>"))
-
-           ;; We don't do anything with ATTLIST currently
-           (goto-char (match-end 0)))
-
-          ((looking-at "<!--")
-           (search-forward "-->"))
-          ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re
-                               "\\)[ \t\n\r]*\\(" xml-entity-value-re
-                               "\\)[ \t\n\r]*>"))
-           (let ((name  (match-string-no-properties 1))
-                 (value (substring (match-string-no-properties 2) 1
-                                   (- (length (match-string-no-properties 2)) 1))))
-             (goto-char (match-end 0))
-             (setq xml-entity-alist
-                   (append xml-entity-alist
-                           (list (cons name
-                                       (with-temp-buffer
-                                         (insert value)
-                                         (goto-char (point-min))
-                                         (xml-parse-fragment
-                                          xml-validating-parser
-                                          parse-ns))))))))
-          ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re
-                                   "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
-                                   "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
-               (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re
-                                   "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
-                                   "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
-                                   "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
-                                   "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
-                                   "[ \t\n\r]*>")))
-           (let ((name  (match-string-no-properties 1))
-                 (file  (substring (match-string-no-properties 2) 1
-                                   (- (length (match-string-no-properties 2)) 1))))
-             (goto-char (match-end 0))
-             (setq xml-entity-alist
-                   (append xml-entity-alist
-                           (list (cons name (with-temp-buffer
-                                              (insert-file-contents file)
-                                              (goto-char (point-min))
-                                              (xml-parse-fragment
-                                               xml-validating-parser
-                                               parse-ns))))))))
-          ;; skip parameter entity declarations
-          ((or (looking-at (concat "<!ENTITY[ \t\n\r]+%[ \t\n\r]+\\(" xml-name-re
-                                   "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
-                                   "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
-               (looking-at (concat "<!ENTITY[ \t\n\r]+"
-                                   "%[ \t\n\r]+"
-                                   "\\(" xml-name-re "\\)[ \t\n\r]+"
-                                   "PUBLIC[ \t\n\r]+"
-                                   "\\(\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
-                                   "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'\\)[ \t\n\r]+"
-                                   "\\(\"[^\"]+\"\\|'[^']+'\\)"
-                                   "[ \t\n\r]*>")))
-           (goto-char (match-end 0)))
-          ;; skip parameter entities
-          ((looking-at (concat "%" xml-name-re ";"))
-           (goto-char (match-end 0)))
-          (t
-           (when xml-validating-parser
-             (error "XML: (Validity) Invalid DTD item"))))))
+           (goto-char end-pos)))
+
+        ;; Attribute-list declaration [52] (currently unsupported):
+        ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
+                             "\\)[ \t\n\r]*\\(" xml-att-def-re
+                             "\\)*[ \t\n\r]*>"))
+         (goto-char (match-end 0)))
+
+        ;; Comments (skip to end):
+        ((looking-at "<!--")
+         (search-forward "-->"))
+
+        ;; Internal entity declarations:
+        ((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+                             xml-name-re "\\)[ \t\n\r]*\\("
+                             xml-entity-value-re "\\)[ \t\n\r]*>"))
+         (let* ((name (prog1 (match-string-no-properties 2)
+                        (goto-char (match-end 0))))
+                (alist (if (match-string 1)
+                           'xml-parameter-entity-alist
+                         'xml-entity-alist))
+                ;; Retrieve the deplacement text:
+                (value (xml--entity-replacement-text
+                        ;; Entity value, sans quotation marks:
+                        (substring (match-string-no-properties 3) 1 -1))))
+           ;; If the same entity is declared more than once, the
+           ;; first declaration is binding.
+           (unless (assoc name (symbol-value alist))
+             (set alist (cons (cons name value) (symbol-value alist))))))
+
+        ;; External entity declarations (currently unsupported):
+        ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+                                 xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
+                                 "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
+             (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+                                 xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
+                                 "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
+                                 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
+                                 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
+                                 "[ \t\n\r]*>")))
+         (goto-char (match-end 0)))
+
+        ;; Parameter entity:
+        ((looking-at (concat "%\\(" xml-name-re "\\);"))
+         (goto-char (match-end 0))
+         (let* ((entity (match-string 1))
+                (end (point-marker))
+                (elt (assoc entity xml-parameter-entity-alist)))
+           (when elt
+             (replace-match (cdr elt) t t)
+             (goto-char end))))
+
+        ;; Anything else:
+        (xml-validating-parser
+         (error "XML: (Validity) Invalid DTD item"))))
+
       (if (looking-at "\\s-*]>")
          (goto-char (match-end 0))))
     (nreverse dtd)))
 
+(defun xml--entity-replacement-text (string)
+  "Return the replacement text for the entity value STRING.
+The replacement text is obtained by replacing character
+references and parameter-entity references."
+  (let ((ref-re (eval-when-compile
+                 (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([0-9a-fA-F]+\\)\\|%\\("
+                         xml-name-re "\\)\\);")))
+       children)
+    (while (string-match ref-re string)
+      (push (substring string 0 (match-beginning 0)) children)
+      (let ((remainder (substring string (match-end 0)))
+           ref val)
+       (cond ((setq ref (match-string 1 string))
+              ;; Decimal character reference
+              (setq val (decode-char 'ucs (string-to-number ref)))
+              (if val (push (string val) children)))
+             ;; Hexadecimal character reference
+             ((setq ref (match-string 2 string))
+              (setq val (decode-char 'ucs (string-to-number ref 16)))
+              (if val (push (string val) children)))
+             ;; Parameter entity reference
+             ((setq ref (match-string 3 string))
+              (setq val (assoc ref xml-parameter-entity-alist))
+              (if val
+                  (push (cdr val) children)
+                (push (concat "%" ref ";") children))))
+       (setq string remainder)))
+    (mapconcat 'identity (nreverse (cons string children)) "")))
+
 (defun xml-parse-elem-type (string)
   "Convert element type STRING into a Lisp structure."
 
@@ -864,15 +891,12 @@ The first line is indented with the optional INDENT-STRING."
 (defalias 'xml-print 'xml-debug-print)
 
 (defun xml-escape-string (string)
-  "Return the string with entity substitutions made from
-xml-entity-alist."
+  "Return STRING with entity substitutions made from `xml-entity-alist'."
   (mapconcat (lambda (byte)
                (let ((char (char-to-string byte)))
                  (if (rassoc char xml-entity-alist)
                      (concat "&" (car (rassoc char xml-entity-alist)) ";")
                    char)))
-             ;; This differs from the non-unicode branch.  Just
-             ;; grabbing the string works here.
              string ""))
 
 (defun xml-debug-print-internal (xml indent-string)