]> git.eshelyaron.com Git - emacs.git/commitdiff
Improve xml parameter entity parsing, and add a new ERT test.
authorChong Yidong <cyd@gnu.org>
Sun, 1 Jul 2012 07:17:05 +0000 (15:17 +0800)
committerChong Yidong <cyd@gnu.org>
Sun, 1 Jul 2012 07:17:05 +0000 (15:17 +0800)
* test/automated/xml-parse-tests.el: New file.

* lisp/xml.el (xml--parse-buffer): New function.  Move most of
xml-parse-region here.
(xml-parse-region): Copy region into a temporary buffer, since
parameter entity substitution requires changing buffer contents.
Use xml--parse-buffer.
(xml-parse-file): Use xml--parse-buffer.
(xml-parse-dtd): Make parameter entity substitution work right.

lisp/ChangeLog
lisp/xml.el
test/ChangeLog
test/automated/xml-parse-tests.el [new file with mode: 0644]

index 0cae8a88e77743a850c8212839ce47121f7b1d08..3156dc412e32431be5793784cf9b6245298c12d5 100644 (file)
@@ -1,3 +1,13 @@
+2012-07-01  Chong Yidong  <cyd@gnu.org>
+
+       * xml.el (xml--parse-buffer): New function.  Move most of
+       xml-parse-region here.
+       (xml-parse-region): Copy region into a temporary buffer, since
+       parameter entity substitution requires changing buffer contents.
+       Use xml--parse-buffer.
+       (xml-parse-file): Use xml--parse-buffer.
+       (xml-parse-dtd): Make parameter entity substitution work right.
+
 2012-06-30  Glenn Morris  <rgm@gnu.org>
 
        * comint.el (follow-comint-scroll-to-bottom): Fix declaration.
index a9e1b2c283063de9bbce86ea87f302b93387229f..841e19a174a0322e5b280f036ce22f77407ae373 100644 (file)
@@ -165,23 +165,12 @@ See also `xml-get-attribute-or-nil'."
 ;;;###autoload
 (defun xml-parse-file (file &optional parse-dtd parse-ns)
   "Parse the well-formed XML file FILE.
-If FILE is already visited, use its buffer and don't kill it.
-Returns the top node with all its children.
+Return the top node with all its children.
 If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
 If PARSE-NS is non-nil, then QNAMES are expanded."
-  (if (get-file-buffer file)
-      (with-current-buffer (get-file-buffer file)
-       (save-excursion
-         (xml-parse-region (point-min)
-                           (point-max)
-                           (current-buffer)
-                           parse-dtd parse-ns)))
-    (with-temp-buffer
-      (insert-file-contents file)
-      (xml-parse-region (point-min)
-                       (point-max)
-                       (current-buffer)
-                       parse-dtd parse-ns))))
+  (with-temp-buffer
+    (insert-file-contents file)
+    (xml--parse-buffer parse-dtd parse-ns)))
 
 (eval-and-compile
 (let* ((start-chars (concat "[:alpha:]:_"))
@@ -320,42 +309,44 @@ and returned as the first element of the list.
 If PARSE-NS is non-nil, then QNAMES are expanded."
   ;; Use fixed syntax table to ensure regexp char classes and syntax
   ;; specs DTRT.
+  (unless buffer
+    (setq buffer (current-buffer)))
+  (with-temp-buffer
+    (insert-buffer-substring buffer beg end)
+    (xml--parse-buffer parse-dtd parse-ns)))
+
+(defun xml--parse-buffer (parse-dtd parse-ns)
   (with-syntax-table (standard-syntax-table)
     (let ((case-fold-search nil)       ; XML is case-sensitive.
          ;; Prevent entity definitions from changing the defaults
          (xml-entity-alist xml-entity-alist)
          (xml-parameter-entity-alist xml-parameter-entity-alist)
          xml result dtd)
-      (save-excursion
-       (if buffer
-           (set-buffer buffer))
-       (save-restriction
-         (narrow-to-region beg end)
-         (goto-char (point-min))
-         (while (not (eobp))
-           (if (search-forward "<" nil t)
-               (progn
-                 (forward-char -1)
-                 (setq result (xml-parse-tag parse-dtd parse-ns))
-                 (cond
-                  ((null result)
-                   ;; Not looking at an xml start tag.
-                   (unless (eobp)
-                     (forward-char 1)))
-                  ((and xml (not xml-sub-parser))
-                   ;; Translation of rule [1] of XML specifications
-                   (error "XML: (Not Well-Formed) Only one root tag allowed"))
-                  ((and (listp (car result))
-                        parse-dtd)
-                   (setq dtd (car result))
-                   (if (cdr result)    ; possible leading comment
-                       (add-to-list 'xml (cdr result))))
-                  (t
-                   (add-to-list 'xml result))))
-             (goto-char (point-max))))
-         (if parse-dtd
-             (cons dtd (nreverse xml))
-           (nreverse xml)))))))
+      (goto-char (point-min))
+      (while (not (eobp))
+       (if (search-forward "<" nil t)
+           (progn
+             (forward-char -1)
+             (setq result (xml-parse-tag parse-dtd parse-ns))
+             (cond
+              ((null result)
+               ;; Not looking at an xml start tag.
+               (unless (eobp)
+                 (forward-char 1)))
+              ((and xml (not xml-sub-parser))
+               ;; Translation of rule [1] of XML specifications
+               (error "XML: (Not Well-Formed) Only one root tag allowed"))
+              ((and (listp (car result))
+                    parse-dtd)
+               (setq dtd (car result))
+               (if (cdr result)        ; possible leading comment
+                   (add-to-list 'xml (cdr result))))
+              (t
+               (add-to-list 'xml result))))
+         (goto-char (point-max))))
+      (if parse-dtd
+         (cons dtd (nreverse xml))
+       (nreverse xml)))))
 
 (defun xml-maybe-do-ns (name default xml-ns)
   "Perform any namespace expansion.
@@ -600,7 +591,10 @@ This follows the rule [28] in the XML specifications."
   ;;  Get the name of the document
   (looking-at xml-name-regexp)
   (let ((dtd (list (match-string-no-properties 0) 'dtd))
-       (xml-parameter-entity-alist xml-parameter-entity-alist))
+       (xml-parameter-entity-alist xml-parameter-entity-alist)
+       (parameter-entity-re (eval-when-compile
+                              (concat "%\\(" xml-name-re "\\);")))
+       next-parameter-entity)
     (goto-char (match-end 0))
     (skip-syntax-forward " ")
 
@@ -638,13 +632,28 @@ This follows the rule [28] in the XML specifications."
        (error "XML: Bad DTD"))
       (forward-char)
 
+      ;; [2.8]: "markup declarations may be made up in whole or in
+      ;; part of the replacement text of parameter entities."
+
+      ;; Since parameter entities are valid only within the DTD, we
+      ;; first search for the position of the next possible parameter
+      ;; entity.  Then, search for the next DTD element; if it ends
+      ;; before the next parameter entity, expand the parameter entity
+      ;; and try again.
+      (setq next-parameter-entity
+           (save-excursion
+             (if (re-search-forward parameter-entity-re nil t)
+                 (match-beginning 0))))
+
       ;; Parse the rest of the DTD
       ;; Fixme: Deal with NOTATION, PIs.
       (while (not (looking-at "\\s-*\\]"))
        (skip-syntax-forward " ")
        (cond
         ;; Element declaration [45]:
-        ((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
+        ((and (looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
+              (or (null next-parameter-entity)
+                  (<= (match-end 0) next-parameter-entity)))
          (let ((element (match-string-no-properties 1))
                (type    (match-string-no-properties 2))
                (end-pos (match-end 0)))
@@ -672,19 +681,31 @@ This follows the rule [28] in the XML specifications."
            (goto-char end-pos)))
 
         ;; Attribute-list declaration [52] (currently unsupported):
-        ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
-                             "\\)[ \t\n\r]*\\(" xml-att-def-re
-                             "\\)*[ \t\n\r]*>"))
+        ((and (looking-at (eval-when-compile
+                            (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
+                                    "\\)[ \t\n\r]*\\(" xml-att-def-re
+                                    "\\)*[ \t\n\r]*>")))
+              (or (null next-parameter-entity)
+                  (<= (match-end 0) next-parameter-entity)))
          (goto-char (match-end 0)))
 
-        ;; Comments (skip to end):
+        ;; Comments (skip to end, ignoring parameter entity):
         ((looking-at "<!--")
-         (search-forward "-->"))
+         (search-forward "-->")
+         (and next-parameter-entity
+              (> (point) next-parameter-entity)
+              (setq next-parameter-entity
+                    (save-excursion
+                      (if (re-search-forward parameter-entity-re nil t)
+                          (match-beginning 0))))))
 
         ;; Internal entity declarations:
-        ((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
-                             xml-name-re "\\)[ \t\n\r]*\\("
-                             xml-entity-value-re "\\)[ \t\n\r]*>"))
+        ((and (looking-at (eval-when-compile
+                            (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+                                    xml-name-re "\\)[ \t\n\r]*\\("
+                                    xml-entity-value-re "\\)[ \t\n\r]*>")))
+              (or (null next-parameter-entity)
+                  (<= (match-end 0) next-parameter-entity)))
          (let* ((name (prog1 (match-string-no-properties 2)
                         (goto-char (match-end 0))))
                 (alist (if (match-string 1)
@@ -700,26 +721,39 @@ This follows the rule [28] in the XML specifications."
              (set alist (cons (cons name value) (symbol-value alist))))))
 
         ;; External entity declarations (currently unsupported):
-        ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
-                                 xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
-                                 "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
-             (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
-                                 xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
-                                 "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
-                                 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
-                                 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
-                                 "[ \t\n\r]*>")))
+        ((and (or (looking-at (eval-when-compile
+                                (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+                                        xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
+                                        "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")))
+                  (looking-at (eval-when-compile
+                                (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
+                                        xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
+                                        "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
+                                        "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
+                                        "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
+                                        "[ \t\n\r]*>"))))
+              (or (null next-parameter-entity)
+                  (<= (match-end 0) next-parameter-entity)))
          (goto-char (match-end 0)))
 
-        ;; Parameter entity:
-        ((looking-at (concat "%\\(" xml-name-re "\\);"))
-         (goto-char (match-end 0))
-         (let* ((entity (match-string 1))
-                (end (point-marker))
-                (elt (assoc entity xml-parameter-entity-alist)))
-           (when elt
-             (replace-match (cdr elt) t t)
-             (goto-char end))))
+        ;; If a parameter entity is in the way, expand it.
+        (next-parameter-entity
+         (save-excursion
+           (goto-char next-parameter-entity)
+           (unless (looking-at parameter-entity-re)
+             (error "XML: Internal error"))
+           (let* ((entity (match-string 1))
+                  (beg    (point-marker))
+                  (elt    (assoc entity xml-parameter-entity-alist)))
+             (if elt
+                 (progn
+                   (replace-match (cdr elt) t t)
+                   ;; The replacement can itself be a parameter entity.
+                   (goto-char next-parameter-entity))
+               (goto-char (match-end 0))))
+           (setq next-parameter-entity
+                 (if (re-search-forward parameter-entity-re nil t)
+                     (match-beginning 0)))))
 
         ;; Anything else:
         (xml-validating-parser
index 45fc70e044056fe4e30defaaadd706a37576da43..d9d9bc5a9fad0d6371b8a8ccefb98187aabd2433 100644 (file)
@@ -1,3 +1,7 @@
+2012-07-01  Chong Yidong  <cyd@gnu.org>
+
+       * automated/xml-parse-tests.el: New file.
+
 2012-06-27  Stefan Monnier  <monnier@iro.umontreal.ca>
 
        * automated/ert-x-tests.el (ert-test-run-tests-interactively-2):
diff --git a/test/automated/xml-parse-tests.el b/test/automated/xml-parse-tests.el
new file mode 100644 (file)
index 0000000..8e8ef29
--- /dev/null
@@ -0,0 +1,57 @@
+;;; xml-parse-tests.el --- Test suite for XML parsing.
+
+;; Copyright (C) 2012 Free Software Foundation, Inc.
+
+;; Author: Chong Yidong <cyd@stupidchicken.com>
+;; Keywords:       internal
+;; Human-Keywords: internal
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; Type M-x test-xml-parse RET to generate the test buffer.
+
+;;; Code:
+
+(require 'xml)
+
+(defvar xml-parse-tests--data
+  '(;; General entity substitution
+    ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
+     ((foo ((a . "b")) (bar nil "AbC;"))))
+    ;; Parameter entity substitution
+    ("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
+     ((foo ((a . "b")) (bar nil "AbC;"))))
+    ;; Tricky parameter entity substitution (like XML spec Appendix D)
+    ("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '&#37;zz;'><!ENTITY % zz '&#60;!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" .
+     ((foo nil "AbC"))))
+  "Alist of XML strings and their expected parse trees.")
+
+(ert-deftest xml-parse-tests ()
+  "Test XML parsing."
+  (with-temp-buffer
+    (dolist (test xml-parse-tests--data)
+      (erase-buffer)
+      (insert (car test))
+      (should (equal (cdr test)
+                    (xml-parse-region (point-min) (point-max)))))))
+
+;; Local Variables:
+;; no-byte-compile: t
+;; End:
+
+;;; xml-parse-tests.el ends here.