From 7f3fbd5d73bff96d42ef087ec87b662005242842 Mon Sep 17 00:00:00 2001 From: Chong Yidong Date: Sat, 30 Jun 2012 19:33:22 +0800 Subject: [PATCH] * xml.el: Implement XML parameter entities. (xml-parameter-entity-alist): New variable. (xml-parse-region, xml-parse-fragment): Preserve previous values of xml-entity-alist and xml-parameter-entity-alist, so that repeated calls on different documents do not change them. (xml-parse-tag): Fix doctype regexp. (xml--entity-replacement-text): New function. (xml-parse-dtd): Use it. Don't handle system entities; doing that properly requires url retrieval which is unimplemented. (xml-escape-string): Doc fix. --- lisp/ChangeLog | 13 +++ lisp/xml.el | 240 +++++++++++++++++++++++++++---------------------- 2 files changed, 145 insertions(+), 108 deletions(-) diff --git a/lisp/ChangeLog b/lisp/ChangeLog index f2fa5a37ac7..dddfce0414c 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,16 @@ +2012-06-30 Chong Yidong + + * xml.el: Implement XML parameter entities. + (xml-parameter-entity-alist): New variable. + (xml-parse-region, xml-parse-fragment): Preserve previous values + of xml-entity-alist and xml-parameter-entity-alist, so that + repeated calls on different documents do not change them. + (xml-parse-tag): Fix doctype regexp. + (xml--entity-replacement-text): New function. + (xml-parse-dtd): Use it. Don't handle system entities; doing that + properly requires url retrieval which is unimplemented. + (xml-escape-string): Doc fix. + 2012-06-30 Stefan Monnier * emacs-lisp/cl-lib.el (cl-pushnew): Use macroexp-let2. diff --git a/lisp/xml.el b/lisp/xml.el index d1e824c4ece..f135bdfabe4 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -95,10 +95,13 @@ ("apos" . "'") ("quot" . "\"") ("amp" . "&")) - "The defined entities. Entities are added to this when the DTD is parsed.") + "Alist of defined XML entities.") + +(defvar xml-parameter-entity-alist nil + "Alist of defined XML parametric entities.") (defvar xml-sub-parser nil - "Dynamically set this to a non-nil value if you want to parse an XML fragment.") + "Non-nil when the XML parser is parsing an XML fragment.") (defvar xml-validating-parser nil "Set to non-nil to get validity checking.") @@ -308,6 +311,9 @@ If PARSE-NS is non-nil, then QNAMES are expanded." ;; specs DTRT. (with-syntax-table (standard-syntax-table) (let ((case-fold-search nil) ; XML is case-sensitive. + ;; Prevent entity definitions from changing the defaults + (xml-entity-alist xml-entity-alist) + (xml-parameter-entity-alist xml-parameter-entity-alist) xml result dtd) (save-excursion (if buffer @@ -366,6 +372,9 @@ specify that the name shouldn't be given a namespace." (defun xml-parse-fragment (&optional parse-dtd parse-ns) "Parse xml-like fragments." (let ((xml-sub-parser t) + ;; Prevent entity definitions from changing the defaults + (xml-entity-alist xml-entity-alist) + (xml-parameter-entity-alist xml-parameter-entity-alist) children) (while (not (eobp)) (let ((bit (xml-parse-tag @@ -413,7 +422,7 @@ Returns one of: (buffer-substring-no-properties pos (match-beginning 0)) (xml-parse-string)))) ;; DTD for the document - ((looking-at " (char-after)) - (forward-char) - (if (not (eq (char-after) ?\[)) - (error "XML: Bad DTD") + + (if (eq (char-after) ?>) + + ;; No internal subset (forward-char) - ;; Parse the rest of the DTD - ;; Fixme: Deal with NOTATION, PIs. - (while (not (looking-at "\\s-*\\]")) - (skip-syntax-forward " ") - (cond - - ;; Translation of rule [45] of XML specifications - ((looking-at - "]+\\)>") - - (setq element (match-string-no-properties 1) - type (match-string-no-properties 2)) - (setq end-pos (match-end 0)) - ;; Translation of rule [46] of XML specifications + ;; Internal subset (XML [28b]) + (unless (eq (char-after) ?\[) + (error "XML: Bad DTD")) + (forward-char) + + ;; Parse the rest of the DTD + ;; Fixme: Deal with NOTATION, PIs. + (while (not (looking-at "\\s-*\\]")) + (skip-syntax-forward " ") + (cond + ;; Element declaration [45]: + ((looking-at "]+\\)>") + (let ((element (match-string-no-properties 1)) + (type (match-string-no-properties 2)) + (end-pos (match-end 0))) + ;; Translation of rule [46] of XML specifications (cond - ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration + ((string-match "^EMPTY[ \t\n\r]*$" type) ; empty declaration (setq type 'empty)) - ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents + ((string-match "^ANY[ \t\n\r]*$" type) ; any type of contents (setq type 'any)) - ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) + ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ; children ([47]) (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) - ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution + ((string-match "^%[^;]+;[ \t\n\r]*$" type) ; substitution nil) - (t - (if xml-validating-parser - (error "XML: (Validity) Invalid element type in the DTD")))) + (xml-validating-parser + (error "XML: (Validity) Invalid element type in the DTD"))) - ;; rule [45]: the element declaration must be unique - (if (and (assoc element dtd) - xml-validating-parser) - (error "XML: (Validity) Element declarations must be unique in a DTD (<%s>)" - element)) + ;; rule [45]: the element declaration must be unique + (and (assoc element dtd) + xml-validating-parser + (error "XML: (Validity) DTD element declarations must be unique (<%s>)" + element)) ;; Store the element in the DTD (push (list element type) dtd) - (goto-char end-pos)) - - ;; Translation of rule [52] of XML specifications - ((looking-at (concat "")) - - ;; We don't do anything with ATTLIST currently - (goto-char (match-end 0))) - - ((looking-at "")) - ((looking-at (concat "")) - (let ((name (match-string-no-properties 1)) - (value (substring (match-string-no-properties 2) 1 - (- (length (match-string-no-properties 2)) 1)))) - (goto-char (match-end 0)) - (setq xml-entity-alist - (append xml-entity-alist - (list (cons name - (with-temp-buffer - (insert value) - (goto-char (point-min)) - (xml-parse-fragment - xml-validating-parser - parse-ns)))))))) - ((or (looking-at (concat "")) - (looking-at (concat ""))) - (let ((name (match-string-no-properties 1)) - (file (substring (match-string-no-properties 2) 1 - (- (length (match-string-no-properties 2)) 1)))) - (goto-char (match-end 0)) - (setq xml-entity-alist - (append xml-entity-alist - (list (cons name (with-temp-buffer - (insert-file-contents file) - (goto-char (point-min)) - (xml-parse-fragment - xml-validating-parser - parse-ns)))))))) - ;; skip parameter entity declarations - ((or (looking-at (concat "")) - (looking-at (concat ""))) - (goto-char (match-end 0))) - ;; skip parameter entities - ((looking-at (concat "%" xml-name-re ";")) - (goto-char (match-end 0))) - (t - (when xml-validating-parser - (error "XML: (Validity) Invalid DTD item")))))) + (goto-char end-pos))) + + ;; Attribute-list declaration [52] (currently unsupported): + ((looking-at (concat "")) + (goto-char (match-end 0))) + + ;; Comments (skip to end): + ((looking-at "")) + + ;; Internal entity declarations: + ((looking-at (concat "")) + (let* ((name (prog1 (match-string-no-properties 2) + (goto-char (match-end 0)))) + (alist (if (match-string 1) + 'xml-parameter-entity-alist + 'xml-entity-alist)) + ;; Retrieve the deplacement text: + (value (xml--entity-replacement-text + ;; Entity value, sans quotation marks: + (substring (match-string-no-properties 3) 1 -1)))) + ;; If the same entity is declared more than once, the + ;; first declaration is binding. + (unless (assoc name (symbol-value alist)) + (set alist (cons (cons name value) (symbol-value alist)))))) + + ;; External entity declarations (currently unsupported): + ((or (looking-at (concat "")) + (looking-at (concat ""))) + (goto-char (match-end 0))) + + ;; Parameter entity: + ((looking-at (concat "%\\(" xml-name-re "\\);")) + (goto-char (match-end 0)) + (let* ((entity (match-string 1)) + (end (point-marker)) + (elt (assoc entity xml-parameter-entity-alist))) + (when elt + (replace-match (cdr elt) t t) + (goto-char end)))) + + ;; Anything else: + (xml-validating-parser + (error "XML: (Validity) Invalid DTD item")))) + (if (looking-at "\\s-*]>") (goto-char (match-end 0)))) (nreverse dtd))) +(defun xml--entity-replacement-text (string) + "Return the replacement text for the entity value STRING. +The replacement text is obtained by replacing character +references and parameter-entity references." + (let ((ref-re (eval-when-compile + (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([0-9a-fA-F]+\\)\\|%\\(" + xml-name-re "\\)\\);"))) + children) + (while (string-match ref-re string) + (push (substring string 0 (match-beginning 0)) children) + (let ((remainder (substring string (match-end 0))) + ref val) + (cond ((setq ref (match-string 1 string)) + ;; Decimal character reference + (setq val (decode-char 'ucs (string-to-number ref))) + (if val (push (string val) children))) + ;; Hexadecimal character reference + ((setq ref (match-string 2 string)) + (setq val (decode-char 'ucs (string-to-number ref 16))) + (if val (push (string val) children))) + ;; Parameter entity reference + ((setq ref (match-string 3 string)) + (setq val (assoc ref xml-parameter-entity-alist)) + (if val + (push (cdr val) children) + (push (concat "%" ref ";") children)))) + (setq string remainder))) + (mapconcat 'identity (nreverse (cons string children)) ""))) + (defun xml-parse-elem-type (string) "Convert element type STRING into a Lisp structure." @@ -864,15 +891,12 @@ The first line is indented with the optional INDENT-STRING." (defalias 'xml-print 'xml-debug-print) (defun xml-escape-string (string) - "Return the string with entity substitutions made from -xml-entity-alist." + "Return STRING with entity substitutions made from `xml-entity-alist'." (mapconcat (lambda (byte) (let ((char (char-to-string byte))) (if (rassoc char xml-entity-alist) (concat "&" (car (rassoc char xml-entity-alist)) ";") char))) - ;; This differs from the non-unicode branch. Just - ;; grabbing the string works here. string "")) (defun xml-debug-print-internal (xml indent-string) -- 2.39.2