From 728df3d91cf2590d25999343b4f0daf417809828 Mon Sep 17 00:00:00 2001 From: Vinicius Jose Latorre Date: Mon, 5 Apr 2004 01:48:53 +0000 Subject: [PATCH] Parser for DTD (Data Type Definition for XML). --- lisp/progmodes/ebnf-dtd.el | 1350 ++++++++++++++++++++++++++++++++++++ 1 file changed, 1350 insertions(+) create mode 100644 lisp/progmodes/ebnf-dtd.el diff --git a/lisp/progmodes/ebnf-dtd.el b/lisp/progmodes/ebnf-dtd.el new file mode 100644 index 00000000000..45c8abd0fad --- /dev/null +++ b/lisp/progmodes/ebnf-dtd.el @@ -0,0 +1,1350 @@ +;;; ebnf-dtd.el --- parser for DTD (Document Type Definition for XML) + +;; Copyright (C) 2004 Free Software Foundation, Inc. + +;; Author: Vinicius Jose Latorre +;; Maintainer: Vinicius Jose Latorre +;; Time-stamp: <2004/04/04 21:50:16 vinicius> +;; Keywords: wp, ebnf, PostScript +;; Version: 1.0 + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; +;; This is part of ebnf2ps package. +;; +;; This package defines a parser for DTD (Document Type Definition for XML). +;; +;; See ebnf2ps.el for documentation. 
+;; +;; +;; DTD Syntax +;; ---------- +;; +;; See the URLs: +;; `http://www.w3.org/TR/2004/REC-xml-20040204/' +;; (Extensible Markup Language (XML) 1.0 (Third Edition)) +;; `http://www.w3.org/TR/html40/' +;; (HTML 4.01 Specification) +;; `http://www.w3.org/TR/NOTE-html-970421' +;; (HTML DTD with support for Style Sheets) +;; +;; +;; /* Document */ +;; +;; document ::= prolog element Misc* +;; /* Note that *only* the prolog will be parsed */ +;; +;; +;; /* Characters */ +;; +;; Char ::= #x9 | #xA | #xD +;; | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] +;; /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ +;; +;; /* NOTE: +;; +;; Document authors are encouraged to avoid "compatibility characters", as +;; defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of +;; [Unicode3]). The characters defined in the following ranges are also +;; discouraged. They are either control characters or permanently undefined +;; Unicode characters: +;; +;; [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDDF], +;; [#1FFFE-#x1FFFF], [#2FFFE-#x2FFFF], [#3FFFE-#x3FFFF], +;; [#4FFFE-#x4FFFF], [#5FFFE-#x5FFFF], [#6FFFE-#x6FFFF], +;; [#7FFFE-#x7FFFF], [#8FFFE-#x8FFFF], [#9FFFE-#x9FFFF], +;; [#AFFFE-#xAFFFF], [#BFFFE-#xBFFFF], [#CFFFE-#xCFFFF], +;; [#DFFFE-#xDFFFF], [#EFFFE-#xEFFFF], [#FFFFE-#xFFFFF], +;; [#10FFFE-#x10FFFF]. */ +;; +;; +;; /* White Space */ +;; +;; S ::= (#x20 | #x9 | #xD | #xA)+ +;; +;; +;; /* Names and Tokens */ +;; +;; NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' +;; | CombiningChar | Extender +;; +;; Name ::= (Letter | '_' | ':') (NameChar)* +;; +;; Names ::= Name (#x20 Name)* +;; +;; Nmtoken ::= (NameChar)+ +;; +;; Nmtokens ::= Nmtoken (#x20 Nmtoken)* +;; +;; +;; /* Literals */ +;; +;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' +;; | "'" ([^%&'] | PEReference | Reference)* "'" +;; +;; AttValue ::= '"' ([^<&"] | Reference)* '"' +;; | "'" ([^<&'] | Reference)* "'" +;; +;; SystemLiteral ::= ('"' [^"]* '"') +;; | ("'" [^']* "'") +;; +;; PubidLiteral ::= '"' PubidChar* '"' +;; | "'" (PubidChar - "'")* "'" +;; +;; PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] +;; +;; /* NOTE: +;; +;; Although the EntityValue production allows the definition of a general +;; entity consisting of a single explicit < in the literal (e.g., <!ENTITY mylt "<">), it is strongly advised to avoid this practice since any +;; reference to that entity will cause a well-formedness error. */ +;; +;; +;; /* Character Data */ +;; +;; CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) +;; +;; +;; /* Comments */ +;; +;; Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' +;; +;; +;; /* Processing Instructions */ +;; +;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' +;; +;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) +;; +;; +;; /* CDATA Sections */ +;; +;; CDSect ::= CDStart CData CDEnd +;; +;; CDStart ::= '<![CDATA[' +;; +;; CData ::= (Char* - (Char* ']]>' Char*)) +;; +;; CDEnd ::= ']]>' +;; +;; +;; /* Prolog */ +;; +;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? +;; +;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' +;; +;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') +;; +;; Eq ::= S? '=' S? 
+;; +;; VersionNum ::= '1.0' +;; +;; Misc ::= Comment | PI | S +;; +;; +;; /* Document Type Definition */ +;; +;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' +;; [VC: Root Element Type] +;; [WFC: External Subset] +;; +;; DeclSep ::= PEReference | S +;; [WFC: PE Between Declarations] +;; +;; intSubset ::= (markupdecl | DeclSep)* +;; +;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl +;; | NotationDecl | PI | Comment +;; [VC: Proper Declaration/PE Nesting] +;; [WFC: PEs in Internal Subset] +;; +;; +;; /* External Subset */ +;; +;; extSubset ::= TextDecl? extSubsetDecl +;; +;; extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)* +;; +;; +;; /* Standalone Document Declaration */ +;; +;; SDDecl ::= S 'standalone' Eq +;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) +;; [VC: Standalone Document Declaration] +;; +;; +;; /* Element */ +;; +;; element ::= EmptyElemTag | STag content ETag +;; [WFC: Element Type Match] +;; [VC: Element Valid] +;; +;; +;; /* Start-tag */ +;; +;; STag ::= '<' Name (S Attribute)* S? '>' +;; [WFC: Unique Att Spec] +;; +;; Attribute ::= Name Eq AttValue +;; [VC: Attribute Value Type] +;; [WFC: No External Entity References] +;; [WFC: No < in Attribute Values] +;; +;; +;; /* End-tag */ +;; +;; ETag ::= '</' Name S? '>' +;; +;; +;; /* Content of Elements */ +;; +;; content ::= CharData? +;; ((element | Reference | CDSect | PI | Comment) CharData?)* +;; +;; +;; /* Tags for Empty Elements */ +;; +;; EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' +;; [WFC: Unique Att Spec] +;; +;; +;; /* Element Type Declaration */ +;; +;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' +;; [VC: Unique Element Type Declaration] +;; +;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children +;; +;; +;; /* Element-content Models */ +;; +;; children ::= (choice | seq) ('?' | '*' | '+')? +;; +;; cp ::= (Name | choice | seq) ('?' | '*' | '+')? +;; +;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' +;; [VC: Proper Group/PE Nesting] +;; +;; seq ::= '(' S? cp ( S? ',' S? cp )* S? 
')' +;; [VC: Proper Group/PE Nesting] +;; +;; +;; /* Mixed-content Declaration */ +;; +;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' +;; | '(' S? '#PCDATA' S? ')' +;; [VC: Proper Group/PE Nesting] +;; [VC: No Duplicate Types] +;; +;; +;; /* Attribute-list Declaration */ +;; +;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' +;; +;; AttDef ::= S Name S AttType S DefaultDecl +;; +;; +;; /* Attribute Types */ +;; +;; AttType ::= StringType | TokenizedType | EnumeratedType +;; +;; StringType ::= 'CDATA' +;; +;; TokenizedType ::= 'ID' [VC: ID] +;; [VC: One ID per Element Type] +;; [VC: ID Attribute Default] +;; | 'IDREF' [VC: IDREF] +;; | 'IDREFS' [VC: IDREF] +;; | 'ENTITY' [VC: Entity Name] +;; | 'ENTITIES' [VC: Entity Name] +;; | 'NMTOKEN' [VC: Name Token] +;; | 'NMTOKENS' [VC: Name Token] +;; +;; +;; /* Enumerated Attribute Types */ +;; +;; EnumeratedType ::= NotationType | Enumeration +;; +;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' +;; [VC: Notation Attributes] +;; [VC: One Notation Per Element Type] +;; [VC: No Notation on Empty Element] +;; [VC: No Duplicate Tokens] +;; +;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' +;; [VC: Enumeration] +;; [VC: No Duplicate Tokens] +;; +;; +;; /* Attribute Defaults */ +;; +;; DefaultDecl ::= '#REQUIRED' | '#IMPLIED' +;; | (('#FIXED' S)? 
AttValue) +;; [VC: Required Attribute] +;; [VC: Attribute Default Value Syntactically Correct] +;; [WFC: No < in Attribute Values] +;; [VC: Fixed Attribute Default] +;; +;; +;; /* Conditional Section */ +;; +;; conditionalSect ::= includeSect | ignoreSect +;; +;; includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' +;; [VC: Proper Conditional Section/PE Nesting] +;; +;; ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' +;; [VC: Proper Conditional Section/PE Nesting] +;; +;; ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* +;; +;; Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) +;; +;; +;; /* Character Reference */ +;; +;; CharRef ::= '&#' [0-9]+ ';' +;; | '&#x' [0-9a-fA-F]+ ';' +;; [WFC: Legal Character] +;; +;; +;; /* Entity Reference */ +;; +;; Reference ::= EntityRef | CharRef +;; +;; EntityRef ::= '&' Name ';' +;; [WFC: Entity Declared] +;; [VC: Entity Declared] +;; [WFC: Parsed Entity] +;; [WFC: No Recursion] +;; +;; PEReference ::= '%' Name ';' +;; [VC: Entity Declared] +;; [WFC: No Recursion] +;; [WFC: In DTD] +;; +;; +;; /* Entity Declaration */ +;; +;; EntityDecl ::= GEDecl | PEDecl +;; +;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' +;; +;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' +;; +;; EntityDef ::= EntityValue | (ExternalID NDataDecl?) +;; +;; PEDef ::= EntityValue | ExternalID +;; +;; +;; /* External Entity Declaration */ +;; +;; ExternalID ::= 'SYSTEM' S SystemLiteral +;; | 'PUBLIC' S PubidLiteral S SystemLiteral +;; +;; NDataDecl ::= S 'NDATA' S Name +;; [VC: Notation Declared] +;; +;; +;; /* Text Declaration */ +;; +;; TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' +;; +;; +;; /* Well-Formed External Parsed Entity */ +;; +;; extParsedEnt ::= TextDecl? 
content +;; +;; +;; /* Encoding Declaration */ +;; +;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) +;; +;; EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* +;; /* Encoding name contains only Latin characters */ +;; +;; +;; /* Notation Declarations */ +;; +;; NotationDecl ::= '' +;; [VC: Unique Notation Name] +;; +;; PublicID ::= 'PUBLIC' S PubidLiteral +;; +;; +;; /* Characters */ +;; +;; Letter ::= BaseChar | Ideographic +;; +;; BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] +;; | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] +;; | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] +;; | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] +;; | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] +;; | #x0386 | [#x0388-#x038A] | #x038C +;; | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] +;; | #x03DA | #x03DC | #x03DE +;; | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] +;; | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] +;; | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] +;; | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] +;; | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] +;; | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] +;; | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] +;; | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 +;; | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D +;; | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] +;; | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 +;; | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] +;; | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] +;; | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] +;; | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] +;; | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] +;; | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] +;; | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] +;; | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] +;; | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] +;; | [#x0B32-#x0B33] | [#x0B36-#x0B39] 
| #x0B3D +;; | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] +;; | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] +;; | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] +;; | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] +;; | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] +;; | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] +;; | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] +;; | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE +;; | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] +;; | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] +;; | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] +;; | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 +;; | [#x0E87-#x0E88] | #x0E8A | #x0E8D +;; | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] +;; | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] +;; | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] +;; | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] +;; | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] +;; | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] +;; | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] +;; | #x113C | #x113E | #x1140 +;; | #x114C | #x114E | #x1150 +;; | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] +;; | #x1163 | #x1165 | #x1167 +;; | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] +;; | #x1175 | #x119E | #x11A8 +;; | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] +;; | #x11BA | [#x11BC-#x11C2] | #x11EB +;; | #x11F0 | #x11F9 | [#x1E00-#x1E9B] +;; | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] +;; | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] +;; | #x1F59 | #x1F5B | #x1F5D +;; | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] +;; | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] +;; | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] +;; | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 +;; | [#x212A-#x212B] | #x212E | [#x2180-#x2182] +;; | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] +;; | [#xAC00-#xD7A3] +;; +;; Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] +;; +;; CombiningChar ::= 
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] +;; | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] +;; | #x05BF | [#x05C1-#x05C2] | #x05C4 +;; | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] +;; | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] +;; | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C +;; | [#x093E-#x094C] | #x094D | [#x0951-#x0954] +;; | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC +;; | #x09BE | #x09BF | [#x09C0-#x09C4] +;; | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 +;; | [#x09E2-#x09E3] | #x0A02 | #x0A3C +;; | #x0A3E | #x0A3F | [#x0A40-#x0A42] +;; | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] +;; | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] +;; | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] +;; | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] +;; | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] +;; | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] +;; | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] +;; | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] +;; | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] +;; | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] +;; | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] +;; | #x0D57 | #x0E31 | [#x0E34-#x0E3A] +;; | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] +;; | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] +;; | #x0F35 | #x0F37 | #x0F39 +;; | #x0F3E | #x0F3F | [#x0F71-#x0F84] +;; | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 +;; | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 +;; | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] +;; | #x3099 | #x309A +;; +;; Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] +;; | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] +;; | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] +;; | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] +;; | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29] +;; +;; Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 +;; | #x3005 | [#x3031-#x3035] | 
;;                | [#x309D-#x309E] | [#x30FC-#x30FE]
;;
;;
;; NOTES
;; -----
;;
;; At the moment, only `<!ELEMENT' declarations generate a syntactic chart.
;; `<!ATTLIST', `<!ENTITY' and `<!NOTATION' declarations are syntactically
;; checked, but they do not generate any syntactic chart.
;;
;; NOTE(review): the rest of this note was lost from this copy of the file
;; (markup-like text was stripped); restore it from the upstream sources.
;;
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; Code:


(require 'ebnf-otz)


(defvar ebnf-dtd-lex nil
  "Value returned by `ebnf-dtd-lex' function.

The parser reads it right after a token is returned, to get the text
that goes with that token (for example the name of a `name' token or
the contents of a `string' token).")


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Syntactic analyzer


;;; document ::= prolog element Misc*
;;; /* Note that *only* the prolog will be parsed */

(defun ebnf-dtd-parser (start)
  "DTD parser.

Parse the buffer text from START up to `ebnf-limit' and return the list
of rules collected from the declarations found (the most recently
parsed rule comes first).  Only the prolog of the document is parsed.
Point is moved back to where it was before returning.

Signal an error if there is nothing to parse, or if the end of the
input (or a stray end of subset) is found before the expected end of
the declarations."
  (let ((total (+ (- ebnf-limit start) 1))
        (bias (1- start))
        (origin (point))
        rule-list token rule the-end)
    (goto-char start)
    (setq token (ebnf-dtd-lex))
    (and (eq token 'end-of-input)
         (error "Empty DTD file"))
    (setq token (ebnf-dtd-prolog token))
    (unless (eq (car token) 'end-prolog)
      ;; `ebnf-dtd-prolog' returns (FIRST-TOKEN . TERMINATOR-TOKEN)
      (setq the-end (cdr token)
            token (car token))
      (while (not (eq token the-end))
        ;; `ebnf-dtd-intsubset' hands `end-of-input' and `end-subset' back
        ;; without reading anything else, so a terminator which is not the
        ;; expected one would make this loop spin forever.
        (cond ((eq token 'end-of-input)
               (error "Missing end of DOCTYPE"))
              ((eq token 'end-subset)
               (error "Invalid DOCTYPE element")))
        (ebnf-message-float
         "Parsing...%s%%"
         (/ (* (- (point) bias) 100.0) total))
        (setq token (ebnf-dtd-intsubset token)
              rule (cdr token)
              token (car token))
        ;; declarations which generate no chart come back with a nil rule
        (or (null rule)
            (ebnf-add-empty-rule-list rule)
            (setq rule-list (cons rule rule-list))))
      ;; an internal subset must be followed by the `>' closing DOCTYPE
      (or (eq the-end 'end-of-input)
          (eq (ebnf-dtd-lex) 'end-decl)
          (error "Missing end of DOCTYPE"))
      ;; adjust message, 'cause *only* prolog will be parsed
      (ebnf-message-float "Parsing...%s%%" 100.0))
    (goto-char origin)
    rule-list))


;;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
;;;
;;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
;;;
;;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
;;;
;;; Eq ::= S? '=' S?
;;;
;;; VersionNum ::= '1.0'
;;;
;;; Misc ::= Comment | PI | S
;;;
;;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
;;;
;;; EncName ::= [A-Za-z] ([-A-Za-z0-9._])*
;;;             /* Encoding name contains only Latin characters */
;;;
;;; SDDecl ::= S 'standalone' Eq
;;;            (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
;;;
;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'


(defun ebnf-dtd-prolog (token)
  "Parse the document prolog; TOKEN is the first token of the input.

Return a cons (NEXT-TOKEN . END-TOKEN), where END-TOKEN is the token
which terminates the declarations that follow:

  (TOKEN-IN-SUBSET . end-subset)  DOCTYPE with an internal subset
  (end-prolog . end-subset)       nothing else to parse
  (DECL-TOKEN . end-of-input)     declarations without a DOCTYPE

Signal an error if the XML declaration or the DOCTYPE is malformed."
  (when (and (eq token 'begin-pi) (string= ebnf-dtd-lex "xml"))
    ;; version = "1.0"
    (setq token (ebnf-dtd-attribute (ebnf-dtd-lex) 'version-attr
                                    "\\`1\\.0\\'" "XML version"))
    ;; ( encoding = "encoding name" )?
    (setq token (ebnf-dtd-attribute-optional
                 token 'encoding-attr
                 "\\`[A-Za-z][-A-Za-z0-9._]*\\'" "XML encoding"))
    ;; ( standalone = ( "yes" | "no" ) )?
    ;; (alternation is spelled `\\|' in Emacs regexps; a bare `|' is a
    ;; literal character)
    (setq token (ebnf-dtd-attribute-optional
                 token 'standalone-attr
                 "\\`\\(yes\\|no\\)\\'" "XML standalone"))
    (or (eq token 'end-pi)
        (error "Missing end of XML processing instruction"))
    ;; the XML declaration was consumed: get the token which follows it
    (setq token (ebnf-dtd-lex)))
  ;; processing instructions
  ;; (when there is no XML declaration, TOKEN was not consumed yet and it
  ;; must not be thrown away by reading another token here)
  (setq token (ebnf-dtd-pi token))
  (cond
   ;; DOCTYPE
   ((eq token 'doctype-decl)
    (or (eq (ebnf-dtd-lex) 'name)
        (error "Document type name is missing"))
    (cons (if (eq (ebnf-dtd-externalid) 'begin-subset)
              (ebnf-dtd-lex)
            'end-prolog)
          'end-subset))
   ;; declarations given directly, without a DOCTYPE around them
   ((memq token '(element-decl attlist-decl entity-decl notation-decl))
    (cons token 'end-of-input))
   (t
    '(end-prolog . end-subset))
   ))


(defun ebnf-dtd-attribute (token attr match attr-name)
  "Parse the mandatory attribute ATTR; TOKEN is the current token.

Signal an error naming ATTR-NAME if TOKEN is not ATTR.  Otherwise behave
like `ebnf-dtd-attribute-optional', which see for MATCH and the value
returned."
  (or (eq token attr)
      (error "%s attribute is missing" attr-name))
  (ebnf-dtd-attribute-optional token attr match attr-name))


(defun ebnf-dtd-attribute-optional (token attr match attr-name)
  "Parse the optional attribute ATTR; TOKEN is the current token.

If TOKEN is ATTR, it must be followed by `=' and by a string whose
contents match the regexp MATCH; otherwise signal an error naming
ATTR-NAME.  Return the token following the attribute, or TOKEN itself
if the attribute is absent."
  (when (eq token attr)
    (or (and (eq (ebnf-dtd-lex) 'equal)
             (eq (ebnf-dtd-lex) 'string)
             (string-match match ebnf-dtd-lex))
        ;; ATTR-NAME already starts with "XML"
        (error "%s attribute is invalid" attr-name))
    (setq token (ebnf-dtd-lex)))
  token)


;;; ExternalID ::= 'SYSTEM' S SystemLiteral
;;;              | 'PUBLIC' S PubidLiteral S SystemLiteral


(defun ebnf-dtd-externalid (&optional token)
  "Parse an external identifier and return the token following it.

If TOKEN is nil, the first token is read with `ebnf-dtd-lex' and the
external identifier is optional: a token other than `system' or
`public' is returned as is.  If TOKEN is non-nil, it is used as the
first token and the external identifier is mandatory."
  (let ((must-have token))
    (or token (setq token (ebnf-dtd-lex)))
    (cond ((eq token 'system)
           (ebnf-dtd-systemliteral))
          ((eq token 'public)
           (ebnf-dtd-pubidliteral)
           (ebnf-dtd-systemliteral))
          (must-have
           (error "Missing `SYSTEM' or `PUBLIC' in external id"))
          (t
           token))))


;;; SystemLiteral ::= ('"' [^"]* '"')
;;;                 | ("'" [^']* "'")


(defun ebnf-dtd-systemliteral ()
  "Parse a system literal and return the token following it.
Signal an error if the next token is not a string."
  (or (eq (ebnf-dtd-lex) 'string)
      (error "System identifier is invalid"))
  (ebnf-dtd-lex))


;;; PubidLiteral ::= '"' PubidChar* '"'
;;;                | "'" (PubidChar - "'")* "'"
;;;
;;; PubidChar ::= [-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]


(defun ebnf-dtd-pubidliteral ()
  "Parse a public identifier literal.
Signal an error if the next token is not a string made only of PubidChar
characters.  The token following the literal is NOT read."
  ;; A public identifier may contain newlines, so the regexp is anchored
  ;; with \\` and \\' (whole string); `^' and `$' would also match around
  ;; any newline and let invalid characters slip through.
  (or (and (eq (ebnf-dtd-lex) 'string)
           (string-match "\\`[-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]*\\'"
                         ebnf-dtd-lex))
      (error "Public identifier is invalid")))


;;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))?
;;;        '?>'
;;;
;;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))


(defun ebnf-dtd-pi (token)
  "Skip consecutive processing instructions; TOKEN is the current token.

While TOKEN is `begin-pi', discard every token up to the matching
`end-pi'.  Return the first token which does not start a processing
instruction.

Signal an error if a processing instruction is named `XML' (in any
case), or if the input ends before a processing instruction is closed."
  (while (eq token 'begin-pi)
    (and (string-match "^[xX][mM][lL]$" ebnf-dtd-lex)
         (error "Processing instruction name can not be `XML'"))
    ;; skip the body of the processing instruction; the lexer keeps
    ;; returning `end-of-input' once the limit is reached, so stop there
    ;; instead of waiting forever for an `end-pi' which will never come
    (while (not (eq (setq token (ebnf-dtd-lex)) 'end-pi))
      (and (eq token 'end-of-input)
           (error "Missing end of processing instruction")))
    (setq token (ebnf-dtd-lex)))
  token)


;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
;;;
;;; intSubset ::= (markupdecl | DeclSep)*
;;;
;;; DeclSep ::= PEReference | S
;;;
;;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
;;;              | NotationDecl | PI | Comment


(defun ebnf-dtd-intsubset (token)
  "Parse one item of the internal subset; TOKEN is the current token.

Return a cons (NEXT-TOKEN . RULE).  RULE is nil for everything but an
ELEMENT declaration.  When TOKEN is `end-subset' or `end-of-input', it
is returned as NEXT-TOKEN without reading any more input.

Signal an error if TOKEN can not start an item of the internal subset."
  ;; PI - Processing Instruction
  (and (eq token 'begin-pi)
       (setq token (ebnf-dtd-pi token)))
  (cond
   ((memq token '(end-subset end-of-input))
    (cons token nil))
   ((eq token 'pe-ref)
    (cons (ebnf-dtd-lex) nil))          ; annotation
   ((eq token 'element-decl)
    (ebnf-dtd-elementdecl))             ; rule
   ((eq token 'attlist-decl)
    (ebnf-dtd-attlistdecl))             ; annotation
   ((eq token 'entity-decl)
    (ebnf-dtd-entitydecl))              ; annotation
   ((eq token 'notation-decl)
    (ebnf-dtd-notationdecl))            ; annotation
   (t
    (error "Invalid DOCTYPE element"))
   ))


;;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
;;;
;;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
;;;
;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
;;;         | '(' S? '#PCDATA' S? ')'
;;;
;;; children ::= (choice | seq) ('?' | '*' | '+')?
;;;
;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
;;;
;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
;;;
;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?


(defun ebnf-dtd-elementdecl ()
  "Parse an ELEMENT declaration, after the declaration keyword.

Return a cons (NEXT-TOKEN . PRODUCTION), where PRODUCTION is built with
`ebnf-make-production' from the element name and its content model.
The value of `ebnf-action' on entry is attached to the production and
`ebnf-action' is reset to nil.

Signal an error if the declaration is malformed."
  (let ((action ebnf-action)
        name token body)
    (setq ebnf-action nil)
    (or (eq (ebnf-dtd-lex) 'name)
        (error "Invalid ELEMENT name"))
    (setq name ebnf-dtd-lex
          token (ebnf-dtd-lex))
    ;; BODY is a cons (TOKEN-AFTER-CONTENT . ELEMENT)
    (setq body
          (cond ((memq token '(empty any))
                 ;; the keyword itself is drawn as a terminal
                 (let ((term (ebnf-make-terminal ebnf-dtd-lex)))
                   (cons (ebnf-dtd-lex) term)))
                ((eq token 'begin-group)
                 (setq token (ebnf-dtd-lex))
                 (if (eq token 'pcdata)
                     (ebnf-dtd-mixed)
                   (ebnf-dtd-children token)))
                (t
                 (error "Invalid ELEMENT content"))
                ))
    (or (eq (car body) 'end-decl)
        (error "Missing `>' in ELEMENT declaration"))
    (ebnf-eps-add-production name)
    (cons (ebnf-dtd-lex)
          (ebnf-make-production name (cdr body) action))))


;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
;;;         | '(' S? '#PCDATA' S? ')'


(defun ebnf-dtd-mixed ()
  "Parse a mixed content model, right after the `#PCDATA' token.

Return a cons (NEXT-TOKEN . ELEMENT), where ELEMENT stands for the
alternatives `#PCDATA' | Name | ...

The closing `)' must be followed by `*' when there is at least one
Name; the `*' is also accepted (and skipped) after a lone `#PCDATA',
as the first Mixed production allows `(#PCDATA)*'.

Signal an error if the content model is malformed."
  (let* ((alt (list (ebnf-make-terminal ebnf-dtd-lex)))
         (token (ebnf-dtd-lex))
         (has-alternative (eq token 'alternative)))
    (while (eq token 'alternative)
      (or (eq (ebnf-dtd-lex) 'name)
          (error "Invalid name"))
      ;; every alternative must be an element, not the bare name string;
      ;; names are turned into terminals, as `ebnf-dtd-cp' does
      (setq alt (cons (ebnf-make-terminal ebnf-dtd-lex) alt)
            token (ebnf-dtd-lex)))
    (or (eq token 'end-group)
        (error "Missing `)'"))
    (setq token (ebnf-dtd-lex))
    (cond (has-alternative
           (or (eq token 'zero-or-more)
               (error "Missing `*'"))
           (setq token (ebnf-dtd-lex)))
          ;; `(#PCDATA)*'
          ((eq token 'zero-or-more)
           (setq token (ebnf-dtd-lex))))
    (ebnf-token-alternative alt (cons token nil))))


;;; children ::= (choice | seq) ('?' | '*' | '+')?


(defun ebnf-dtd-children (token)
  "Parse an element content model; TOKEN is the token following its `('.
Return a cons (NEXT-TOKEN . ELEMENT)."
  (ebnf-dtd-operators (ebnf-dtd-choice-seq token)))


;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
;;;
;;; seq ::= '(' S? cp ( S? ',' S? cp )* S?
;;;         ')'


(defun ebnf-dtd-choice-seq (token)
  "Parse a choice or a sequence group; TOKEN is the token following `('.

Return the element which stands for the whole group: an alternative, a
sequence, or the single content particle of the group.  The closing `)'
is consumed, the token following it is not.

Signal an error if the group is not closed by `)'."
  (setq token (ebnf-dtd-cp token))
  ;; from here on TOKEN is a cons (NEXT-TOKEN . ELEMENT)
  (let (elist)
    (cond
     ;; choice
     ((eq (car token) 'alternative)
      (while (eq (car token) 'alternative)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      ;; `ebnf-token-alternative' returns (NEXT-TOKEN . ELEMENT); only the
      ;; element is wanted here, like in the `seq' branch below
      (setq elist (cdr (ebnf-token-alternative elist token))))
     ;; seq
     ((eq (car token) 'comma)
      (while (eq (car token) 'comma)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      (setq elist (ebnf-token-sequence (cons (cdr token) elist))))
     ;; only one element
     (t
      (setq elist (cdr token))))
    (or (eq (car token) 'end-group)
        (error "Missing `)' in ELEMENT content"))
    elist))


;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?


(defun ebnf-dtd-cp (token)
  "Parse a content particle; TOKEN is its first token.

A name becomes a terminal; a `(' starts a nested choice or sequence.
Return a cons (NEXT-TOKEN . ELEMENT), see `ebnf-dtd-operators'.

Signal an error if TOKEN can not start a content particle."
  (ebnf-dtd-operators (cond ((eq token 'name)
                             (ebnf-make-terminal ebnf-dtd-lex))
                            ((eq token 'begin-group)
                             (ebnf-dtd-choice-seq (ebnf-dtd-lex)))
                            (t
                             (error "Invalid element"))
                            )))


;;; elm ('?' | '*' | '+')?


(defun ebnf-dtd-operators (elm)
  "Apply the occurrence operator which may follow the element ELM.

Read the next token: if it is `?', `*' or `+', wrap ELM accordingly and
read one more token.  Return a cons (NEXT-TOKEN . ELEMENT)."
  (let ((token (ebnf-dtd-lex)))
    (cond ((eq token 'optional)         ; ? - optional
           (cons (ebnf-dtd-lex) (ebnf-token-optional elm)))
          ((eq token 'zero-or-more)     ; * - zero or more
           (cons (ebnf-dtd-lex) (ebnf-make-zero-or-more elm)))
          ((eq token 'one-or-more)      ; + - one or more
           (cons (ebnf-dtd-lex) (ebnf-make-one-or-more elm)))
          (t                            ; only element
           (cons token elm))
          )))


;;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
;;;
;;; AttDef ::= S Name S AttType S DefaultDecl
;;;
;;; AttType ::= StringType | TokenizedType | EnumeratedType
;;;
;;; StringType ::= 'CDATA'
;;;
;;; TokenizedType ::= 'ID'
;;;                 | 'IDREF'
;;;                 | 'IDREFS'
;;;                 | 'ENTITY'
;;;                 | 'ENTITIES'
;;;                 | 'NMTOKEN'
;;;                 | 'NMTOKENS'
;;;
;;; EnumeratedType ::= NotationType | Enumeration
;;;
;;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
;;;
;;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
;;;
;;; DefaultDecl ::= '#REQUIRED'
;;;               | '#IMPLIED'
;;;               | (('#FIXED' S)?
;;;                  AttValue)
;;;
;;;
;;; AttValue ::= '"' ([^<&"] | Reference)* '"'
;;;            | "'" ([^<&'] | Reference)* "'"
;;;
;;; Reference ::= EntityRef | CharRef
;;;
;;; EntityRef ::= '&' Name ';'
;;;
;;; CharRef ::= '&#' [0-9]+ ';'
;;;           | '&#x' [0-9a-fA-F]+ ';'

;;; "\\`\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*\\'"


(defun ebnf-dtd-attlistdecl ()
  "Parse an ATTLIST declaration, after the declaration keyword.

The declaration is only checked; it generates no rule.  Return a cons
(NEXT-TOKEN . nil).

Signal an error if the declaration is malformed."
  (or (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ATTLIST name"))
  (let (token)
    ;; one iteration for each attribute definition
    (while (eq (setq token (ebnf-dtd-lex)) 'name)
      ;; type
      (setq token (ebnf-dtd-lex))
      (cond
       ((eq token 'notation)
        (or (eq (ebnf-dtd-lex) 'begin-group)
            (error "Missing `(' in NOTATION type in ATTLIST declaration"))
        (ebnf-dtd-namelist "NOTATION" '(name)))
       ((eq token 'begin-group)
        (ebnf-dtd-namelist "enumeration" '(name name-char)))
       ;; string and tokenized types: nothing else to read
       ((memq token
              '(cdata id idref idrefs entity entities nmtoken nmtokens)))
       (t
        (error "Invalid type in ATTLIST declaration")))
      ;; default value
      (setq token (ebnf-dtd-lex))
      (unless (memq token '(required implied))
        (and (eq token 'fixed)
             (setq token (ebnf-dtd-lex)))
        ;; The value may span several lines, so the regexp is anchored
        ;; with \\` and \\' (whole string); `^' and `$' would also match
        ;; around any newline and accept an invalid value.
        (or (and (eq token 'string)
                 (string-match
                  "\\`\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*\\'"
                  ebnf-dtd-lex))
            (error "Invalid default value in ATTLIST declaration"))))
    (or (eq token 'end-decl)
        (error "Missing `>' in end of ATTLIST"))
    (cons (ebnf-dtd-lex) nil)))


(defun ebnf-dtd-namelist (type name-list)
  "Parse a list of names separated by `|', up to the closing `)'.

TYPE is the name of the attribute type, used in error messages.
NAME-LIST is the list of tokens accepted as a name.

Signal an error if a name is invalid or if the closing `)' is missing."
  (let (token)
    (while (progn
             (or (memq (ebnf-dtd-lex) name-list)
                 (error "Invalid name in %s type in ATTLIST declaration" type))
             (eq (setq token (ebnf-dtd-lex)) 'alternative)))
    (or (eq token 'end-group)
        (error "Missing `)' in %s type in ATTLIST declaration" type))))


;;; EntityDecl ::= GEDecl | PEDecl
;;;
;;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
;;;
;;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
;;;
;;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
;;;
;;; PEDef ::= EntityValue | ExternalID
;;;
;;; NDataDecl ::= S 'NDATA' S Name
;;;
;;;
;;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
;;;               | "'" ([^%&'] | PEReference | Reference)* "'"
;;;
;;; PEReference ::= '%' Name ';'
;;;
;;; Reference ::= EntityRef | CharRef
;;;
;;; EntityRef ::= '&' Name ';'
;;;
;;; CharRef ::= '&#' [0-9]+ ';'
;;;           | '&#x' [0-9a-fA-F]+ ';'

;;; "\\`\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*\\'"


(defun ebnf-dtd-entitydecl ()
  "Parse an ENTITY declaration, after the declaration keyword.

The declaration is only checked; it generates no rule.  Return a cons
(NEXT-TOKEN . nil).

Signal an error if the declaration is malformed."
  (let* ((token (ebnf-dtd-lex))
         (pedecl (eq token 'percent)))  ; parameter entity: `%' before name
    (and pedecl
         (setq token (ebnf-dtd-lex)))
    (or (eq token 'name)
        (error "Invalid name of ENTITY"))
    (setq token (ebnf-dtd-lex))
    (if (eq token 'string)
        ;; EntityValue.  It may span several lines, so the regexp is
        ;; anchored with \\` and \\' (whole string); `^' and `$' would also
        ;; match around any newline and accept an invalid value.
        (if (string-match
             "\\`\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*\\'"
             ebnf-dtd-lex)
            (setq token (ebnf-dtd-lex))
          (error "Invalid ENTITY definition"))
      ;; ExternalID; NDATA is accepted only in a general entity
      (setq token (ebnf-dtd-externalid token))
      (when (and (not pedecl) (eq token 'ndata))
        (or (eq (ebnf-dtd-lex) 'name)
            (error "Invalid NDATA name"))
        (setq token (ebnf-dtd-lex))))
    (or (eq token 'end-decl)
        (error "Missing `>' in end of ENTITY"))
    (cons (ebnf-dtd-lex) nil)))


;;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
;;;
;;; PublicID ::= 'PUBLIC' S PubidLiteral


(defun ebnf-dtd-notationdecl ()
  "Parse a NOTATION declaration, after the declaration keyword.

The declaration is only checked; it generates no rule.  Return a cons
(NEXT-TOKEN . nil).

Signal an error if the declaration is malformed."
  (or (eq (ebnf-dtd-lex) 'name)
      (error "Invalid name NOTATION"))
  (or (eq (ebnf-dtd-externalid-or-publicid) 'end-decl)
      (error "Missing `>' in end of NOTATION"))
  (cons (ebnf-dtd-lex) nil))


;;; ExternalID ::= 'SYSTEM' S SystemLiteral
;;;              | 'PUBLIC' S PubidLiteral S SystemLiteral
;;;
;;; PublicID ::= 'PUBLIC' S PubidLiteral


(defun ebnf-dtd-externalid-or-publicid ()
  "Parse an external identifier or a public identifier.

Return the token following it.  Signal an error if the next token is
neither `system' nor `public'."
  (let ((token (ebnf-dtd-lex)))
    (cond ((eq token 'system)
           (ebnf-dtd-systemliteral))
          ((eq token 'public)
           (ebnf-dtd-pubidliteral)
           ;; the system literal is optional after a public identifier
           (and (eq (setq token (ebnf-dtd-lex)) 'string)
                (setq token (ebnf-dtd-lex)))
           token)
          (t
           (error "Missing `SYSTEM' or `PUBLIC'")))))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Lexical analyzer


(defconst ebnf-dtd-token-table (make-vector 256 'error)
  "Vector used to map characters to a lexical token.")


(defun ebnf-dtd-initialize ()
  "Initialize EBNF token table."
  ;; control character & control 8-bit character are set to `error'
  ;; character ranges: (FIRST LAST . TOKEN), both ends included
  (dolist (range '((?0    ?9    . name-char) ; digits: 0-9
                   (?A    ?Z    . name)      ; printable character: A-Z
                   (?a    ?z    . name)      ; printable character: a-z
                   ;; European 8-bit accentuated characters:
                   (?\240 ?\377 . name)))
    (let ((char (car range))
          (last (cadr range))
          (token (cddr range)))
      (while (<= char last)
        (aset ebnf-dtd-token-table char token)
        (setq char (1+ char)))))
  ;; single characters: (CHAR . TOKEN)
  (dolist (entry '(;; Override name characters:
                   (?_  . name)
                   (?:  . name)
                   (?\. . name-char)
                   (?-  . name-char)
                   ;; Override space characters:
                   (?\n . space)        ; [NL] linefeed
                   (?\r . space)        ; [CR] carriage return
                   (?\t . space)        ; [HT] horizontal tab
                   (?\s . space)        ; [SP] space
                   ;; Override other lexical characters:
                   (?=  . equal)
                   (?\, . comma)
                   (?*  . zero-or-more)
                   (?+  . one-or-more)
                   (?\| . alternative)
                   (?%  . percent)
                   (?&  . ampersand)
                   (?\# . hash)
                   (?\? . interrogation)
                   (?\" . double-quote)
                   (?\' . single-quote)
                   (?<  . less-than)
                   (?>  . end-decl)
                   (?\( . begin-group)
                   (?\) . end-group)
                   (?\[ . begin-subset)
                   (?\] . end-subset)))
    (aset ebnf-dtd-token-table (car entry) (cdr entry))))


;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-dtd-name-chars
  (ebnf-range-regexp "-._:0-9A-Za-z" ?\240 ?\377))


;; Keywords of declarations and the token for each one.
(defconst ebnf-dtd-decl-alist
  '(("ATTLIST"  . attlist-decl)
    ("DOCTYPE"  . doctype-decl)
    ("ELEMENT"  . element-decl)
    ("ENTITY"   . entity-decl)
    ("NOTATION" . notation-decl)))


;; Keywords which start with `#' and the token for each one.
(defconst ebnf-dtd-element-alist
  '(("#FIXED"    . fixed)
    ("#IMPLIED"  . implied)
    ("#PCDATA"   . pcdata)
    ("#REQUIRED" . required)))


;; Reserved names and the token for each one.
(defconst ebnf-dtd-name-alist
  '(("ANY"        . any)
    ("CDATA"      . cdata)
    ("EMPTY"      . empty)
    ("ENTITIES"   . entities)
    ("ENTITY"     . entity)
    ("ID"         . id)
    ("IDREF"      . idref)
    ("IDREFS"     . idrefs)
    ("NDATA"      . ndata)
    ("NMTOKEN"    . nmtoken)
    ("NMTOKENS"   . nmtokens)
    ("NOTATION"   . notation)
    ("PUBLIC"     . public)
    ("SYSTEM"     . system)
    ("encoding"   . encoding-attr)
    ("standalone" . standalone-attr)
    ("version"    . version-attr)))


(defun ebnf-dtd-lex ()
  "Lexical analyser for DTD.

Return a lexical token.

See documentation for variable `ebnf-dtd-lex'."
+ (if (>= (point) ebnf-limit) + 'end-of-input + (let (token) + ;; skip spaces and comments + (while (if (> (following-char) 255) + (progn + (setq token 'error) + nil) + (setq token (aref ebnf-dtd-token-table (following-char))) + (cond + ((eq token 'space) + (skip-chars-forward " \n\r\t" ebnf-limit) + (< (point) ebnf-limit)) + ((and (eq token 'less-than) + (looking-at "")))) + (skip-chars-forward "-" ebnf-limit)) + ;; check for a valid end of comment + (cond ((>= (point) ebnf-limit) + nil) + ((looking-at "-->") + (forward-char 3) + t) + (t + (error "Illegal character")) + )) + + +(defun ebnf-dtd-eps-filename () + (forward-char) + (let (fname) + (while (progn + (setq fname + (concat fname + (ebnf-buffer-substring ebnf-dtd-filename-chars))) + (and (< (point) ebnf-limit) + (= (following-char) ?-) ; may be \n, \t, \r + (not (looking-at "-->")))) + (setq fname (concat fname (ebnf-buffer-substring "-")))) + fname)) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +(provide 'ebnf-dtd) + + +;;; arch-tag: +;;; ebnf-dtd.el ends here -- 2.39.5