--- /dev/null
+;;; semantic-lex.el --- Lexical Analyzer builder
+
+;;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+;;; 2007, 2008, 2009 Free Software Foundation, Inc.
+
+;; Author: Eric M. Ludlam <zappo@gnu.org>
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;;
+;; This file handles the creation of lexical analyzers for different
+;; languages in Emacs Lisp. The purpose of a lexical analyzer is to
+;; convert a buffer into a list of lexical tokens. Each token
+;; contains the token class (such as 'number, 'symbol, 'IF, etc) and
+;; the location in the buffer it was found. Optionally, a token also
+;; contains a string representing what is at the designated buffer
+;; location.
+;;
+;; Tokens are pushed onto a token stream, which is basically a list of
+;; all the lexical tokens from the analyzed region. The token stream
+;; is then handed to the grammar which parsers the file.
+;;
+;;; How it works
+;;
+;; Each analyzer specifies a condition and forms. These conditions
+;; and forms are assembled into a function by `define-lex' that does
+;; the lexical analysis.
+;;
+;; In the lexical analyzer created with `define-lex', each condition
+;; is tested for a given point. When the conditin is true, the forms
+;; run.
+;;
+;; The forms can push a lexical token onto the token stream. The
+;; analyzer forms also must move the current analyzer point. If the
+;; analyzer point is moved without pushing a token, then tne matched
+;; syntax is effectively ignored, or skipped.
+;;
+;; Thus, starting at the beginning of a region to be analyzed, each
+;; condition is tested. One will match, and a lexical token might be
+;; pushed, and the point is moved to the end of the lexical token
+;; identified. At the new position, the process occurs again until
+;; the end of the specified region is reached.
+;;
+;;; How to use semantic-lex
+;;
+;; To create a lexer for a language, use the `define-lex' macro.
+;;
+;; The `define-lex' macro accepts a list of lexical analyzers. Each
+;; analyzer is created with `define-lex-analyzer', or one of the
+;; derivitive macros. A single analyzer defines a regular expression
+;; to match text in a buffer, and a short segment of code to create
+;; one lexical token.
+;;
+;; Each analyzer has a NAME, DOC, a CONDITION, and possibly some
+;; FORMS. The NAME is the name used in `define-lex'. The DOC
+;; describes what the analyzer should do.
+;;
+;; The CONDITION evaluates the text at the current point in the
+;; current buffer. If CONDITION is true, then the FORMS will be
+;; executed.
+;;
+;; The purpose of the FORMS is to push new lexical tokens onto the
+;; list of tokens for the current buffer, and to move point after the
+;; matched text.
+;;
+;; Some macros for creating one analyzer are:
+;;
+;; define-lex-analyzer - A generic analyzer associating any style of
+;; condition to forms.
+;; define-lex-regex-analyzer - Matches a regular expression.
+;; define-lex-simple-regex-analyzer - Matches a regular expressions,
+;; and pushes the match.
+;; define-lex-block-analyzer - Matches list syntax, and defines
+;; handles open/close delimiters.
+;;
+;; These macros are used by the grammar compiler when lexical
+;; information is specified in a grammar:
+;; define-lex- * -type-analyzer - Matches syntax specified in
+;; a grammar, and pushes one token for it. The * would
+;; be `sexp' for things like lists or strings, and
+;; `string' for things that need to match some special
+;; string, such as "\\." where a literal match is needed.
+;;
+;;; Lexical Tables
+;;
+;; There are tables of different symbols managed in semantic-lex.el.
+;; They are:
+;;
+;; Lexical keyword table - A Table of symbols declared in a grammar
+;; file with the %keyword declaration.
+;; Keywords are used by `semantic-lex-symbol-or-keyword'
+;; to create lexical tokens based on the keyword.
+;;
+;; Lexical type table - A table of symbols declared in a grammer
+;; file with the %type declaration.
+;; The grammar compiler uses the type table to create new
+;; lexical analyzers. These analyzers are then used to when
+;; a new lexical analyzer is made for a language.
+;;
+;;; Lexical Types
+;;
+;; A lexical type defines a kind of lexical analyzer that will be
+;; automatically generated from a grammar file based on some
+;; predetermined attributes. For now these two attributes are
+;; recognized :
+;;
+;; * matchdatatype : define the kind of lexical analyzer. That is :
+;;
+;; - regexp : define a regexp analyzer (see
+;; `define-lex-regex-type-analyzer')
+;;
+;; - string : define a string analyzer (see
+;; `define-lex-string-type-analyzer')
+;;
+;; - block : define a block type analyzer (see
+;; `define-lex-block-type-analyzer')
+;;
+;; - sexp : define a sexp analyzer (see
+;; `define-lex-sexp-type-analyzer')
+;;
+;; - keyword : define a keyword analyzer (see
+;; `define-lex-keyword-type-analyzer')
+;;
+;; * syntax : define the syntax that matches a syntactic
+;; expression. When syntax is matched the corresponding type
+;; analyzer is entered and the resulting match data will be
+;; interpreted based on the kind of analyzer (see matchdatatype
+;; above).
+;;
+;; The following lexical types are predefined :
+;;
+;; +-------------+---------------+--------------------------------+
+;; | type | matchdatatype | syntax |
+;; +-------------+---------------+--------------------------------+
+;; | punctuation | string | "\\(\\s.\\|\\s$\\|\\s'\\)+" |
+;; | keyword | keyword | "\\(\\sw\\|\\s_\\)+" |
+;; | symbol | regexp | "\\(\\sw\\|\\s_\\)+" |
+;; | string | sexp | "\\s\"" |
+;; | number | regexp | semantic-lex-number-expression |
+;; | block | block | "\\s(\\|\\s)" |
+;; +-------------+---------------+--------------------------------+
+;;
+;; In a grammar you must use a %type expression to automatically generate
+;; the corresponding analyzers of that type.
+;;
+;; Here is an example to auto-generate punctuation analyzers
+;; with 'matchdatatype and 'syntax predefined (see table above)
+;;
+;; %type <punctuation> ;; will auto-generate this kind of analyzers
+;;
+;; It is equivalent to write :
+;;
+;; %type <punctuation> syntax "\\(\\s.\\|\\s$\\|\\s'\\)+" matchdatatype string
+;;
+;; ;; Some punctuations based on the type defines above
+;;
+;; %token <punctuation> NOT "!"
+;; %token <punctuation> NOTEQ "!="
+;; %token <punctuation> MOD "%"
+;; %token <punctuation> MODEQ "%="
+;;
+
+;;; On the Semantic 1.x lexer
+;;
+;; In semantic 1.x, the lexical analyzer was an all purpose routine.
+;; To boost efficiency, the analyzer is now a series of routines that
+;; are constructed at build time into a single routine. This will
+;; eliminate unneeded if statements to speed the lexer.
+
+(require 'semantic/fw)
+;;; Code:
+
+;;; Compatibility
+;;
+(eval-and-compile
+ (if (not (fboundp 'with-syntax-table))
+
+;; Copied from Emacs 21 for compatibility with released Emacses.
+(defmacro with-syntax-table (table &rest body)
+ "With syntax table of current buffer set to a copy of TABLE, evaluate BODY.
+The syntax table of the current buffer is saved, BODY is evaluated, and the
+saved table is restored, even in case of an abnormal exit.
+Value is what BODY returns."
+ (let ((old-table (make-symbol "table"))
+ (old-buffer (make-symbol "buffer")))
+ `(let ((,old-table (syntax-table))
+ (,old-buffer (current-buffer)))
+ (unwind-protect
+ (progn
+ (set-syntax-table (copy-syntax-table ,table))
+ ,@body)
+ (save-current-buffer
+ (set-buffer ,old-buffer)
+ (set-syntax-table ,old-table))))))
+
+))
+\f
+;;; Semantic 2.x lexical analysis
+;;
+(defun semantic-lex-map-symbols (fun table &optional property)
+ "Call function FUN on every symbol in TABLE.
+If optional PROPERTY is non-nil, call FUN only on every symbol which
+as a PROPERTY value. FUN receives a symbol as argument."
+ (if (arrayp table)
+ (mapatoms
+ #'(lambda (symbol)
+ (if (or (null property) (get symbol property))
+ (funcall fun symbol)))
+ table)))
+
+;;; Lexical keyword table handling.
+;;
+;; These keywords are keywords defined for using in a grammar with the
+;; %keyword declaration, and are not keywords used in Emacs Lisp.
+
+(defvar semantic-flex-keywords-obarray nil
+ "Buffer local keyword obarray for the lexical analyzer.
+These keywords are matched explicitly, and converted into special symbols.")
+(make-variable-buffer-local 'semantic-flex-keywords-obarray)
+
+(defmacro semantic-lex-keyword-invalid (name)
+ "Signal that NAME is an invalid keyword name."
+ `(signal 'wrong-type-argument '(semantic-lex-keyword-p ,name)))
+
+(defsubst semantic-lex-keyword-symbol (name)
+ "Return keyword symbol with NAME or nil if not found."
+ (and (arrayp semantic-flex-keywords-obarray)
+ (stringp name)
+ (intern-soft name semantic-flex-keywords-obarray)))
+
+(defsubst semantic-lex-keyword-p (name)
+ "Return non-nil if a keyword with NAME exists in the keyword table.
+Return nil otherwise."
+ (and (setq name (semantic-lex-keyword-symbol name))
+ (symbol-value name)))
+
+(defsubst semantic-lex-keyword-set (name value)
+ "Set value of keyword with NAME to VALUE and return VALUE."
+ (set (intern name semantic-flex-keywords-obarray) value))
+
+(defsubst semantic-lex-keyword-value (name)
+ "Return value of keyword with NAME.
+Signal an error if a keyword with NAME does not exist."
+ (let ((keyword (semantic-lex-keyword-symbol name)))
+ (if keyword
+ (symbol-value keyword)
+ (semantic-lex-keyword-invalid name))))
+
+(defsubst semantic-lex-keyword-put (name property value)
+ "For keyword with NAME, set its PROPERTY to VALUE."
+ (let ((keyword (semantic-lex-keyword-symbol name)))
+ (if keyword
+ (put keyword property value)
+ (semantic-lex-keyword-invalid name))))
+
+(defsubst semantic-lex-keyword-get (name property)
+ "For keyword with NAME, return its PROPERTY value."
+ (let ((keyword (semantic-lex-keyword-symbol name)))
+ (if keyword
+ (get keyword property)
+ (semantic-lex-keyword-invalid name))))
+
+(defun semantic-lex-make-keyword-table (specs &optional propspecs)
+ "Convert keyword SPECS into an obarray and return it.
+SPECS must be a list of (NAME . TOKSYM) elements, where:
+
+ NAME is the name of the keyword symbol to define.
+ TOKSYM is the lexical token symbol of that keyword.
+
+If optional argument PROPSPECS is non nil, then interpret it, and
+apply those properties.
+PROPSPECS must be a list of (NAME PROPERTY VALUE) elements."
+ ;; Create the symbol hash table
+ (let ((semantic-flex-keywords-obarray (make-vector 13 0))
+ spec)
+ ;; fill it with stuff
+ (while specs
+ (setq spec (car specs)
+ specs (cdr specs))
+ (semantic-lex-keyword-set (car spec) (cdr spec)))
+ ;; Apply all properties
+ (while propspecs
+ (setq spec (car propspecs)
+ propspecs (cdr propspecs))
+ (semantic-lex-keyword-put (car spec) (nth 1 spec) (nth 2 spec)))
+ semantic-flex-keywords-obarray))
+
+(defsubst semantic-lex-map-keywords (fun &optional property)
+ "Call function FUN on every lexical keyword.
+If optional PROPERTY is non-nil, call FUN only on every keyword which
+as a PROPERTY value. FUN receives a lexical keyword as argument."
+ (semantic-lex-map-symbols
+ fun semantic-flex-keywords-obarray property))
+
+(defun semantic-lex-keywords (&optional property)
+ "Return a list of lexical keywords.
+If optional PROPERTY is non-nil, return only keywords which have a
+PROPERTY set."
+ (let (keywords)
+ (semantic-lex-map-keywords
+ #'(lambda (symbol) (setq keywords (cons symbol keywords)))
+ property)
+ keywords))
+\f
+;;; Type table handling.
+;;
+;; The lexical type table manages types that occur in a grammar file
+;; with the %type declaration. Types represent different syntaxes.
+;; See code for `semantic-lex-preset-default-types' for the classic
+;; types of syntax.
+(defvar semantic-lex-types-obarray nil
+ "Buffer local types obarray for the lexical analyzer.")
+(make-variable-buffer-local 'semantic-lex-types-obarray)
+
+(defmacro semantic-lex-type-invalid (type)
+ "Signal that TYPE is an invalid lexical type name."
+ `(signal 'wrong-type-argument '(semantic-lex-type-p ,type)))
+
+(defsubst semantic-lex-type-symbol (type)
+ "Return symbol with TYPE or nil if not found."
+ (and (arrayp semantic-lex-types-obarray)
+ (stringp type)
+ (intern-soft type semantic-lex-types-obarray)))
+
+(defsubst semantic-lex-type-p (type)
+ "Return non-nil if a symbol with TYPE name exists."
+ (and (setq type (semantic-lex-type-symbol type))
+ (symbol-value type)))
+
+(defsubst semantic-lex-type-set (type value)
+ "Set value of symbol with TYPE name to VALUE and return VALUE."
+ (set (intern type semantic-lex-types-obarray) value))
+
+(defsubst semantic-lex-type-value (type &optional noerror)
+ "Return value of symbol with TYPE name.
+If optional argument NOERROR is non-nil return nil if a symbol with
+TYPE name does not exist. Otherwise signal an error."
+ (let ((sym (semantic-lex-type-symbol type)))
+ (if sym
+ (symbol-value sym)
+ (unless noerror
+ (semantic-lex-type-invalid type)))))
+
+(defsubst semantic-lex-type-put (type property value &optional add)
+ "For symbol with TYPE name, set its PROPERTY to VALUE.
+If optional argument ADD is non-nil, create a new symbol with TYPE
+name if it does not already exist. Otherwise signal an error."
+ (let ((sym (semantic-lex-type-symbol type)))
+ (unless sym
+ (or add (semantic-lex-type-invalid type))
+ (semantic-lex-type-set type nil)
+ (setq sym (semantic-lex-type-symbol type)))
+ (put sym property value)))
+
+(defsubst semantic-lex-type-get (type property &optional noerror)
+ "For symbol with TYPE name, return its PROPERTY value.
+If optional argument NOERROR is non-nil return nil if a symbol with
+TYPE name does not exist. Otherwise signal an error."
+ (let ((sym (semantic-lex-type-symbol type)))
+ (if sym
+ (get sym property)
+ (unless noerror
+ (semantic-lex-type-invalid type)))))
+
+(defun semantic-lex-preset-default-types ()
+ "Install useful default properties for well known types."
+ (semantic-lex-type-put "punctuation" 'matchdatatype 'string t)
+ (semantic-lex-type-put "punctuation" 'syntax "\\(\\s.\\|\\s$\\|\\s'\\)+")
+ (semantic-lex-type-put "keyword" 'matchdatatype 'keyword t)
+ (semantic-lex-type-put "keyword" 'syntax "\\(\\sw\\|\\s_\\)+")
+ (semantic-lex-type-put "symbol" 'matchdatatype 'regexp t)
+ (semantic-lex-type-put "symbol" 'syntax "\\(\\sw\\|\\s_\\)+")
+ (semantic-lex-type-put "string" 'matchdatatype 'sexp t)
+ (semantic-lex-type-put "string" 'syntax "\\s\"")
+ (semantic-lex-type-put "number" 'matchdatatype 'regexp t)
+ (semantic-lex-type-put "number" 'syntax 'semantic-lex-number-expression)
+ (semantic-lex-type-put "block" 'matchdatatype 'block t)
+ (semantic-lex-type-put "block" 'syntax "\\s(\\|\\s)")
+ )
+
+(defun semantic-lex-make-type-table (specs &optional propspecs)
+ "Convert type SPECS into an obarray and return it.
+SPECS must be a list of (TYPE . TOKENS) elements, where:
+
+ TYPE is the name of the type symbol to define.
+ TOKENS is an list of (TOKSYM . MATCHER) elements, where:
+
+ TOKSYM is any lexical token symbol.
+ MATCHER is a string or regexp a text must match to be a such
+ lexical token.
+
+If optional argument PROPSPECS is non nil, then interpret it, and
+apply those properties.
+PROPSPECS must be a list of (TYPE PROPERTY VALUE)."
+ ;; Create the symbol hash table
+ (let* ((semantic-lex-types-obarray (make-vector 13 0))
+ spec type tokens token alist default)
+ ;; fill it with stuff
+ (while specs
+ (setq spec (car specs)
+ specs (cdr specs)
+ type (car spec)
+ tokens (cdr spec)
+ default nil
+ alist nil)
+ (while tokens
+ (setq token (car tokens)
+ tokens (cdr tokens))
+ (if (cdr token)
+ (setq alist (cons token alist))
+ (setq token (car token))
+ (if default
+ (message
+ "*Warning* default value of <%s> tokens changed to %S, was %S"
+ type default token))
+ (setq default token)))
+ ;; Ensure the default matching spec is the first one.
+ (semantic-lex-type-set type (cons default (nreverse alist))))
+ ;; Install useful default types & properties
+ (semantic-lex-preset-default-types)
+ ;; Apply all properties
+ (while propspecs
+ (setq spec (car propspecs)
+ propspecs (cdr propspecs))
+ ;; Create the type if necessary.
+ (semantic-lex-type-put (car spec) (nth 1 spec) (nth 2 spec) t))
+ semantic-lex-types-obarray))
+
+(defsubst semantic-lex-map-types (fun &optional property)
+ "Call function FUN on every lexical type.
+If optional PROPERTY is non-nil, call FUN only on every type symbol
+which as a PROPERTY value. FUN receives a type symbol as argument."
+ (semantic-lex-map-symbols
+ fun semantic-lex-types-obarray property))
+
+(defun semantic-lex-types (&optional property)
+ "Return a list of lexical type symbols.
+If optional PROPERTY is non-nil, return only type symbols which have
+PROPERTY set."
+ (let (types)
+ (semantic-lex-map-types
+ #'(lambda (symbol) (setq types (cons symbol types)))
+ property)
+ types))
+\f
+;;; Lexical Analyzer framework settings
+;;
+
+(defvar semantic-lex-analyzer 'semantic-flex
+ "The lexical analyzer used for a given buffer.
+See `semantic-lex' for documentation.
+For compatibility with Semantic 1.x it defaults to `semantic-flex'.")
+(make-variable-buffer-local 'semantic-lex-analyzer)
+
+(defvar semantic-lex-tokens
+ '(
+ (bol)
+ (charquote)
+ (close-paren)
+ (comment)
+ (newline)
+ (open-paren)
+ (punctuation)
+ (semantic-list)
+ (string)
+ (symbol)
+ (whitespace)
+ )
+ "An alist of of semantic token types.
+As of December 2001 (semantic 1.4beta13), this variable is not used in
+any code. The only use is to refer to the doc-string from elsewhere.
+
+The key to this alist is the symbol representing token type that
+\\[semantic-flex] returns. These are
+
+ - bol: Empty string matching a beginning of line.
+ This token is produced with
+ `semantic-lex-beginning-of-line'.
+
+ - charquote: String sequences that match `\\s\\+' regexp.
+ This token is produced with `semantic-lex-charquote'.
+
+ - close-paren: Characters that match `\\s)' regexp.
+ These are typically `)', `}', `]', etc.
+ This token is produced with
+ `semantic-lex-close-paren'.
+
+ - comment: A comment chunk. These token types are not
+ produced by default.
+ This token is produced with `semantic-lex-comments'.
+ Comments are ignored with `semantic-lex-ignore-comments'.
+ Comments are treated as whitespace with
+ `semantic-lex-comments-as-whitespace'.
+
+ - newline Characters matching `\\s-*\\(\n\\|\\s>\\)' regexp.
+ This token is produced with `semantic-lex-newline'.
+
+ - open-paren: Characters that match `\\s(' regexp.
+ These are typically `(', `{', `[', etc.
+ If `semantic-lex-paren-or-list' is used,
+ then `open-paren' is not usually generated unless
+ the `depth' argument to \\[semantic-lex] is
+ greater than 0.
+ This token is always produced if the analyzer
+ `semantic-lex-open-paren' is used.
+
+ - punctuation: Characters matching `{\\(\\s.\\|\\s$\\|\\s'\\)'
+ regexp.
+ This token is produced with `semantic-lex-punctuation'.
+ Always specify this analyzer after the comment
+ analyzer.
+
+ - semantic-list: String delimited by matching parenthesis, braces,
+ etc. that the lexer skipped over, because the
+ `depth' parameter to \\[semantic-flex] was not high
+ enough.
+ This token is produced with `semantic-lex-paren-or-list'.
+
+ - string: Quoted strings, i.e., string sequences that start
+ and end with characters matching `\\s\"'
+ regexp. The lexer relies on @code{forward-sexp} to
+ find the matching end.
+ This token is produced with `semantic-lex-string'.
+
+ - symbol: String sequences that match `\\(\\sw\\|\\s_\\)+'
+ regexp.
+ This token is produced with
+ `semantic-lex-symbol-or-keyword'. Always add this analyzer
+ after `semantic-lex-number', or other analyzers that
+ match its regular expression.
+
+ - whitespace: Characters that match `\\s-+' regexp.
+ This token is produced with `semantic-lex-whitespace'.")
+
+(defvar semantic-lex-syntax-modifications nil
+ "Changes to the syntax table for this buffer.
+These changes are active only while the buffer is being flexed.
+This is a list where each element has the form:
+ (CHAR CLASS)
+CHAR is the char passed to `modify-syntax-entry',
+and CLASS is the string also passed to `modify-syntax-entry' to define
+what syntax class CHAR has.")
+(make-variable-buffer-local 'semantic-lex-syntax-modifications)
+
+(defvar semantic-lex-syntax-table nil
+ "Syntax table used by lexical analysis.
+See also `semantic-lex-syntax-modifications'.")
+(make-variable-buffer-local 'semantic-lex-syntax-table)
+
+(defvar semantic-lex-comment-regex nil
+ "Regular expression for identifying comment start during lexical analysis.
+This may be automatically set when semantic initializes in a mode, but
+may need to be overriden for some special languages.")
+(make-variable-buffer-local 'semantic-lex-comment-regex)
+
+(defvar semantic-lex-number-expression
+ ;; This expression was written by David Ponce for Java, and copied
+ ;; here for C and any other similar language.
+ (eval-when-compile
+ (concat "\\("
+ "\\<[0-9]+[.][0-9]+\\([eE][-+]?[0-9]+\\)?[fFdD]?\\>"
+ "\\|"
+ "\\<[0-9]+[.][eE][-+]?[0-9]+[fFdD]?\\>"
+ "\\|"
+ "\\<[0-9]+[.][fFdD]\\>"
+ "\\|"
+ "\\<[0-9]+[.]"
+ "\\|"
+ "[.][0-9]+\\([eE][-+]?[0-9]+\\)?[fFdD]?\\>"
+ "\\|"
+ "\\<[0-9]+[eE][-+]?[0-9]+[fFdD]?\\>"
+ "\\|"
+ "\\<0[xX][0-9a-fA-F]+[lL]?\\>"
+ "\\|"
+ "\\<[0-9]+[lLfFdD]?\\>"
+ "\\)"
+ ))
+ "Regular expression for matching a number.
+If this value is nil, no number extraction is done during lex.
+This expression tries to match C and Java like numbers.
+
+DECIMAL_LITERAL:
+ [1-9][0-9]*
+ ;
+HEX_LITERAL:
+ 0[xX][0-9a-fA-F]+
+ ;
+OCTAL_LITERAL:
+ 0[0-7]*
+ ;
+INTEGER_LITERAL:
+ <DECIMAL_LITERAL>[lL]?
+ | <HEX_LITERAL>[lL]?
+ | <OCTAL_LITERAL>[lL]?
+ ;
+EXPONENT:
+ [eE][+-]?[09]+
+ ;
+FLOATING_POINT_LITERAL:
+ [0-9]+[.][0-9]*<EXPONENT>?[fFdD]?
+ | [.][0-9]+<EXPONENT>?[fFdD]?
+ | [0-9]+<EXPONENT>[fFdD]?
+ | [0-9]+<EXPONENT>?[fFdD]
+ ;")
+(make-variable-buffer-local 'semantic-lex-number-expression)
+
+(defvar semantic-lex-depth 0
+ "Default lexing depth.
+This specifies how many lists to create tokens in.")
+(make-variable-buffer-local 'semantic-lex-depth)
+
+(defvar semantic-lex-unterminated-syntax-end-function
+ (lambda (syntax syntax-start lex-end) lex-end)
+ "Function called when unterminated syntax is encountered.
+This should be set to one function. That function should take three
+parameters. The SYNTAX, or type of syntax which is unterminated.
+SYNTAX-START where the broken syntax begins.
+LEX-END is where the lexical analysis was asked to end.
+This function can be used for languages that can intelligently fix up
+broken syntax, or the exit lexical analysis via `throw' or `signal'
+when finding unterminated syntax.")
+
+;;; Interactive testing commands
+
+(defun semantic-lex-test (arg)
+ "Test the semantic lexer in the current buffer.
+If universal argument ARG, then try the whole buffer."
+ (interactive "P")
+ (let* ((start (current-time))
+ (result (semantic-lex
+ (if arg (point-min) (point))
+ (point-max)))
+ (end (current-time)))
+ (message "Elapsed Time: %.2f seconds."
+ (semantic-elapsed-time start end))
+ (pop-to-buffer "*Lexer Output*")
+ (require 'pp)
+ (erase-buffer)
+ (insert (pp-to-string result))
+ (goto-char (point-min))
+ ))
+
+(defun semantic-lex-test-full-depth (arg)
+ "Test the semantic lexer in the current buffer parsing through lists.
+Usually the lexer parses
+If universal argument ARG, then try the whole buffer."
+ (interactive "P")
+ (let* ((start (current-time))
+ (result (semantic-lex
+ (if arg (point-min) (point))
+ (point-max)
+ 100))
+ (end (current-time)))
+ (message "Elapsed Time: %.2f seconds."
+ (semantic-elapsed-time start end))
+ (pop-to-buffer "*Lexer Output*")
+ (require 'pp)
+ (erase-buffer)
+ (insert (pp-to-string result))
+ (goto-char (point-min))
+ ))
+
+(defun semantic-lex-test-region (beg end)
+ "Test the semantic lexer in the current buffer.
+Analyze the area between BEG and END."
+ (interactive "r")
+ (let ((result (semantic-lex beg end)))
+ (pop-to-buffer "*Lexer Output*")
+ (require 'pp)
+ (erase-buffer)
+ (insert (pp-to-string result))
+ (goto-char (point-min))
+ ))
+
+(defvar semantic-lex-debug nil
+ "When non-nil, debug the local lexical analyzer.")
+
+(defun semantic-lex-debug (arg)
+ "Debug the semantic lexer in the current buffer.
+Argument ARG specifies of the analyze the whole buffer, or start at point.
+While engaged, each token identified by the lexer will be highlighted
+in the target buffer A description of the current token will be
+displayed in the minibuffer. Press SPC to move to the next lexical token."
+ (interactive "P")
+ (require 'semantic/debug)
+ (let ((semantic-lex-debug t))
+ (semantic-lex-test arg)))
+
+(defun semantic-lex-highlight-token (token)
+ "Highlight the lexical TOKEN.
+TOKEN is a lexical token with a START And END position.
+Return the overlay."
+ (let ((o (semantic-make-overlay (semantic-lex-token-start token)
+ (semantic-lex-token-end token))))
+ (semantic-overlay-put o 'face 'highlight)
+ o))
+
+(defsubst semantic-lex-debug-break (token)
+ "Break during lexical analysis at TOKEN."
+ (when semantic-lex-debug
+ (let ((o nil))
+ (unwind-protect
+ (progn
+ (when token
+ (setq o (semantic-lex-highlight-token token)))
+ (semantic-read-event
+ (format "%S :: SPC - continue" token))
+ )
+ (when o
+ (semantic-overlay-delete o))))))
+
+;;; Lexical analyzer creation
+;;
+;; Code for creating a lex function from lists of analyzers.
+;;
+;; A lexical analyzer is created from a list of individual analyzers.
+;; Each individual analyzer specifies a single match, and code that
+;; goes with it.
+;;
+;; Creation of an analyzer assembles these analyzers into a new function
+;; with the behaviors of all the individual analyzers.
+;;
+(defmacro semantic-lex-one-token (analyzers)
+ "Calculate one token from the current buffer at point.
+Uses locally bound variables from `define-lex'.
+Argument ANALYZERS is the list of analyzers being used."
+ (cons 'cond (mapcar #'symbol-value analyzers)))
+
+(defvar semantic-lex-end-point nil
+ "The end point as tracked through lexical functions.")
+
+(defvar semantic-lex-current-depth nil
+ "The current depth as tracked through lexical functions.")
+
+(defvar semantic-lex-maximum-depth nil
+ "The maximum depth of parenthisis as tracked through lexical functions.")
+
+(defvar semantic-lex-token-stream nil
+ "The current token stream we are collecting.")
+
+(defvar semantic-lex-analysis-bounds nil
+ "The bounds of the current analysis.")
+
+(defvar semantic-lex-block-streams nil
+ "Streams of tokens inside collapsed blocks.
+This is an alist of (ANCHOR . STREAM) elements where ANCHOR is the
+start position of the block, and STREAM is the list of tokens in that
+block.")
+
+(defvar semantic-lex-reset-hooks nil
+ "List of hooks major-modes use to reset lexical analyzers.
+Hooks are called with START and END values for the current lexical pass.
+Should be set with `add-hook'specifying a LOCAL option.")
+
+;; Stack of nested blocks.
+(defvar semantic-lex-block-stack nil)
+;;(defvar semantic-lex-timeout 5
+;; "*Number of sections of lexing before giving up.")
+
+(defmacro define-lex (name doc &rest analyzers)
+ "Create a new lexical analyzer with NAME.
+DOC is a documentation string describing this analyzer.
+ANALYZERS are small code snippets of analyzers to use when
+building the new NAMED analyzer. Only use analyzers which
+are written to be used in `define-lex'.
+Each analyzer should be an analyzer created with `define-lex-analyzer'.
+Note: The order in which analyzers are listed is important.
+If two analyzers can match the same text, it is important to order the
+analyzers so that the one you want to match first occurs first. For
+example, it is good to put a numbe analyzer in front of a symbol
+analyzer which might mistake a number for as a symbol."
+ `(defun ,name (start end &optional depth length)
+ ,(concat doc "\nSee `semantic-lex' for more information.")
+ ;; Make sure the state of block parsing starts over.
+ (setq semantic-lex-block-streams nil)
+ ;; Allow specialty reset items.
+ (run-hook-with-args 'semantic-lex-reset-hooks start end)
+ ;; Lexing state.
+ (let* (;(starttime (current-time))
+ (starting-position (point))
+ (semantic-lex-token-stream nil)
+ (semantic-lex-block-stack nil)
+ (tmp-start start)
+ (semantic-lex-end-point start)
+ (semantic-lex-current-depth 0)
+ ;; Use the default depth when not specified.
+ (semantic-lex-maximum-depth
+ (or depth semantic-lex-depth))
+ ;; Bounds needed for unterminated syntax
+ (semantic-lex-analysis-bounds (cons start end))
+ ;; This entry prevents text properties from
+ ;; confusing our lexical analysis. See Emacs 22 (CVS)
+ ;; version of C++ mode with template hack text properties.
+ (parse-sexp-lookup-properties nil)
+ )
+ ;; Maybe REMOVE THIS LATER.
+ ;; Trying to find incremental parser bug.
+ (when (> end (point-max))
+ (error ,(format "%s: end (%%d) > point-max (%%d)" name)
+ end (point-max)))
+ (with-syntax-table semantic-lex-syntax-table
+ (goto-char start)
+ (while (and (< (point) end)
+ (or (not length)
+ (<= (length semantic-lex-token-stream) length)))
+ (semantic-lex-one-token ,analyzers)
+ (when (eq semantic-lex-end-point tmp-start)
+ (error ,(format "%s: endless loop at %%d, after %%S" name)
+ tmp-start (car semantic-lex-token-stream)))
+ (setq tmp-start semantic-lex-end-point)
+ (goto-char semantic-lex-end-point)
+ ;;(when (> (semantic-elapsed-time starttime (current-time))
+ ;; semantic-lex-timeout)
+ ;; (error "Timeout during lex at char %d" (point)))
+ (semantic-throw-on-input 'lex)
+ (semantic-lex-debug-break (car semantic-lex-token-stream))
+ ))
+ ;; Check that there is no unterminated block.
+ (when semantic-lex-block-stack
+ (let* ((last (pop semantic-lex-block-stack))
+ (blk last))
+ (while blk
+ (message
+ ,(format "%s: `%%s' block from %%S is unterminated" name)
+ (car blk) (cadr blk))
+ (setq blk (pop semantic-lex-block-stack)))
+ (semantic-lex-unterminated-syntax-detected (car last))))
+ ;; Return to where we started.
+ ;; Do not wrap in protective stuff so that if there is an error
+ ;; thrown, the user knows where.
+ (goto-char starting-position)
+ ;; Return the token stream
+ (nreverse semantic-lex-token-stream))))
+\f
+;;; Collapsed block tokens delimited by any tokens.
+;;
+(defun semantic-lex-start-block (syntax)
+ "Mark the last read token as the beginning of a SYNTAX block."
+ (if (or (not semantic-lex-maximum-depth)
+ (< semantic-lex-current-depth semantic-lex-maximum-depth))
+ (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
+ (push (list syntax (car semantic-lex-token-stream))
+ semantic-lex-block-stack)))
+
+(defun semantic-lex-end-block (syntax)
+ "Process the end of a previously marked SYNTAX block.
+That is, collapse the tokens inside that block, including the
+beginning and end of block tokens, into a high level block token of
+class SYNTAX.
+The token at beginning of block is the one marked by a previous call
+to `semantic-lex-start-block'. The current token is the end of block.
+The collapsed tokens are saved in `semantic-lex-block-streams'."
+ (if (null semantic-lex-block-stack)
+ (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
+ (let* ((stream semantic-lex-token-stream)
+ (blk (pop semantic-lex-block-stack))
+ (bstream (cdr blk))
+ (first (car bstream))
+ (last (pop stream)) ;; The current token mark the EOBLK
+ tok)
+ (if (not (eq (car blk) syntax))
+ ;; SYNTAX doesn't match the syntax of the current block in
+ ;; the stack. So we encountered the end of the SYNTAX block
+ ;; before the end of the current one in the stack which is
+ ;; signaled unterminated.
+ (semantic-lex-unterminated-syntax-detected (car blk))
+ ;; Move tokens found inside the block from the main stream
+ ;; into a separate block stream.
+ (while (and stream (not (eq (setq tok (pop stream)) first)))
+ (push tok bstream))
+ ;; The token marked as beginning of block was not encountered.
+ ;; This should not happen!
+ (or (eq tok first)
+ (error "Token %S not found at beginning of block `%s'"
+ first syntax))
+ ;; Save the block stream for future reuse, to avoid to redo
+ ;; the lexical analysis of the block content!
+ ;; Anchor the block stream with its start position, so we can
+ ;; use: (cdr (assq start semantic-lex-block-streams)) to
+ ;; quickly retrieve the lexical stream associated to a block.
+ (setcar blk (semantic-lex-token-start first))
+ (setcdr blk (nreverse bstream))
+ (push blk semantic-lex-block-streams)
+ ;; In the main stream, replace the tokens inside the block by
+ ;; a high level block token of class SYNTAX.
+ (setq semantic-lex-token-stream stream)
+ (semantic-lex-push-token
+ (semantic-lex-token
+ syntax (car blk) (semantic-lex-token-end last)))
+ ))))
+\f
+;;; Lexical token API
+;;
+;; Functions for accessing parts of a token. Use these functions
+;; instead of accessing the list structure directly because the
+;; contents of the lexical may change.
+;;
+(defmacro semantic-lex-token (symbol start end &optional str)
+ "Create a lexical token.
+SYMBOL is a symbol representing the class of syntax found.
+START and END define the bounds of the token in the current buffer.
+Optional STR is the string for the token iff the the bounds
+in the buffer do not cover the string they represent. (As from
+macro expansion.)"
+ ;; This if statement checks the existance of a STR argument at
+ ;; compile time, where STR is some symbol or constant. If the
+ ;; variable STr (runtime) is nil, this will make an incorrect decision.
+ ;;
+ ;; It is like this to maintain the original speed of the compiled
+ ;; code.
+ (if str
+ `(cons ,symbol (cons ,str (cons ,start ,end)))
+ `(cons ,symbol (cons ,start ,end))))
+
+(defun semantic-lex-token-p (thing)
+ "Return non-nil if THING is a semantic lex token.
+This is an exhaustively robust check."
+ (and (consp thing)
+ (symbolp (car thing))
+ (or (and (numberp (nth 1 thing))
+ (numberp (nthcdr 2 thing)))
+ (and (stringp (nth 1 thing))
+ (numberp (nth 2 thing))
+ (numberp (nthcdr 3 thing)))
+ ))
+ )
+
+(defun semantic-lex-token-with-text-p (thing)
+ "Return non-nil if THING is a semantic lex token.
+This is an exhaustively robust check."
+ (and (consp thing)
+ (symbolp (car thing))
+ (= (length thing) 4)
+ (stringp (nth 1 thing))
+ (numberp (nth 2 thing))
+ (numberp (nth 3 thing)))
+ )
+
+(defun semantic-lex-token-without-text-p (thing)
+ "Return non-nil if THING is a semantic lex token.
+This is an exhaustively robust check."
+ (and (consp thing)
+ (symbolp (car thing))
+ (= (length thing) 3)
+ (numberp (nth 1 thing))
+ (numberp (nth 2 thing)))
+ )
+
+(defun semantic-lex-expand-block-specs (specs)
+ "Expand block specifications SPECS into a Lisp form.
+SPECS is a list of (BLOCK BEGIN END) elements where BLOCK, BEGIN, and
+END are token class symbols that indicate to produce one collapsed
+BLOCK token from tokens found between BEGIN and END ones.
+BLOCK must be a non-nil symbol, and at least one of the BEGIN or END
+symbols must be non-nil too.
+When BEGIN is non-nil, generate a call to `semantic-lex-start-block'
+when a BEGIN token class is encountered.
+When END is non-nil, generate a call to `semantic-lex-end-block' when
+an END token class is encountered."
+ (let ((class (make-symbol "class"))
+ (form nil))
+ (dolist (spec specs)
+ (when (car spec)
+ (when (nth 1 spec)
+ (push `((eq ',(nth 1 spec) ,class)
+ (semantic-lex-start-block ',(car spec)))
+ form))
+ (when (nth 2 spec)
+ (push `((eq ',(nth 2 spec) ,class)
+ (semantic-lex-end-block ',(car spec)))
+ form))))
+ (when form
+ `((let ((,class (semantic-lex-token-class
+ (car semantic-lex-token-stream))))
+ (cond ,@(nreverse form))))
+ )))
+
+(defmacro semantic-lex-push-token (token &rest blockspecs)
+ "Push TOKEN in the lexical analyzer token stream.
+Return the lexical analysis current end point.
+If optional arguments BLOCKSPECS is non-nil, it specifies to process
+collapsed block tokens. See `semantic-lex-expand-block-specs' for
+more details.
+This macro should only be called within the bounds of
+`define-lex-analyzer'. It changes the values of the lexical analyzer
+variables `token-stream' and `semantic-lex-end-point'. If you need to
+move `semantic-lex-end-point' somewhere else, just modify this
+variable after calling `semantic-lex-push-token'."
+ `(progn
+ (push ,token semantic-lex-token-stream)
+ ,@(semantic-lex-expand-block-specs blockspecs)
+ (setq semantic-lex-end-point
+ (semantic-lex-token-end (car semantic-lex-token-stream)))
+ ))
+
+(defsubst semantic-lex-token-class (token)
+ "Fetch the class of the lexical token TOKEN.
+See also the function `semantic-lex-token'."
+ (car token))
+
+(defsubst semantic-lex-token-bounds (token)
+ "Fetch the start and end locations of the lexical token TOKEN.
+Return a pair (START . END)."
+ (if (not (numberp (car (cdr token))))
+ (cdr (cdr token))
+ (cdr token)))
+
+(defsubst semantic-lex-token-start (token)
+ "Fetch the start position of the lexical token TOKEN.
+See also the function `semantic-lex-token'."
+ (car (semantic-lex-token-bounds token)))
+
+(defsubst semantic-lex-token-end (token)
+ "Fetch the end position of the lexical token TOKEN.
+See also the function `semantic-lex-token'."
+ (cdr (semantic-lex-token-bounds token)))
+
+(defsubst semantic-lex-token-text (token)
+ "Fetch the text associated with the lexical token TOKEN.
+See also the function `semantic-lex-token'."
+ (if (stringp (car (cdr token)))
+ (car (cdr token))
+ (buffer-substring-no-properties
+ (semantic-lex-token-start token)
+ (semantic-lex-token-end token))))
+
+(defun semantic-lex-init ()
+ "Initialize any lexical state for this buffer."
+ (unless semantic-lex-comment-regex
+ (setq semantic-lex-comment-regex
+ (if comment-start-skip
+ (concat "\\(\\s<\\|" comment-start-skip "\\)")
+ "\\(\\s<\\)")))
+ ;; Setup the lexer syntax-table
+ (setq semantic-lex-syntax-table (copy-syntax-table (syntax-table)))
+ (dolist (mod semantic-lex-syntax-modifications)
+ (modify-syntax-entry
+ (car mod) (nth 1 mod) semantic-lex-syntax-table)))
+
+(define-overloadable-function semantic-lex (start end &optional depth length)
+ "Lexically analyze text in the current buffer between START and END.
+Optional argument DEPTH indicates at what level to scan over entire
+lists. The last argument, LENGTH specifies that `semantic-lex'
+should only return LENGTH tokens. The return value is a token stream.
+Each element is a list, such of the form
+ (symbol start-expression . end-expression)
+where SYMBOL denotes the token type.
+See `semantic-lex-tokens' variable for details on token types. END
+does not mark the end of the text scanned, only the end of the
+beginning of text scanned. Thus, if a string extends past END, the
+end of the return token will be larger than END. To truly restrict
+scanning, use `narrow-to-region'."
+ (funcall semantic-lex-analyzer start end depth length))
+
+(defsubst semantic-lex-buffer (&optional depth)
+ "Lex the current buffer.
+Optional argument DEPTH is the depth to scan into lists."
+ (semantic-lex (point-min) (point-max) depth))
+
+(defsubst semantic-lex-list (semlist depth)
+ "Lex the body of SEMLIST to DEPTH."
+ (semantic-lex (semantic-lex-token-start semlist)
+ (semantic-lex-token-end semlist)
+ depth))
+\f
+;;; Analyzer creation macros
+;;
+;; An individual analyzer is a condition and code that goes with it.
+;;
+;; Created analyzers become variables with the code associated with them
+;; as the symbol value. These analyzers are assembled into a lexer
+;; to create new lexical analyzers.
+;;
+(defsubst semantic-lex-unterminated-syntax-detected (syntax)
+ "Inside a lexical analyzer, use this when unterminated syntax was found.
+Argument SYNTAX indicates the type of syntax that is unterminated.
+The job of this function is to move (point) to a new logical location
+so that analysis can continue, if possible."
+ (goto-char
+ (funcall semantic-lex-unterminated-syntax-end-function
+ syntax
+ (car semantic-lex-analysis-bounds)
+ (cdr semantic-lex-analysis-bounds)
+ ))
+ (setq semantic-lex-end-point (point)))
+
+(defcustom semantic-lex-debug-analyzers nil
+ "Non nil means to debug analyzers with syntax protection.
+Only in effect if `debug-on-error' is also non-nil."
+ :group 'semantic
+ :type 'boolean)
+
+(defmacro semantic-lex-unterminated-syntax-protection (syntax &rest forms)
+ "For SYNTAX, execute FORMS with protection for unterminated syntax.
+If FORMS throws an error, treat this as a syntax problem, and
+execute the unterminated syntax code. FORMS should return a position.
+Irreguardless of an error, the cursor should be moved to the end of
+the desired syntax, and a position returned.
+If `debug-on-error' is set, errors are not caught, so that you can
+debug them.
+Avoid using a large FORMS since it is duplicated."
+ `(if (and debug-on-error semantic-lex-debug-analyzers)
+ (progn ,@forms)
+ (condition-case nil
+ (progn ,@forms)
+ (error
+ (semantic-lex-unterminated-syntax-detected ,syntax)))))
+(put 'semantic-lex-unterminated-syntax-protection
+ 'lisp-indent-function 1)
+
+(defmacro define-lex-analyzer (name doc condition &rest forms)
+ "Create a single lexical analyzer NAME with DOC.
+When an analyzer is called, the current buffer and point are
+positioned in a buffer at the location to be analyzed.
+CONDITION is an expression which returns t if FORMS should be run.
+Within the bounds of CONDITION and FORMS, the use of backquote
+can be used to evaluate expressions at compile time.
+While forms are running, the following variables will be locally bound:
+ `semantic-lex-analysis-bounds' - The bounds of the current analysis.
+ of the form (START . END)
+ `semantic-lex-maximum-depth' - The maximum depth of semantic-list
+ for the current analysis.
+ `semantic-lex-current-depth' - The current depth of `semantic-list' that has
+ been decended.
+ `semantic-lex-end-point' - End Point after match.
+ Analyzers should set this to a buffer location if their
+ match string does not represent the end of the matched text.
+ `semantic-lex-token-stream' - The token list being collected.
+ Add new lexical tokens to this list.
+Proper action in FORMS is to move the value of `semantic-lex-end-point' to
+after the location of the analyzed entry, and to add any discovered tokens
+at the beginning of `semantic-lex-token-stream'.
+This can be done by using `semantic-lex-push-token'."
+ `(eval-and-compile
+ (defvar ,name nil ,doc)
+ (defun ,name nil)
+ ;; Do this part separately so that re-evaluation rebuilds this code.
+ (setq ,name '(,condition ,@forms))
+ ;; Build a single lexical analyzer function, so the doc for
+ ;; function help is automatically provided, and perhaps the
+ ;; function could be useful for testing and debugging one
+ ;; analyzer.
+ (fset ',name (lambda () ,doc
+ (let ((semantic-lex-token-stream nil)
+ (semantic-lex-end-point (point))
+ (semantic-lex-analysis-bounds
+ (cons (point) (point-max)))
+ (semantic-lex-current-depth 0)
+ (semantic-lex-maximum-depth
+ semantic-lex-depth)
+ )
+ (when ,condition ,@forms)
+ semantic-lex-token-stream)))
+ ))
+
+(defmacro define-lex-regex-analyzer (name doc regexp &rest forms)
+ "Create a lexical analyzer with NAME and DOC that will match REGEXP.
+FORMS are evaluated upon a successful match.
+See `define-lex-analyzer' for more about analyzers."
+ `(define-lex-analyzer ,name
+ ,doc
+ (looking-at ,regexp)
+ ,@forms
+ ))
+
+(defmacro define-lex-simple-regex-analyzer (name doc regexp toksym
+ &optional index
+ &rest forms)
+ "Create a lexical analyzer with NAME and DOC that match REGEXP.
+TOKSYM is the symbol to use when creating a semantic lexical token.
+INDEX is the index into the match that defines the bounds of the token.
+Index should be a plain integer, and not specified in the macro as an
+expression.
+FORMS are evaluated upon a successful match BEFORE the new token is
+created. It is valid to ignore FORMS.
+See `define-lex-analyzer' for more about analyzers."
+ `(define-lex-analyzer ,name
+ ,doc
+ (looking-at ,regexp)
+ ,@forms
+ (semantic-lex-push-token
+ (semantic-lex-token ,toksym
+ (match-beginning ,(or index 0))
+ (match-end ,(or index 0))))
+ ))
+
+(defmacro define-lex-block-analyzer (name doc spec1 &rest specs)
+ "Create a lexical analyzer NAME for paired delimiters blocks.
+It detects a paired delimiters block or the corresponding open or
+close delimiter depending on the value of the variable
+`semantic-lex-current-depth'. DOC is the documentation string of the lexical
+analyzer. SPEC1 and SPECS specify the token symbols and open, close
+delimiters used. Each SPEC has the form:
+
+\(BLOCK-SYM (OPEN-DELIM OPEN-SYM) (CLOSE-DELIM CLOSE-SYM))
+
+where BLOCK-SYM is the symbol returned in a block token. OPEN-DELIM
+and CLOSE-DELIM are respectively the open and close delimiters
+identifying a block. OPEN-SYM and CLOSE-SYM are respectively the
+symbols returned in open and close tokens."
+ (let ((specs (cons spec1 specs))
+ spec open olist clist)
+ (while specs
+ (setq spec (car specs)
+ specs (cdr specs)
+ open (nth 1 spec)
+ ;; build alist ((OPEN-DELIM OPEN-SYM BLOCK-SYM) ...)
+ olist (cons (list (car open) (cadr open) (car spec)) olist)
+ ;; build alist ((CLOSE-DELIM CLOSE-SYM) ...)
+ clist (cons (nth 2 spec) clist)))
+ `(define-lex-analyzer ,name
+ ,doc
+ (and
+ (looking-at "\\(\\s(\\|\\s)\\)")
+ (let ((text (match-string 0)) match)
+ (cond
+ ((setq match (assoc text ',olist))
+ (if (or (not semantic-lex-maximum-depth)
+ (< semantic-lex-current-depth semantic-lex-maximum-depth))
+ (progn
+ (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
+ (semantic-lex-push-token
+ (semantic-lex-token
+ (nth 1 match)
+ (match-beginning 0) (match-end 0))))
+ (semantic-lex-push-token
+ (semantic-lex-token
+ (nth 2 match)
+ (match-beginning 0)
+ (save-excursion
+ (semantic-lex-unterminated-syntax-protection (nth 2 match)
+ (forward-list 1)
+ (point)))
+ ))
+ ))
+ ((setq match (assoc text ',clist))
+ (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
+ (semantic-lex-push-token
+ (semantic-lex-token
+ (nth 1 match)
+ (match-beginning 0) (match-end 0)))))))
+ )))
+\f
+;;; Analyzers
+;;
+;; Pre-defined common analyzers.
+;;
+(define-lex-analyzer semantic-lex-default-action
+ "The default action when no other lexical actions match text.
+This action will just throw an error."
+ t
+ (error "Unmatched Text during Lexical Analysis"))
+
+(define-lex-analyzer semantic-lex-beginning-of-line
+ "Detect and create a beginning of line token (BOL)."
+ (and (bolp)
+ ;; Just insert a (bol N . N) token in the token stream,
+ ;; without moving the point. N is the point at the
+ ;; beginning of line.
+ (semantic-lex-push-token (semantic-lex-token 'bol (point) (point)))
+ nil) ;; CONTINUE
+ ;; We identify and add the BOL token onto the stream, but since
+ ;; semantic-lex-end-point doesn't move, we always fail CONDITION, and have no
+ ;; FORMS body.
+ nil)
+
+(define-lex-simple-regex-analyzer semantic-lex-newline
+ "Detect and create newline tokens."
+ "\\s-*\\(\n\\|\\s>\\)" 'newline 1)
+
+(define-lex-regex-analyzer semantic-lex-newline-as-whitespace
+ "Detect and create newline tokens.
+Use this ONLY if newlines are not whitespace characters (such as when
+they are comment end characters) AND when you want whitespace tokens."
+ "\\s-*\\(\n\\|\\s>\\)"
+ ;; Language wants whitespaces. Create a token for it.
+ (if (eq (semantic-lex-token-class (car semantic-lex-token-stream))
+ 'whitespace)
+ ;; Merge whitespace tokens together if they are adjacent. Two
+ ;; whitespace tokens may be sperated by a comment which is not in
+ ;; the token stream.
+ (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
+ (match-end 0))
+ (semantic-lex-push-token
+ (semantic-lex-token
+ 'whitespace (match-beginning 0) (match-end 0)))))
+
+(define-lex-regex-analyzer semantic-lex-ignore-newline
+ "Detect and ignore newline tokens.
+Use this ONLY if newlines are not whitespace characters (such as when
+they are comment end characters)."
+ "\\s-*\\(\n\\|\\s>\\)"
+ (setq semantic-lex-end-point (match-end 0)))
+
+(define-lex-regex-analyzer semantic-lex-whitespace
+ "Detect and create whitespace tokens."
+ ;; catch whitespace when needed
+ "\\s-+"
+ ;; Language wants whitespaces. Create a token for it.
+ (if (eq (semantic-lex-token-class (car semantic-lex-token-stream))
+ 'whitespace)
+ ;; Merge whitespace tokens together if they are adjacent. Two
+ ;; whitespace tokens may be sperated by a comment which is not in
+ ;; the token stream.
+ (progn
+ (setq semantic-lex-end-point (match-end 0))
+ (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
+ semantic-lex-end-point))
+ (semantic-lex-push-token
+ (semantic-lex-token
+ 'whitespace (match-beginning 0) (match-end 0)))))
+
+(define-lex-regex-analyzer semantic-lex-ignore-whitespace
+ "Detect and skip over whitespace tokens."
+ ;; catch whitespace when needed
+ "\\s-+"
+ ;; Skip over the detected whitespace, do not create a token for it.
+ (setq semantic-lex-end-point (match-end 0)))
+
+(define-lex-simple-regex-analyzer semantic-lex-number
+ "Detect and create number tokens.
+See `semantic-lex-number-expression' for details on matching numbers,
+and number formats."
+ semantic-lex-number-expression 'number)
+
+(define-lex-regex-analyzer semantic-lex-symbol-or-keyword
+ "Detect and create symbol and keyword tokens."
+ "\\(\\sw\\|\\s_\\)+"
+ (semantic-lex-push-token
+ (semantic-lex-token
+ (or (semantic-lex-keyword-p (match-string 0)) 'symbol)
+ (match-beginning 0) (match-end 0))))
+
+(define-lex-simple-regex-analyzer semantic-lex-charquote
+ "Detect and create charquote tokens."
+ ;; Character quoting characters (ie, \n as newline)
+ "\\s\\+" 'charquote)
+
+(define-lex-simple-regex-analyzer semantic-lex-punctuation
+ "Detect and create punctuation tokens."
+ "\\(\\s.\\|\\s$\\|\\s'\\)" 'punctuation)
+
+(define-lex-analyzer semantic-lex-punctuation-type
+ "Detect and create a punctuation type token.
+Recognized punctuations are defined in the current table of lexical
+types, as the value of the `punctuation' token type."
+ (and (looking-at "\\(\\s.\\|\\s$\\|\\s'\\)+")
+ (let* ((key (match-string 0))
+ (pos (match-beginning 0))
+ (end (match-end 0))
+ (len (- end pos))
+ (lst (semantic-lex-type-value "punctuation" t))
+ (def (car lst)) ;; default lexical symbol or nil
+ (lst (cdr lst)) ;; alist of (LEX-SYM . PUNCT-STRING)
+ (elt nil))
+ (if lst
+ ;; Starting with the longest one, search if the
+ ;; punctuation string is defined for this language.
+ (while (and (> len 0) (not (setq elt (rassoc key lst))))
+ (setq len (1- len)
+ key (substring key 0 len))))
+ (if elt ;; Return the punctuation token found
+ (semantic-lex-push-token
+ (semantic-lex-token (car elt) pos (+ pos len)))
+ (if def ;; Return a default generic token
+ (semantic-lex-push-token
+ (semantic-lex-token def pos end))
+ ;; Nothing match
+ )))))
+
+(define-lex-regex-analyzer semantic-lex-paren-or-list
+ "Detect open parenthesis.
+Return either a paren token or a semantic list token depending on
+`semantic-lex-current-depth'."
+ "\\s("
+ (if (or (not semantic-lex-maximum-depth)
+ (< semantic-lex-current-depth semantic-lex-maximum-depth))
+ (progn
+ (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
+ (semantic-lex-push-token
+ (semantic-lex-token
+ 'open-paren (match-beginning 0) (match-end 0))))
+ (semantic-lex-push-token
+ (semantic-lex-token
+ 'semantic-list (match-beginning 0)
+ (save-excursion
+ (semantic-lex-unterminated-syntax-protection 'semantic-list
+ (forward-list 1)
+ (point))
+ )))
+ ))
+
+(define-lex-simple-regex-analyzer semantic-lex-open-paren
+ "Detect and create an open parenthisis token."
+ "\\s(" 'open-paren 0 (setq semantic-lex-current-depth (1+ semantic-lex-current-depth)))
+
+(define-lex-simple-regex-analyzer semantic-lex-close-paren
+ "Detect and create a close paren token."
+ "\\s)" 'close-paren 0 (setq semantic-lex-current-depth (1- semantic-lex-current-depth)))
+
+(define-lex-regex-analyzer semantic-lex-string
+ "Detect and create a string token."
+ "\\s\""
+ ;; Zing to the end of this string.
+ (semantic-lex-push-token
+ (semantic-lex-token
+ 'string (point)
+ (save-excursion
+ (semantic-lex-unterminated-syntax-protection 'string
+ (forward-sexp 1)
+ (point))
+ ))))
+
+(define-lex-regex-analyzer semantic-lex-comments
+ "Detect and create a comment token."
+ semantic-lex-comment-regex
+ (save-excursion
+ (forward-comment 1)
+ ;; Generate newline token if enabled
+ (if (bolp) (backward-char 1))
+ (setq semantic-lex-end-point (point))
+ ;; Language wants comments or want them as whitespaces,
+ ;; link them together.
+ (if (eq (semantic-lex-token-class (car semantic-lex-token-stream)) 'comment)
+ (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
+ semantic-lex-end-point)
+ (semantic-lex-push-token
+ (semantic-lex-token
+ 'comment (match-beginning 0) semantic-lex-end-point)))))
+
+(define-lex-regex-analyzer semantic-lex-comments-as-whitespace
+ "Detect comments and create a whitespace token."
+ semantic-lex-comment-regex
+ (save-excursion
+ (forward-comment 1)
+ ;; Generate newline token if enabled
+ (if (bolp) (backward-char 1))
+ (setq semantic-lex-end-point (point))
+ ;; Language wants comments or want them as whitespaces,
+ ;; link them together.
+ (if (eq (semantic-lex-token-class (car semantic-lex-token-stream)) 'whitespace)
+ (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
+ semantic-lex-end-point)
+ (semantic-lex-push-token
+ (semantic-lex-token
+ 'whitespace (match-beginning 0) semantic-lex-end-point)))))
+
+(define-lex-regex-analyzer semantic-lex-ignore-comments
+ "Detect and create a comment token."
+ semantic-lex-comment-regex
+ (let ((comment-start-point (point)))
+ (forward-comment 1)
+ (if (eq (point) comment-start-point)
+ ;; In this case our start-skip string failed
+ ;; to work properly. Lets try and move over
+ ;; whatever white space we matched to begin
+ ;; with.
+ (skip-syntax-forward "-.'"
+ (save-excursion
+ (end-of-line)
+ (point)))
+ ;; We may need to back up so newlines or whitespace is generated.
+ (if (bolp)
+ (backward-char 1)))
+ (if (eq (point) comment-start-point)
+ (error "Strange comment syntax prevents lexical analysis"))
+ (setq semantic-lex-end-point (point))))
+\f
+;;; Comment lexer
+;;
+;; Predefined lexers that could be used instead of creating new
+;; analyers.
+
+(define-lex semantic-comment-lexer
+ "A simple lexical analyzer that handles comments.
+This lexer will only return comment tokens. It is the default lexer
+used by `semantic-find-doc-snarf-comment' to snarf up the comment at
+point."
+ semantic-lex-ignore-whitespace
+ semantic-lex-ignore-newline
+ semantic-lex-comments
+ semantic-lex-default-action)
+
+;;; Test Lexer
+;;
+(define-lex semantic-simple-lexer
+ "A simple lexical analyzer that handles simple buffers.
+This lexer ignores comments and whitespace, and will return
+syntax as specified by the syntax table."
+ semantic-lex-ignore-whitespace
+ semantic-lex-ignore-newline
+ semantic-lex-number
+ semantic-lex-symbol-or-keyword
+ semantic-lex-charquote
+ semantic-lex-paren-or-list
+ semantic-lex-close-paren
+ semantic-lex-string
+ semantic-lex-ignore-comments
+ semantic-lex-punctuation
+ semantic-lex-default-action)
+\f
+;;; Analyzers generated from grammar.
+;;
+;; Some analyzers are hand written. Analyzers created with these
+;; functions are generated from the grammar files.
+
+(defmacro define-lex-keyword-type-analyzer (name doc syntax)
+ "Define a keyword type analyzer NAME with DOC string.
+SYNTAX is the regexp that matches a keyword syntactic expression."
+ (let ((key (make-symbol "key")))
+ `(define-lex-analyzer ,name
+ ,doc
+ (and (looking-at ,syntax)
+ (let ((,key (semantic-lex-keyword-p (match-string 0))))
+ (when ,key
+ (semantic-lex-push-token
+ (semantic-lex-token
+ ,key (match-beginning 0) (match-end 0)))))))
+ ))
+
+(defmacro define-lex-sexp-type-analyzer (name doc syntax token)
+ "Define a sexp type analyzer NAME with DOC string.
+SYNTAX is the regexp that matches the beginning of the s-expression.
+TOKEN is the lexical token returned when SYNTAX matches."
+ `(define-lex-regex-analyzer ,name
+ ,doc
+ ,syntax
+ (semantic-lex-push-token
+ (semantic-lex-token
+ ,token (point)
+ (save-excursion
+ (semantic-lex-unterminated-syntax-protection ,token
+ (forward-sexp 1)
+ (point))))))
+ )
+
+(defmacro define-lex-regex-type-analyzer (name doc syntax matches default)
+ "Define a regexp type analyzer NAME with DOC string.
+SYNTAX is the regexp that matches a syntactic expression.
+MATCHES is an alist of lexical elements used to refine the syntactic
+expression.
+DEFAULT is the default lexical token returned when no MATCHES."
+ (if matches
+ (let* ((val (make-symbol "val"))
+ (lst (make-symbol "lst"))
+ (elt (make-symbol "elt"))
+ (pos (make-symbol "pos"))
+ (end (make-symbol "end")))
+ `(define-lex-analyzer ,name
+ ,doc
+ (and (looking-at ,syntax)
+ (let* ((,val (match-string 0))
+ (,pos (match-beginning 0))
+ (,end (match-end 0))
+ (,lst ,matches)
+ ,elt)
+ (while (and ,lst (not ,elt))
+ (if (string-match (cdar ,lst) ,val)
+ (setq ,elt (caar ,lst))
+ (setq ,lst (cdr ,lst))))
+ (semantic-lex-push-token
+ (semantic-lex-token (or ,elt ,default) ,pos ,end))))
+ ))
+ `(define-lex-simple-regex-analyzer ,name
+ ,doc
+ ,syntax ,default)
+ ))
+
+(defmacro define-lex-string-type-analyzer (name doc syntax matches default)
+ "Define a string type analyzer NAME with DOC string.
+SYNTAX is the regexp that matches a syntactic expression.
+MATCHES is an alist of lexical elements used to refine the syntactic
+expression.
+DEFAULT is the default lexical token returned when no MATCHES."
+ (if matches
+ (let* ((val (make-symbol "val"))
+ (lst (make-symbol "lst"))
+ (elt (make-symbol "elt"))
+ (pos (make-symbol "pos"))
+ (end (make-symbol "end"))
+ (len (make-symbol "len")))
+ `(define-lex-analyzer ,name
+ ,doc
+ (and (looking-at ,syntax)
+ (let* ((,val (match-string 0))
+ (,pos (match-beginning 0))
+ (,end (match-end 0))
+ (,len (- ,end ,pos))
+ (,lst ,matches)
+ ,elt)
+ ;; Starting with the longest one, search if a lexical
+ ;; value match a token defined for this language.
+ (while (and (> ,len 0) (not (setq ,elt (rassoc ,val ,lst))))
+ (setq ,len (1- ,len)
+ ,val (substring ,val 0 ,len)))
+ (when ,elt ;; Adjust token end position.
+ (setq ,elt (car ,elt)
+ ,end (+ ,pos ,len)))
+ (semantic-lex-push-token
+ (semantic-lex-token (or ,elt ,default) ,pos ,end))))
+ ))
+ `(define-lex-simple-regex-analyzer ,name
+ ,doc
+ ,syntax ,default)
+ ))
+
+(defmacro define-lex-block-type-analyzer (name doc syntax matches)
+ "Define a block type analyzer NAME with DOC string.
+
+SYNTAX is the regexp that matches block delimiters, typically the
+open (`\\\\s(') and close (`\\\\s)') parenthesis syntax classes.
+
+MATCHES is a pair (OPEN-SPECS . CLOSE-SPECS) that defines blocks.
+
+ OPEN-SPECS is a list of (OPEN-DELIM OPEN-TOKEN BLOCK-TOKEN) elements
+ where:
+
+ OPEN-DELIM is a string: the block open delimiter character.
+
+ OPEN-TOKEN is the lexical token class associated to the OPEN-DELIM
+ delimiter.
+
+ BLOCK-TOKEN is the lexical token class associated to the block
+ that starts at the OPEN-DELIM delimiter.
+
+ CLOSE-SPECS is a list of (CLOSE-DELIM CLOSE-TOKEN) elements where:
+
+ CLOSE-DELIM is a string: the block end delimiter character.
+
+ CLOSE-TOKEN is the lexical token class associated to the
+ CLOSE-DELIM delimiter.
+
+Each element in OPEN-SPECS must have a corresponding element in
+CLOSE-SPECS.
+
+The lexer will return a BLOCK-TOKEN token when the value of
+`semantic-lex-current-depth' is greater than or equal to the maximum
+depth of parenthesis tracking (see also the function `semantic-lex').
+Otherwise it will return OPEN-TOKEN and CLOSE-TOKEN tokens.
+
+TO DO: Put the following in the developer's guide and just put a
+reference here.
+
+In the grammar:
+
+The value of a block token must be a string that contains a readable
+sexp of the form:
+
+ \"(OPEN-TOKEN CLOSE-TOKEN)\"
+
+OPEN-TOKEN and CLOSE-TOKEN represent the block delimiters, and must be
+lexical tokens of respectively `open-paren' and `close-paren' types.
+Their value is the corresponding delimiter character as a string.
+
+Here is a small example to analyze a parenthesis block:
+
+ %token <block> PAREN_BLOCK \"(LPAREN RPAREN)\"
+ %token <open-paren> LPAREN \"(\"
+ %token <close-paren> RPAREN \")\"
+
+When the lexer encounters the open-paren delimiter \"(\":
+
+ - If the maximum depth of parenthesis tracking is not reached (that
+ is, current depth < max depth), it returns a (LPAREN start . end)
+ token, then continue analysis inside the block. Later, when the
+ corresponding close-paren delimiter \")\" will be encountered, it
+ will return a (RPAREN start . end) token.
+
+ - If the maximum depth of parenthesis tracking is reached (current
+ depth >= max depth), it returns the whole parenthesis block as
+ a (PAREN_BLOCK start . end) token."
+ (let* ((val (make-symbol "val"))
+ (lst (make-symbol "lst"))
+ (elt (make-symbol "elt")))
+ `(define-lex-analyzer ,name
+ ,doc
+ (and
+ (looking-at ,syntax) ;; "\\(\\s(\\|\\s)\\)"
+ (let ((,val (match-string 0))
+ (,lst ,matches)
+ ,elt)
+ (cond
+ ((setq ,elt (assoc ,val (car ,lst)))
+ (if (or (not semantic-lex-maximum-depth)
+ (< semantic-lex-current-depth semantic-lex-maximum-depth))
+ (progn
+ (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
+ (semantic-lex-push-token
+ (semantic-lex-token
+ (nth 1 ,elt)
+ (match-beginning 0) (match-end 0))))
+ (semantic-lex-push-token
+ (semantic-lex-token
+ (nth 2 ,elt)
+ (match-beginning 0)
+ (save-excursion
+ (semantic-lex-unterminated-syntax-protection (nth 2 ,elt)
+ (forward-list 1)
+ (point)))))))
+ ((setq ,elt (assoc ,val (cdr ,lst)))
+ (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
+ (semantic-lex-push-token
+ (semantic-lex-token
+ (nth 1 ,elt)
+ (match-beginning 0) (match-end 0))))
+ ))))
+ ))
+\f
+;;; Lexical Safety
+;;
+;; The semantic lexers, unlike other lexers, can throw errors on
+;; unbalanced syntax. Since editing is all about changeging test
+;; we need to provide a convenient way to protect against syntactic
+;; inequalities.
+
+(defmacro semantic-lex-catch-errors (symbol &rest forms)
+ "Using SYMBOL, execute FORMS catching lexical errors.
+If FORMS results in a call to the parser that throws a lexical error,
+the error will be caught here without the buffer's cache being thrown
+out of date.
+If there is an error, the syntax that failed is returned.
+If there is no error, then the last value of FORMS is returned."
+ (let ((ret (make-symbol "ret"))
+ (syntax (make-symbol "syntax"))
+ (start (make-symbol "start"))
+ (end (make-symbol "end")))
+ `(let* ((semantic-lex-unterminated-syntax-end-function
+ (lambda (,syntax ,start ,end)
+ (throw ',symbol ,syntax)))
+ ;; Delete the below when semantic-flex is fully retired.
+ (semantic-flex-unterminated-syntax-end-function
+ semantic-lex-unterminated-syntax-end-function)
+ (,ret (catch ',symbol
+ (save-excursion
+ ,@forms
+ nil))))
+ ;; Great Sadness. Assume that FORMS execute within the
+ ;; confines of the current buffer only! Mark this thing
+ ;; unparseable iff the special symbol was thrown. This
+ ;; will prevent future calls from parsing, but will allow
+ ;; then to still return the cache.
+ (when ,ret
+ ;; Leave this message off. If an APP using this fcn wants
+ ;; a message, they can do it themselves. This cleans up
+ ;; problems with the idle scheduler obscuring useful data.
+ ;;(message "Buffer not currently parsable (%S)." ,ret)
+ (semantic-parse-tree-unparseable))
+ ,ret)))
+(put 'semantic-lex-catch-errors 'lisp-indent-function 1)
+
+\f
+;;; Interfacing with edebug
+;;
+(add-hook
+ 'edebug-setup-hook
+ #'(lambda ()
+
+ (def-edebug-spec define-lex
+ (&define name stringp (&rest symbolp))
+ )
+ (def-edebug-spec define-lex-analyzer
+ (&define name stringp form def-body)
+ )
+ (def-edebug-spec define-lex-regex-analyzer
+ (&define name stringp form def-body)
+ )
+ (def-edebug-spec define-lex-simple-regex-analyzer
+ (&define name stringp form symbolp [ &optional form ] def-body)
+ )
+ (def-edebug-spec define-lex-block-analyzer
+ (&define name stringp form (&rest form))
+ )
+ (def-edebug-spec semantic-lex-catch-errors
+ (symbolp def-body)
+ )
+
+ ))
+\f
+;;; Compatibility with Semantic 1.x lexical analysis
+;;
+;; NOTE: DELETE THIS SOMEDAY SOON
+
+(semantic-alias-obsolete 'semantic-flex-start 'semantic-lex-token-start)
+(semantic-alias-obsolete 'semantic-flex-end 'semantic-lex-token-end)
+(semantic-alias-obsolete 'semantic-flex-text 'semantic-lex-token-text)
+(semantic-alias-obsolete 'semantic-flex-make-keyword-table 'semantic-lex-make-keyword-table)
+(semantic-alias-obsolete 'semantic-flex-keyword-p 'semantic-lex-keyword-p)
+(semantic-alias-obsolete 'semantic-flex-keyword-put 'semantic-lex-keyword-put)
+(semantic-alias-obsolete 'semantic-flex-keyword-get 'semantic-lex-keyword-get)
+(semantic-alias-obsolete 'semantic-flex-map-keywords 'semantic-lex-map-keywords)
+(semantic-alias-obsolete 'semantic-flex-keywords 'semantic-lex-keywords)
+(semantic-alias-obsolete 'semantic-flex-buffer 'semantic-lex-buffer)
+(semantic-alias-obsolete 'semantic-flex-list 'semantic-lex-list)
+
+;; This simple scanner uses the syntax table to generate a stream of
+;; simple tokens of the form:
+;;
+;; (SYMBOL START . END)
+;;
+;; Where symbol is the type of thing it is. START and END mark that
+;; objects boundary.
+
+(defvar semantic-flex-tokens semantic-lex-tokens
+ "An alist of of semantic token types.
+See variable `semantic-lex-tokens'.")
+
+(defvar semantic-flex-unterminated-syntax-end-function
+ (lambda (syntax syntax-start flex-end) flex-end)
+ "Function called when unterminated syntax is encountered.
+This should be set to one function. That function should take three
+parameters. The SYNTAX, or type of syntax which is unterminated.
+SYNTAX-START where the broken syntax begins.
+FLEX-END is where the lexical analysis was asked to end.
+This function can be used for languages that can intelligently fix up
+broken syntax, or the exit lexical analysis via `throw' or `signal'
+when finding unterminated syntax.")
+
+(defvar semantic-flex-extensions nil
+ "Buffer local extensions to the lexical analyzer.
+This should contain an alist with a key of a regex and a data element of
+a function. The function should both move point, and return a lexical
+token of the form:
+ ( TYPE START . END)
+nil is also a valid return value.
+TYPE can be any type of symbol, as long as it doesn't occur as a
+nonterminal in the language definition.")
+(make-variable-buffer-local 'semantic-flex-extensions)
+
+(defvar semantic-flex-syntax-modifications nil
+ "Changes to the syntax table for this buffer.
+These changes are active only while the buffer is being flexed.
+This is a list where each element has the form:
+ (CHAR CLASS)
+CHAR is the char passed to `modify-syntax-entry',
+and CLASS is the string also passed to `modify-syntax-entry' to define
+what syntax class CHAR has.")
+(make-variable-buffer-local 'semantic-flex-syntax-modifications)
+
+(defvar semantic-ignore-comments t
+ "Default comment handling.
+t means to strip comments when flexing. Nil means to keep comments
+as part of the token stream.")
+(make-variable-buffer-local 'semantic-ignore-comments)
+
+(defvar semantic-flex-enable-newlines nil
+ "When flexing, report 'newlines as syntactic elements.
+Useful for languages where the newline is a special case terminator.
+Only set this on a per mode basis, not globally.")
+(make-variable-buffer-local 'semantic-flex-enable-newlines)
+
+(defvar semantic-flex-enable-whitespace nil
+ "When flexing, report 'whitespace as syntactic elements.
+Useful for languages where the syntax is whitespace dependent.
+Only set this on a per mode basis, not globally.")
+(make-variable-buffer-local 'semantic-flex-enable-whitespace)
+
+(defvar semantic-flex-enable-bol nil
+ "When flexing, report beginning of lines as syntactic elements.
+Useful for languages like python which are indentation sensitive.
+Only set this on a per mode basis, not globally.")
+(make-variable-buffer-local 'semantic-flex-enable-bol)
+
+(defvar semantic-number-expression semantic-lex-number-expression
+ "See variable `semantic-lex-number-expression'.")
+(make-variable-buffer-local 'semantic-number-expression)
+
+(defvar semantic-flex-depth 0
+ "Default flexing depth.
+This specifies how many lists to create tokens in.")
+(make-variable-buffer-local 'semantic-flex-depth)
+
+(defun semantic-flex (start end &optional depth length)
+ "Using the syntax table, do something roughly equivalent to flex.
+Semantically check between START and END. Optional argument DEPTH
+indicates at what level to scan over entire lists.
+The return value is a token stream. Each element is a list, such of
+the form (symbol start-expression . end-expression) where SYMBOL
+denotes the token type.
+See `semantic-flex-tokens' variable for details on token types.
+END does not mark the end of the text scanned, only the end of the
+beginning of text scanned. Thus, if a string extends past END, the
+end of the return token will be larger than END. To truly restrict
+scanning, use `narrow-to-region'.
+The last argument, LENGTH specifies that `semantic-flex' should only
+return LENGTH tokens."
+ (message "`semantic-flex' is an obsolete function. Use `define-lex' to create lexers.")
+ (if (not semantic-flex-keywords-obarray)
+ (setq semantic-flex-keywords-obarray [ nil ]))
+ (let ((ts nil)
+ (pos (point))
+ (ep nil)
+ (curdepth 0)
+ (cs (if comment-start-skip
+ (concat "\\(\\s<\\|" comment-start-skip "\\)")
+ (concat "\\(\\s<\\)")))
+ (newsyntax (copy-syntax-table (syntax-table)))
+ (mods semantic-flex-syntax-modifications)
+ ;; Use the default depth if it is not specified.
+ (depth (or depth semantic-flex-depth)))
+ ;; Update the syntax table
+ (while mods
+ (modify-syntax-entry (car (car mods)) (car (cdr (car mods))) newsyntax)
+ (setq mods (cdr mods)))
+ (with-syntax-table newsyntax
+ (goto-char start)
+ (while (and (< (point) end) (or (not length) (<= (length ts) length)))
+ (cond
+ ;; catch beginning of lines when needed.
+ ;; Must be done before catching any other tokens!
+ ((and semantic-flex-enable-bol
+ (bolp)
+ ;; Just insert a (bol N . N) token in the token stream,
+ ;; without moving the point. N is the point at the
+ ;; beginning of line.
+ (setq ts (cons (cons 'bol (cons (point) (point))) ts))
+ nil)) ;; CONTINUE
+ ;; special extensions, includes whitespace, nl, etc.
+ ((and semantic-flex-extensions
+ (let ((fe semantic-flex-extensions)
+ (r nil))
+ (while fe
+ (if (looking-at (car (car fe)))
+ (setq ts (cons (funcall (cdr (car fe))) ts)
+ r t
+ fe nil
+ ep (point)))
+ (setq fe (cdr fe)))
+ (if (and r (not (car ts))) (setq ts (cdr ts)))
+ r)))
+ ;; catch newlines when needed
+ ((looking-at "\\s-*\\(\n\\|\\s>\\)")
+ (if semantic-flex-enable-newlines
+ (setq ep (match-end 1)
+ ts (cons (cons 'newline
+ (cons (match-beginning 1) ep))
+ ts))))
+ ;; catch whitespace when needed
+ ((looking-at "\\s-+")
+ (if semantic-flex-enable-whitespace
+ ;; Language wants whitespaces, link them together.
+ (if (eq (car (car ts)) 'whitespace)
+ (setcdr (cdr (car ts)) (match-end 0))
+ (setq ts (cons (cons 'whitespace
+ (cons (match-beginning 0)
+ (match-end 0)))
+ ts)))))
+ ;; numbers
+ ((and semantic-number-expression
+ (looking-at semantic-number-expression))
+ (setq ts (cons (cons 'number
+ (cons (match-beginning 0)
+ (match-end 0)))
+ ts)))
+ ;; symbols
+ ((looking-at "\\(\\sw\\|\\s_\\)+")
+ (setq ts (cons (cons
+ ;; Get info on if this is a keyword or not
+ (or (semantic-flex-keyword-p (match-string 0))
+ 'symbol)
+ (cons (match-beginning 0) (match-end 0)))
+ ts)))
+ ;; Character quoting characters (ie, \n as newline)
+ ((looking-at "\\s\\+")
+ (setq ts (cons (cons 'charquote
+ (cons (match-beginning 0) (match-end 0)))
+ ts)))
+ ;; Open parens, or semantic-lists.
+ ((looking-at "\\s(")
+ (if (or (not depth) (< curdepth depth))
+ (progn
+ (setq curdepth (1+ curdepth))
+ (setq ts (cons (cons 'open-paren
+ (cons (match-beginning 0) (match-end 0)))
+ ts)))
+ (setq ts (cons
+ (cons 'semantic-list
+ (cons (match-beginning 0)
+ (save-excursion
+ (condition-case nil
+ (forward-list 1)
+ ;; This case makes flex robust
+ ;; to broken lists.
+ (error
+ (goto-char
+ (funcall
+ semantic-flex-unterminated-syntax-end-function
+ 'semantic-list
+ start end))))
+ (setq ep (point)))))
+ ts))))
+ ;; Close parens
+ ((looking-at "\\s)")
+ (setq ts (cons (cons 'close-paren
+ (cons (match-beginning 0) (match-end 0)))
+ ts))
+ (setq curdepth (1- curdepth)))
+ ;; String initiators
+ ((looking-at "\\s\"")
+ ;; Zing to the end of this string.
+ (setq ts (cons (cons 'string
+ (cons (match-beginning 0)
+ (save-excursion
+ (condition-case nil
+ (forward-sexp 1)
+ ;; This case makes flex
+ ;; robust to broken strings.
+ (error
+ (goto-char
+ (funcall
+ semantic-flex-unterminated-syntax-end-function
+ 'string
+ start end))))
+ (setq ep (point)))))
+ ts)))
+ ;; comments
+ ((looking-at cs)
+ (if (and semantic-ignore-comments
+ (not semantic-flex-enable-whitespace))
+ ;; If the language doesn't deal with comments nor
+ ;; whitespaces, ignore them here.
+ (let ((comment-start-point (point)))
+ (forward-comment 1)
+ (if (eq (point) comment-start-point)
+ ;; In this case our start-skip string failed
+ ;; to work properly. Lets try and move over
+ ;; whatever white space we matched to begin
+ ;; with.
+ (skip-syntax-forward "-.'"
+ (save-excursion
+ (end-of-line)
+ (point)))
+ ;;(forward-comment 1)
+ ;; Generate newline token if enabled
+ (if (and semantic-flex-enable-newlines
+ (bolp))
+ (backward-char 1)))
+ (if (eq (point) comment-start-point)
+ (error "Strange comment syntax prevents lexical analysis"))
+ (setq ep (point)))
+ (let ((tk (if semantic-ignore-comments 'whitespace 'comment)))
+ (save-excursion
+ (forward-comment 1)
+ ;; Generate newline token if enabled
+ (if (and semantic-flex-enable-newlines
+ (bolp))
+ (backward-char 1))
+ (setq ep (point)))
+ ;; Language wants comments or want them as whitespaces,
+ ;; link them together.
+ (if (eq (car (car ts)) tk)
+ (setcdr (cdr (car ts)) ep)
+ (setq ts (cons (cons tk (cons (match-beginning 0) ep))
+ ts))))))
+ ;; punctuation
+ ((looking-at "\\(\\s.\\|\\s$\\|\\s'\\)")
+ (setq ts (cons (cons 'punctuation
+ (cons (match-beginning 0) (match-end 0)))
+ ts)))
+ ;; unknown token
+ (t
+ (error "What is that?")))
+ (goto-char (or ep (match-end 0)))
+ (setq ep nil)))
+ ;; maybe catch the last beginning of line when needed
+ (and semantic-flex-enable-bol
+ (= (point) end)
+ (bolp)
+ (setq ts (cons (cons 'bol (cons (point) (point))) ts)))
+ (goto-char pos)
+ ;;(message "Flexing muscles...done")
+ (nreverse ts)))
+
+(provide 'semantic/lex)
+
+;;; semantic-lex.el ends here