From: Mattias EngdegÄrd Date: Mon, 8 May 2023 16:38:33 +0000 (+0200) Subject: Speed up skip-chars-{forward|reverse} with char classes X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=56468b52b2355a00c1dff2137c54136dbb031922;p=emacs.git Speed up skip-chars-{forward|reverse} with char classes * src/regex-emacs.h (re_wctype_t): Add RECC_NUM_CLASSES. * src/syntax.c (skip_chars, in_classes): Use an array on the stack instead of a Lisp list for storing character classes. Don't check all classes if there is a match in one. Remove useless handle_iso_classes argument. --- diff --git a/src/regex-emacs.h b/src/regex-emacs.h index 1bc973363e9..bc357633135 100644 --- a/src/regex-emacs.h +++ b/src/regex-emacs.h @@ -187,7 +187,8 @@ typedef enum { RECC_ERROR = 0, RECC_DIGIT, RECC_XDIGIT, RECC_BLANK, RECC_SPACE, RECC_MULTIBYTE, RECC_NONASCII, - RECC_ASCII, RECC_UNIBYTE + RECC_ASCII, RECC_UNIBYTE, + RECC_NUM_CLASSES = RECC_UNIBYTE } re_wctype_t; extern bool re_iswctype (int ch, re_wctype_t cc); diff --git a/src/syntax.c b/src/syntax.c index e9e04e2d638..839ab36bb2f 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -178,14 +178,14 @@ static ptrdiff_t find_start_begv; static modiff_count find_start_modiff; -static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool); +static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object); static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object); static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool); static void scan_sexps_forward (struct lisp_parse_state *, ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT, bool, int); static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *); -static bool in_classes (int, Lisp_Object); +static bool in_classes (int c, int num_classes, const unsigned char *classes); static void parse_sexp_propertize (ptrdiff_t charpos); /* This setter is used only in this file, so it can be private. */ @@ -1607,7 +1607,7 @@ Char classes, e.g. `[:alpha:]', are supported. Returns the distance traveled, either zero or positive. */) (Lisp_Object string, Lisp_Object lim) { - return skip_chars (1, string, lim, 1); + return skip_chars (1, string, lim); } DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0, @@ -1616,7 +1616,7 @@ See `skip-chars-forward' for details. Returns the distance traveled, either zero or negative. */) (Lisp_Object string, Lisp_Object lim) { - return skip_chars (0, string, lim, 1); + return skip_chars (0, string, lim); } DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0, @@ -1643,8 +1643,7 @@ of this is the distance traveled. */) } static Lisp_Object -skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim, - bool handle_iso_classes) +skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim) { int c; char fastmap[0400]; @@ -1661,11 +1660,9 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim, ptrdiff_t size_byte; const unsigned char *str; int len; - Lisp_Object iso_classes; USE_SAFE_ALLOCA; CHECK_STRING (string); - iso_classes = Qnil; if (NILP (lim)) XSETINT (lim, forwardp ? ZV : BEGV); @@ -1700,6 +1697,8 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim, If STRING contains non-ASCII characters, setup char_ranges for them and use fastmap only for their leading codes. */ + int nclasses = 0; + unsigned char classes[RECC_NUM_CLASSES]; if (! string_multibyte) { bool string_has_eight_bit = 0; @@ -1707,18 +1706,16 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim, /* At first setup fastmap. */ while (i_byte < size_byte) { - if (handle_iso_classes) + const unsigned char *ch = str + i_byte; + re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte); + if (cc == 0) + error ("Invalid ISO C character class"); + if (cc != -1) { - const unsigned char *ch = str + i_byte; - re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte); - if (cc == 0) - error ("Invalid ISO C character class"); - if (cc != -1) - { - iso_classes = Fcons (make_fixnum (cc), iso_classes); - i_byte = ch - str; - continue; - } + if (!(nclasses && memchr (classes, cc, nclasses))) + classes[nclasses++] = cc; + i_byte = ch - str; + continue; } c = str[i_byte++]; @@ -1803,18 +1800,16 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim, { int leading_code = str[i_byte]; - if (handle_iso_classes) + const unsigned char *ch = str + i_byte; + re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte); + if (cc == 0) + error ("Invalid ISO C character class"); + if (cc != -1) { - const unsigned char *ch = str + i_byte; - re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte); - if (cc == 0) - error ("Invalid ISO C character class"); - if (cc != -1) - { - iso_classes = Fcons (make_fixnum (cc), iso_classes); - i_byte = ch - str; - continue; - } + if (!(nclasses && memchr (classes, cc, nclasses))) + classes[nclasses++] = cc; + i_byte = ch - str; + continue; } if (leading_code== '\\') @@ -1960,7 +1955,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim, stop = endp; } c = string_char_and_length (p, &nbytes); - if (! NILP (iso_classes) && in_classes (c, iso_classes)) + if (nclasses && in_classes (c, nclasses, classes)) { if (negate) break; @@ -2001,7 +1996,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim, stop = endp; } - if (!NILP (iso_classes) && in_classes (*p, iso_classes)) + if (nclasses && in_classes (*p, nclasses, classes)) { if (negate) break; @@ -2035,7 +2030,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim, c = STRING_CHAR (p); - if (! NILP (iso_classes) && in_classes (c, iso_classes)) + if (nclasses && in_classes (c, nclasses, classes)) { if (negate) break; @@ -2069,7 +2064,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim, stop = endp; } - if (! NILP (iso_classes) && in_classes (p[-1], iso_classes)) + if (nclasses && in_classes (p[-1], nclasses, classes)) { if (negate) break; @@ -2253,26 +2248,16 @@ skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim) } } -/* Return true if character C belongs to one of the ISO classes - in the list ISO_CLASSES. Each class is represented by an - integer which is its type according to re_wctype. */ +/* Return true if character C belongs to one of the ISO classes in the + array. */ static bool -in_classes (int c, Lisp_Object iso_classes) +in_classes (int c, int nclasses, const unsigned char *classes) { - bool fits_class = 0; - - while (CONSP (iso_classes)) - { - Lisp_Object elt; - elt = XCAR (iso_classes); - iso_classes = XCDR (iso_classes); - - if (re_iswctype (c, XFIXNAT (elt))) - fits_class = 1; - } - - return fits_class; + for (int i = 0; i < nclasses; i++) + if (re_iswctype (c, classes[i])) + return true; + return false; } /* Jump over a comment, assuming we are at the beginning of one.