From fb11a5e11769a7ca909bf5a7798f9e7193fabb13 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Sat, 23 May 2015 15:18:12 -0700 Subject: [PATCH] Cleanup etags.c to use locale-independent code Although this doesn't alter behavior (as etags doesn't use setlocale), the new version is more clearly locale-independent and the executable is a bit smaller on my platform. * lib-src/etags.c: Include , for UCHAR_MAX. Include instead of . (CHARS, CHAR, init, _wht, _nin, _itk, _btk, _etk, white, nonam, endtk) (begtk, midtk): Remove; no longer needed. (iswhite, ISALNUM, ISALPHA, ISDIGIT, ISLOWER, lowcase): Remove. All callers changed to use c_isspace, c_isalnum, c_isalpha, c_isdigit, c_islower, c_tolower, respectively. (notinname, begtoken, intoken, endtoken): Rewrite as functions instead of macros, and initialize the tables at compile-time rather than at run-time. --- lib-src/etags.c | 214 ++++++++++++++++++++++++------------------------ 1 file changed, 109 insertions(+), 105 deletions(-) diff --git a/lib-src/etags.c b/lib-src/etags.c index 28729da8720..facb462f67b 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -119,17 +119,18 @@ char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4"; # define O_CLOEXEC O_NOINHERIT #endif /* WINDOWSNT */ +#include #include #include #include #include #include -#include #include #include #include #include #include +#include #include #include @@ -156,21 +157,71 @@ char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4"; #define strneq(s,t,n) (assert ((s)!=NULL || (t)!=NULL), !strncmp (s, t, n)) #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n)) -#define CHARS 256 /* 2^sizeof(char) */ -#define CHAR(x) ((unsigned int)(x) & (CHARS - 1)) -#define iswhite(c) (_wht[CHAR (c)]) /* c is white (see white) */ -#define notinname(c) (_nin[CHAR (c)]) /* c is not in a name (see nonam) */ -#define begtoken(c) (_btk[CHAR (c)]) /* c can start token (see begtk) */ -#define intoken(c) (_itk[CHAR (c)]) /* c can be in token (see midtk) */ -#define endtoken(c) (_etk[CHAR (c)]) /* c ends tokens (see endtk) */ +/* C is not in a name. */ +static bool +notinname (unsigned char c) +{ + /* Look at make_tag before modifying! */ + static bool const table[UCHAR_MAX + 1] = { + ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1, + ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1 + }; + return table[c]; +} -#define ISALNUM(c) isalnum (CHAR (c)) -#define ISALPHA(c) isalpha (CHAR (c)) -#define ISDIGIT(c) isdigit (CHAR (c)) -#define ISLOWER(c) islower (CHAR (c)) +/* C can start a token. */ +static bool +begtoken (unsigned char c) +{ + static bool const table[UCHAR_MAX + 1] = { + ['$']=1, ['@']=1, + ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1, + ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1, + ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1, + ['Y']=1, ['Z']=1, + ['_']=1, + ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1, + ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1, + ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1, + ['y']=1, ['z']=1, + ['~']=1 + }; + return table[c]; +} -#define lowcase(c) tolower (CHAR (c)) +/* C can be in the middle of a token. */ +static bool +intoken (unsigned char c) +{ + static bool const table[UCHAR_MAX + 1] = { + ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1, + ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1, + ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1, + ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1, + ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1, + ['Y']=1, ['Z']=1, + ['_']=1, + ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1, + ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1, + ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1, + ['y']=1, ['z']=1 + }; + return table[c]; +} +/* C can end a token. */ +static bool +endtoken (unsigned char c) +{ + static bool const table[UCHAR_MAX + 1] = { + ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1, + ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1, + ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1, + ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1, + ['{']=1, ['|']=1, ['}']=1, ['~']=1 + }; + return table[c]; +} /* * xnew, xrnew -- allocate, reallocate storage @@ -316,7 +367,6 @@ _Noreturn void fatal (const char *, const char *); static _Noreturn void pfatal (const char *); static void add_node (node *, node **); -static void init (void); static void process_file_name (char *, language *); static void process_file (FILE *, char *, language *); static void find_entries (FILE *); @@ -370,20 +420,6 @@ static linebuffer lb; /* the current line */ static linebuffer filebuf; /* a buffer containing the whole file */ static linebuffer token_name; /* a buffer containing a tag name */ -/* boolean "functions" (see init) */ -static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS]; -static const char - /* white chars */ - *white = " \f\t\n\r\v", - /* not in a name */ - *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */ - /* token ending chars */ - *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?", - /* token starting chars */ - *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@", - /* valid in-token chars */ - *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789"; - static bool append_to_tagfile; /* -a: append to tags */ /* The next five default to true in C and derived languages. */ static bool typedefs; /* -t: create tags for C and Ada typedefs */ @@ -1167,8 +1203,6 @@ main (int argc, char **argv) tagfiledir = absolute_dirname (tagfile, cwd); } - init (); /* set up boolean "functions" */ - linebuffer_init (&lb); linebuffer_init (&filename_lb); linebuffer_init (&filebuf); @@ -1636,34 +1670,6 @@ process_file (FILE *fh, char *fn, language *lang) } } -/* - * This routine sets up the boolean pseudo-functions which work - * by setting boolean flags dependent upon the corresponding character. - * Every char which is NOT in that string is not a white char. Therefore, - * all of the array "_wht" is set to false, and then the elements - * subscripted by the chars in "white" are set to true. Thus "_wht" - * of a char is true if it is the string "white", else false. - */ -static void -init (void) -{ - const char *sp; - int i; - - for (i = 0; i < CHARS; i++) - iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) - = false; - for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = true; - for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = true; - notinname ('\0') = notinname ('\n'); - for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = true; - begtoken ('\0') = begtoken ('\n'); - for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = true; - intoken ('\0') = intoken ('\n'); - for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = true; - endtoken ('\0') = endtoken ('\n'); -} - /* * This routine opens the specified file and calls the function * which finds the function and type definitions. @@ -3179,7 +3185,7 @@ C_entries (int c_ext, FILE *inf) followed by an end of comment, this is a preprocessor token. */ for (cp = newlb.buffer; cp < lp-1; cp++) - if (!iswhite (*cp)) + if (!c_isspace (*cp)) { if (*cp == '*' && cp[1] == '/') { @@ -3262,7 +3268,7 @@ C_entries (int c_ext, FILE *inf) if (*lp != '\0') lp += 1; while (*lp != '\0' - && !iswhite (*lp) && *lp != '(') + && !c_isspace (*lp) && *lp != '(') lp += 1; c = *lp++; toklen += lp - oldlp; @@ -3934,14 +3940,14 @@ F_takeprec (void) dbp += 3; return; } - if (!ISDIGIT (*dbp)) + if (!c_isdigit (*dbp)) { --dbp; /* force failure */ return; } do dbp++; - while (ISDIGIT (*dbp)); + while (c_isdigit (*dbp)); } static void @@ -3959,7 +3965,7 @@ F_getit (FILE *inf) dbp += 6; dbp = skip_spaces (dbp); } - if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$') + if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$') return; for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++) continue; @@ -3988,7 +3994,7 @@ Fortran_functions (FILE *inf) if (LOOKING_AT_NOCASE (dbp, "elemental")) dbp = skip_spaces (dbp); - switch (lowcase (*dbp)) + switch (c_tolower (*dbp)) { case 'i': if (nocase_tail ("integer")) @@ -4021,7 +4027,7 @@ Fortran_functions (FILE *inf) dbp = skip_spaces (dbp); if (*dbp == '\0') continue; - switch (lowcase (*dbp)) + switch (c_tolower (*dbp)) { case 'f': if (nocase_tail ("function")) @@ -4075,7 +4081,7 @@ Ada_getit (FILE *inf, const char *name_qualifier) readline (&lb, inf); dbp = lb.buffer; } - switch (lowcase (*dbp)) + switch (c_tolower (*dbp)) { case 'b': if (nocase_tail ("body")) @@ -4102,8 +4108,7 @@ Ada_getit (FILE *inf, const char *name_qualifier) { dbp = skip_spaces (dbp); for (cp = dbp; - (*cp != '\0' - && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.')); + c_isalnum (*cp) || *cp == '_' || *cp == '.'; cp++) continue; if (cp == dbp) @@ -4179,7 +4184,7 @@ Ada_funcs (FILE *inf) } /* We are at the beginning of a token. */ - switch (lowcase (*dbp)) + switch (c_tolower (*dbp)) { case 'f': if (!packages_only && nocase_tail ("function")) @@ -4244,13 +4249,13 @@ Asm_labels (FILE *inf) { /* If first char is alphabetic or one of [_.$], test for colon following identifier. */ - if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$') + if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$') { /* Read past label. */ cp++; - while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$') + while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$') cp++; - if (*cp == ':' || iswhite (*cp)) + if (*cp == ':' || c_isspace (*cp)) /* Found end of label, so copy it and add it to the table. */ make_tag (lb.buffer, cp - lb.buffer, true, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); @@ -4338,7 +4343,7 @@ Perl_functions (FILE *inf) varstart += 1; do cp++; - while (ISALNUM (*cp) || *cp == '_'); + while (c_isalnum (*cp) || *cp == '_'); } else if (qual) { @@ -4433,7 +4438,7 @@ PHP_functions (FILE *inf) if (*cp != '\0') { name = cp; - while (*cp != '\0' && !iswhite (*cp)) + while (*cp != '\0' && !c_isspace (*cp)) cp++; make_tag (name, cp - name, false, lb.buffer, cp - lb.buffer + 1, lineno, linecharno); @@ -4485,10 +4490,10 @@ Cobol_paragraphs (FILE *inf) bp += 8; /* If eoln, compiler option or comment ignore whole line. */ - if (bp[-1] != ' ' || !ISALNUM (bp[0])) + if (bp[-1] != ' ' || !c_isalnum (bp[0])) continue; - for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++) + for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++) continue; if (*ep++ == '.') make_tag (bp, ep - bp, true, @@ -4636,7 +4641,7 @@ Pascal_functions (FILE *inf) /* Check if this is an "extern" declaration. */ if (*dbp == '\0') continue; - if (lowcase (*dbp) == 'e') + if (c_tolower (*dbp) == 'e') { if (nocase_tail ("extern")) /* superfluous, really! */ { @@ -4644,7 +4649,7 @@ Pascal_functions (FILE *inf) verify_tag = false; } } - else if (lowcase (*dbp) == 'f') + else if (c_tolower (*dbp) == 'f') { if (nocase_tail ("forward")) /* check for forward reference */ { @@ -4691,7 +4696,7 @@ Pascal_functions (FILE *inf) else if (!incomment && !inquote && !found_tag) { /* Check for proc/fn keywords. */ - switch (lowcase (c)) + switch (c_tolower (c)) { case 'p': if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */ @@ -4855,13 +4860,13 @@ Forth_words (FILE *inf) LOOP_ON_INPUT_LINES (inf, lb, bp) while ((bp = skip_spaces (bp))[0] != '\0') - if (bp[0] == '\\' && iswhite (bp[1])) + if (bp[0] == '\\' && c_isspace (bp[1])) break; /* read next line */ - else if (bp[0] == '(' && iswhite (bp[1])) + else if (bp[0] == '(' && c_isspace (bp[1])) do /* skip to ) or eol */ bp++; while (*bp != ')' && *bp != '\0'); - else if ((bp[0] == ':' && iswhite (bp[1]) && bp++) + else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++) || LOOKING_AT_NOCASE (bp, "constant") || LOOKING_AT_NOCASE (bp, "code") || LOOKING_AT_NOCASE (bp, "create") @@ -4972,7 +4977,7 @@ TeX_commands (FILE *inf) cp++; } for (p = cp; - (!iswhite (*p) && *p != '#' && + (!c_isspace (*p) && *p != '#' && *p != TEX_opgrp && *p != TEX_clgrp); p++) continue; @@ -5130,7 +5135,7 @@ HTML_labels (FILE *inf) else if (intag) /* look for "name=" or "id=" */ { while (*dbp != '\0' && *dbp != '>' - && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i') + && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i') dbp++; if (*dbp == '\0') break; /* go to next line */ @@ -5172,7 +5177,7 @@ HTML_labels (FILE *inf) if (*dbp == '<') { intag = true; - inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2])); + inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2])); continue; /* look on the same line */ } @@ -5192,7 +5197,7 @@ HTML_labels (FILE *inf) if (*dbp == '\0') break; /* go to next line */ intag = true; - if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2])) + if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2])) { inanchor = true; continue; /* look on the same line */ @@ -5239,7 +5244,7 @@ Prolog_functions (FILE *inf) { if (cp[0] == '\0') /* Empty line */ continue; - else if (iswhite (cp[0])) /* Not a predicate */ + else if (c_isspace (cp[0])) /* Not a predicate */ continue; else if (cp[0] == '/' && cp[1] == '*') /* comment. */ prolog_skip_comment (&lb, inf); @@ -5331,11 +5336,11 @@ prolog_atom (char *s, size_t pos) origpos = pos; - if (ISLOWER (s[pos]) || (s[pos] == '_')) + if (c_islower (s[pos]) || s[pos] == '_') { /* The atom is unquoted. */ pos++; - while (ISALNUM (s[pos]) || (s[pos] == '_')) + while (c_isalnum (s[pos]) || s[pos] == '_') { pos++; } @@ -5399,7 +5404,7 @@ Erlang_functions (FILE *inf) { if (cp[0] == '\0') /* Empty line */ continue; - else if (iswhite (cp[0])) /* Not function nor attribute */ + else if (c_isspace (cp[0])) /* Not function nor attribute */ continue; else if (cp[0] == '%') /* comment */ continue; @@ -5506,12 +5511,12 @@ erlang_atom (char *s) { int pos = 0; - if (ISALPHA (s[pos]) || s[pos] == '_') + if (c_isalpha (s[pos]) || s[pos] == '_') { /* The atom is unquoted. */ do pos++; - while (ISALNUM (s[pos]) || s[pos] == '_'); + while (c_isalnum (s[pos]) || s[pos] == '_'); } else if (s[pos] == '\'') { @@ -5729,10 +5734,10 @@ add_regex (char *regexp_pattern, language *lang) *patbuf = zeropattern; if (ignore_case) { - static char lc_trans[CHARS]; + static char lc_trans[UCHAR_MAX + 1]; int i; - for (i = 0; i < CHARS; i++) - lc_trans[i] = lowcase (i); + for (i = 0; i < UCHAR_MAX + 1; i++) + lc_trans[i] = c_tolower (i); patbuf->translate = lc_trans; /* translation table to fold case */ } @@ -5787,7 +5792,7 @@ substitute (char *in, char *out, struct re_registers *regs) for (t = strchr (out, '\\'); t != NULL; t = strchr (t + 2, '\\')) - if (ISDIGIT (t[1])) + if (c_isdigit (t[1])) { dig = t[1] - '0'; diglen = regs->end[dig] - regs->start[dig]; @@ -5801,7 +5806,7 @@ substitute (char *in, char *out, struct re_registers *regs) result = xnew (size + 1, char); for (t = result; *out != '\0'; out++) - if (*out == '\\' && ISDIGIT (*++out)) + if (*out == '\\' && c_isdigit (*++out)) { dig = *out - '0'; diglen = regs->end[dig] - regs->start[dig]; @@ -5921,9 +5926,9 @@ regex_tag_multiline (void) static bool nocase_tail (const char *cp) { - register int len = 0; + int len = 0; - while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len])) + while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len])) cp++, len++; if (*cp == '\0' && !intoken (dbp[len])) { @@ -6257,7 +6262,7 @@ savenstr (const char *cp, int len) static char * skip_spaces (char *cp) { - while (iswhite (*cp)) + while (c_isspace (*cp)) cp++; return cp; } @@ -6266,7 +6271,7 @@ skip_spaces (char *cp) static char * skip_non_spaces (char *cp) { - while (*cp != '\0' && !iswhite (*cp)) + while (*cp != '\0' && !c_isspace (*cp)) cp++; return cp; } @@ -6527,7 +6532,7 @@ filename_is_absolute (char *fn) { return (fn[0] == '/' #ifdef DOS_NT - || (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/') + || (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/') #endif ); } @@ -6541,9 +6546,8 @@ canonicalize_filename (register char *fn) #ifdef DOS_NT /* Canonicalize drive letter case. */ -# define ISUPPER(c) isupper (CHAR (c)) - if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0])) - fn[0] = lowcase (fn[0]); + if (c_isupper (fn[0]) && fn[1] == ':') + fn[0] = c_tolower (fn[0]); /* Collapse multiple forward- and back-slashes into a single forward slash. */ -- 2.39.2