From: Paul Eggert Date: Thu, 6 Jul 2017 00:51:31 +0000 (-0700) Subject: Convert hex digits more systematically X-Git-Tag: emacs-26.0.90~520^2~3 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=d6662694d05be03fdd070353637dd22a324c8b7a;p=emacs.git Convert hex digits more systematically This makes the code a bit smaller and presumably faster, as it substitutes a single lookup for conditional jumps. * src/character.c (hexdigit): New constant. (syms_of_character) [HEXDIGIT_IS_CONST]: Initialize it. * src/character.h (HEXDIGIT_CONST, HEXDIGIT_IS_CONST): New macros. (hexdigit): New decl. (char_hexdigit): New inline function. * src/charset.c: Do not include c-ctype.h. * src/charset.c (read_hex): * src/editfns.c (styled_format): * src/image.c (xbm_scan): * src/lread.c (read_escape): * src/regex.c (ISXDIGIT) [emacs]: Use char_hexdigit instead of doing it by hand. --- diff --git a/src/character.c b/src/character.c index cf460540725..1c6020ee468 100644 --- a/src/character.c +++ b/src/character.c @@ -1050,9 +1050,26 @@ blankp (int c) return XINT (category) == UNICODE_CATEGORY_Zs; /* separator, space */ } +signed char HEXDIGIT_CONST hexdigit[UCHAR_MAX + 1] = + { +#if HEXDIGIT_IS_CONST + [0 ... UCHAR_MAX] = -1, +#endif + ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, + ['5'] = 5, ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9, + ['A'] = 10, ['B'] = 11, ['C'] = 12, ['D'] = 13, ['E'] = 14, ['F'] = 15, + ['a'] = 10, ['b'] = 11, ['c'] = 12, ['d'] = 13, ['e'] = 14, ['f'] = 15 + }; + void syms_of_character (void) { +#if !HEXDIGIT_IS_CONST + /* Set the non-hex digit values to -1. 
*/ + for (int i = 0; i <= UCHAR_MAX; i++) + hexdigit[i] -= i != '0' && !hexdigit[i]; +#endif + DEFSYM (Qcharacterp, "characterp"); DEFSYM (Qauto_fill_chars, "auto-fill-chars"); diff --git a/src/character.h b/src/character.h index 62d252e91ba..b073a0dd1e4 100644 --- a/src/character.h +++ b/src/character.h @@ -700,6 +700,24 @@ char_table_translate (Lisp_Object obj, int ch) return CHARACTERP (obj) ? XINT (obj) : ch; } +#if defined __GNUC__ && !defined __STRICT_ANSI__ +# define HEXDIGIT_CONST const +# define HEXDIGIT_IS_CONST true +#else +# define HEXDIGIT_CONST +# define HEXDIGIT_IS_CONST false +#endif +extern signed char HEXDIGIT_CONST hexdigit[]; + +/* If C is a hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F'), return its + value (0-15). Otherwise return -1. */ + +INLINE int +char_hexdigit (int c) +{ + return 0 <= c && c <= UCHAR_MAX ? hexdigit[c] : -1; +} + INLINE_HEADER_END #endif /* EMACS_CHARACTER_H */ diff --git a/src/charset.c b/src/charset.c index d0840f7d2a9..9c3b8db2a53 100644 --- a/src/charset.c +++ b/src/charset.c @@ -33,7 +33,6 @@ along with GNU Emacs. If not, see . */ #include #include #include -#include <c-ctype.h> #include "lisp.h" #include "character.h" #include "charset.h" @@ -434,14 +433,15 @@ read_hex (FILE *fp, bool *eof, bool *overflow) return 0; } n = 0; - while (c_isxdigit (c = getc_unlocked (fp))) + while (true) { + c = getc_unlocked (fp); + int digit = char_hexdigit (c); + if (digit < 0) + break; if (INT_LEFT_SHIFT_OVERFLOW (n, 4)) *overflow = 1; - n = ((n << 4) - | (c - ('0' <= c && c <= '9' ? '0' - : 'A' <= c && c <= 'F' ? 
'A' - 10 - : 'a' - 10))); + n = (n << 4) + digit; } if (c != EOF) ungetc (c, fp); diff --git a/src/editfns.c b/src/editfns.c index da99c055b54..d599fcfec80 100644 --- a/src/editfns.c +++ b/src/editfns.c @@ -4704,10 +4704,8 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message) char src0 = src[0]; int exponent_bytes = 0; bool signedp = src0 == '-' || src0 == '+' || src0 == ' '; - if (zero_flag - && ((src[signedp] >= '0' && src[signedp] <= '9') - || (src[signedp] >= 'a' && src[signedp] <= 'f') - || (src[signedp] >= 'A' && src[signedp] <= 'F'))) + unsigned char after_sign = src[signedp]; + if (zero_flag && 0 <= char_hexdigit (after_sign)) { leading_zeros += padding; padding = 0; diff --git a/src/image.c b/src/image.c index 07c4769e9e3..6b748ba2af6 100644 --- a/src/image.c +++ b/src/image.c @@ -2595,13 +2595,8 @@ xbm_scan (char **s, char *end, char *sval, int *ival) while (*s < end) { c = *(*s)++; - if (c_isdigit (c)) - digit = c - '0'; - else if (c >= 'a' && c <= 'f') - digit = c - 'a' + 10; - else if (c >= 'A' && c <= 'F') - digit = c - 'A' + 10; - else + digit = char_hexdigit (c); + if (digit < 0) break; value = 16 * value + digit; } diff --git a/src/lread.c b/src/lread.c index 182f96223a5..7c554ba8536 100644 --- a/src/lread.c +++ b/src/lread.c @@ -2426,25 +2426,13 @@ read_escape (Lisp_Object readcharfun, bool stringp) while (1) { c = READCHAR; - if (c >= '0' && c <= '9') - { - i *= 16; - i += c - '0'; - } - else if ((c >= 'a' && c <= 'f') - || (c >= 'A' && c <= 'F')) - { - i *= 16; - if (c >= 'a' && c <= 'f') - i += c - 'a' + 10; - else - i += c - 'A' + 10; - } - else + int digit = char_hexdigit (c); + if (digit < 0) { UNREAD (c); break; } + i = (i << 4) + digit; /* Allow hex escapes as large as ?\xfffffff, because some packages use them to denote characters with modifiers. 
*/ if ((CHAR_META | (CHAR_META - 1)) < i) @@ -2474,11 +2462,10 @@ read_escape (Lisp_Object readcharfun, bool stringp) c = READCHAR; /* `isdigit' and `isalpha' may be locale-specific, which we don't want. */ - if (c >= '0' && c <= '9') i = (i << 4) + (c - '0'); - else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10; - else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10; - else + int digit = char_hexdigit (c); + if (digit < 0) error ("Non-hex digit used for Unicode escape"); + i = (i << 4) + digit; } if (i > 0x10FFFF) error ("Non-Unicode character: 0x%x", i); diff --git a/src/regex.c b/src/regex.c index 240a91f2ba8..fb48765c96c 100644 --- a/src/regex.c +++ b/src/regex.c @@ -306,9 +306,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; /* In Emacs, these are only used for single-byte characters. */ # define ISDIGIT(c) ((c) >= '0' && (c) <= '9') # define ISCNTRL(c) ((c) < ' ') -# define ISXDIGIT(c) (((c) >= '0' && (c) <= '9') \ - || ((c) >= 'a' && (c) <= 'f') \ - || ((c) >= 'A' && (c) <= 'F')) +# define ISXDIGIT(c) (0 <= char_hexdigit (c)) /* The rest must handle multibyte characters. */