Convert hex digits more systematically

author Paul Eggert <eggert@cs.ucla.edu>

Thu, 6 Jul 2017 00:51:31 +0000 (17:51 -0700)

committer Paul Eggert <eggert@cs.ucla.edu>

Thu, 6 Jul 2017 01:59:31 +0000 (18:59 -0700)
author Paul Eggert <eggert@cs.ucla.edu>
Thu, 6 Jul 2017 00:51:31 +0000 (17:51 -0700)
committer Paul Eggert <eggert@cs.ucla.edu>
Thu, 6 Jul 2017 01:59:31 +0000 (18:59 -0700)
diff --git a/src/character.c b/src/character.c

index cf460540725e1dbf6e7d1fa4aff05fdee549e4d2..1c6020ee46807996d9507cc1e22900de515943fa 100644 (file)
--- a/src/character.c
+++ b/src/character.c
@@ -1050,9 +1050,26 @@ blankp (int c)
    return XINT (category) == UNICODE_CATEGORY_Zs; /* separator, space */
  }
  
+signed char HEXDIGIT_CONST hexdigit[UCHAR_MAX + 1] =
+  { /* hexdigit[C] is the value (0-15) of hex digit C.  */
+#if HEXDIGIT_IS_CONST
+    [0 ... UCHAR_MAX] = -1, /* GNU C range default; digits override it below */
+#endif /* else syms_of_character stores the -1 entries */
+    ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4,
+    ['5'] = 5, ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9,
+    ['A'] = 10, ['B'] = 11, ['C'] = 12, ['D'] = 13, ['E'] = 14, ['F'] = 15,
+    ['a'] = 10, ['b'] = 11, ['c'] = 12, ['d'] = 13, ['e'] = 14, ['f'] = 15
+  };
+
  void
  syms_of_character (void)
  {
+#if !HEXDIGIT_IS_CONST
+  /* Set the non-hex digit values to -1.  */
+  for (int i = 0; i <= UCHAR_MAX; i++)
+    hexdigit[i] -= i != '0' && !hexdigit[i];
+#endif
+
    DEFSYM (Qcharacterp, "characterp");
    DEFSYM (Qauto_fill_chars, "auto-fill-chars");
  
diff --git a/src/character.h b/src/character.h

index 62d252e91ba3600b9232729d2a812539f0adcd2d..b073a0dd1e462fa60808298f8727ed6ed9f45631 100644 (file)
--- a/src/character.h
+++ b/src/character.h
@@ -700,6 +700,24 @@ char_table_translate (Lisp_Object obj, int ch)
    return CHARACTERP (obj) ? XINT (obj) : ch;
  }
  
+#if defined __GNUC__ && !defined __STRICT_ANSI__ /* GNU C, not strict ISO */
+# define HEXDIGIT_CONST const /* table is complete at compile time */
+# define HEXDIGIT_IS_CONST true
+#else
+# define HEXDIGIT_CONST /* empty: table is patched by syms_of_character */
+# define HEXDIGIT_IS_CONST false
+#endif
+extern signed char HEXDIGIT_CONST hexdigit[]; /* defined in character.c */
+
+/* If C is a hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F'), return its
+   value (0-15).  Otherwise return -1.  */
+
+INLINE int
+char_hexdigit (int c)
+{ /* C may be any int (e.g. EOF), so range-check before indexing.  */
+  return 0 <= c && c <= UCHAR_MAX ? hexdigit[c] : -1;
+}
+
  INLINE_HEADER_END
  
  #endif /* EMACS_CHARACTER_H */
diff --git a/src/charset.c b/src/charset.c

index d0840f7d2a922217374cd7450e6f4cf728dde9cb..9c3b8db2a538b8340784754adaed0b78c05d7b01 100644 (file)
--- a/src/charset.c
+++ b/src/charset.c
@@ -33,7 +33,6 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  #include <unistd.h>
  #include <limits.h>
  #include <sys/types.h>
-#include <c-ctype.h>
  #include "lisp.h"
  #include "character.h"
  #include "charset.h"
@@ -434,14 +433,15 @@ read_hex (FILE *fp, bool *eof, bool *overflow)
        return 0;
      }
    n = 0;
-  while (c_isxdigit (c = getc_unlocked (fp)))
+  while (true)
      {
+      c = getc_unlocked (fp);
+      int digit = char_hexdigit (c);
+      if (digit < 0)
+       break;
        if (INT_LEFT_SHIFT_OVERFLOW (n, 4))
         *overflow = 1;
-      n = ((n << 4)
-          | (c - ('0' <= c && c <= '9' ? '0'
-                  : 'A' <= c && c <= 'F' ? 'A' - 10
-                  : 'a' - 10)));
+      n = (n << 4) + digit;
      }
    if (c != EOF)
      ungetc (c, fp);
diff --git a/src/editfns.c b/src/editfns.c

index da99c055b544695696d64329e3ac7fd0050e7ea8..d599fcfec80703f12874e59c34a2e186518fa175 100644 (file)
--- a/src/editfns.c
+++ b/src/editfns.c
@@ -4704,10 +4704,8 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
                   char src0 = src[0];
                   int exponent_bytes = 0;
                   bool signedp = src0 == '-' || src0 == '+' || src0 == ' ';
-                 if (zero_flag
-                     && ((src[signedp] >= '0' && src[signedp] <= '9')
-                         || (src[signedp] >= 'a' && src[signedp] <= 'f')
-                         || (src[signedp] >= 'A' && src[signedp] <= 'F')))
+                 unsigned char after_sign = src[signedp];
+                 if (zero_flag && 0 <= char_hexdigit (after_sign))
                     {
                       leading_zeros += padding;
                       padding = 0;
diff --git a/src/image.c b/src/image.c

index 07c4769e9e38ef90e0f55753d861064c6dfb4a7d..6b748ba2af6b74eabd1cba9676cf14c1c6a253c3 100644 (file)
--- a/src/image.c
+++ b/src/image.c
@@ -2595,13 +2595,8 @@ xbm_scan (char **s, char *end, char *sval, int *ival)
               while (*s < end)
                 {
                   c = *(*s)++;
-                 if (c_isdigit (c))
-                   digit = c - '0';
-                 else if (c >= 'a' && c <= 'f')
-                   digit = c - 'a' + 10;
-                 else if (c >= 'A' && c <= 'F')
-                   digit = c - 'A' + 10;
-                 else
+                 digit = char_hexdigit (c);
+                 if (digit < 0)
                     break;
                   value = 16 * value + digit;
                 }
diff --git a/src/lread.c b/src/lread.c

index 182f96223a5171b472c77e99a71e224554d18b62..7c554ba853631cfc6f26a43d0ead6882b11cd662 100644 (file)
--- a/src/lread.c
+++ b/src/lread.c
@@ -2426,25 +2426,13 @@ read_escape (Lisp_Object readcharfun, bool stringp)
         while (1)
           {
             c = READCHAR;
-           if (c >= '0' && c <= '9')
-             {
-               i *= 16;
-               i += c - '0';
-             }
-           else if ((c >= 'a' && c <= 'f')
-                    || (c >= 'A' && c <= 'F'))
-             {
-               i *= 16;
-               if (c >= 'a' && c <= 'f')
-                 i += c - 'a' + 10;
-               else
-                 i += c - 'A' + 10;
-             }
-           else
+           int digit = char_hexdigit (c);
+           if (digit < 0)
               {
                 UNREAD (c);
                 break;
               }
+           i = (i << 4) + digit;
             /* Allow hex escapes as large as ?\xfffffff, because some
                packages use them to denote characters with modifiers.  */
             if ((CHAR_META | (CHAR_META - 1)) < i)
@@ -2474,11 +2462,10 @@ read_escape (Lisp_Object readcharfun, bool stringp)
             c = READCHAR;
             /* `isdigit' and `isalpha' may be locale-specific, which we don't
                want.  */
-           if      (c >= '0' && c <= '9')  i = (i << 4) + (c - '0');
-           else if (c >= 'a' && c <= 'f')  i = (i << 4) + (c - 'a') + 10;
-            else if (c >= 'A' && c <= 'F')  i = (i << 4) + (c - 'A') + 10;
-           else
+           int digit = char_hexdigit (c);
+           if (digit < 0)
               error ("Non-hex digit used for Unicode escape");
+           i = (i << 4) + digit;
           }
         if (i > 0x10FFFF)
           error ("Non-Unicode character: 0x%x", i);
diff --git a/src/regex.c b/src/regex.c

index 240a91f2ba83e69a2a9ff184e14f9d8bbd233a7e..fb48765c96cb27d90b4e59a0bf0ff524958285fa 100644 (file)
--- a/src/regex.c
+++ b/src/regex.c
@@ -306,9 +306,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
  /* In Emacs, these are only used for single-byte characters.  */
  # define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
  # define ISCNTRL(c) ((c) < ' ')
-# define ISXDIGIT(c) (((c) >= '0' && (c) <= '9')               \
-                    || ((c) >= 'a' && (c) <= 'f')      \
-                    || ((c) >= 'A' && (c) <= 'F'))
+# define ISXDIGIT(c) (0 <= char_hexdigit (c))
  
  /* The rest must handle multibyte characters.  */
author	Paul Eggert <eggert@cs.ucla.edu>
	Thu, 6 Jul 2017 00:51:31 +0000 (17:51 -0700)
committer	Paul Eggert <eggert@cs.ucla.edu>
	Thu, 6 Jul 2017 01:59:31 +0000 (18:59 -0700)
src/character.c		patch \| blob \| history
src/character.h		patch \| blob \| history
src/charset.c		patch \| blob \| history
src/editfns.c		patch \| blob \| history
src/image.c		patch \| blob \| history
src/lread.c		patch \| blob \| history
src/regex.c		patch \| blob \| history