This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]}
matches any digit, as well as @samp{+} and @samp{-}.
@item [:graph:]
-This matches graphic characters---everything except @acronym{ASCII} control
-characters, space, and the delete character.
+This matches graphic characters---everything except space,
+@acronym{ASCII} and non-@acronym{ASCII} control characters,
+surrogates, and codepoints unassigned by Unicode, as indicated by the
+Unicode @samp{general-category} property (@pxref{Character
+Properties}).
@item [:lower:]
This matches any lower-case letter, as determined by the current case
table (@pxref{Case Tables}). If @code{case-fold-search} is
@item [:nonascii:]
This matches any non-@acronym{ASCII} character.
@item [:print:]
-This matches printing characters---everything except @acronym{ASCII}
-and non-@acronym{ASCII} control characters (including the delete
-character), surrogates, and codepoints unassigned by Unicode, as
-indicated by the Unicode @samp{general-category} property
-(@pxref{Character Properties}).
+This matches any printing character---either space, or a graphic
+character matched by @samp{[:graph:]}.
@item [:punct:]
This matches any punctuation character. (At present, for multibyte
characters, it matches anything that has non-word syntax.)
*** gulp.el
+++
-** The character class [:print:] in regular expressions
-no longer matches any multibyte character. Instead, Emacs now
+** The character classes [:graph:] and [:print:] in regular expressions
+no longer match every multibyte character. Instead, Emacs now
consults the Unicode character properties to determine which
-characters are printable. In particular, surrogates and unassigned
-codepoints are now rejected by this class. If you want the old
-behavior, use [:multibyte:] instead.
+characters are graphic or printable. In particular, surrogates and
+unassigned codepoints are now rejected. If you want the old behavior,
+use [:multibyte:] instead.
\f
* New Modes and Packages in Emacs 25.1
matches space and tab only.
`graphic', `graph'
- matches graphic characters--everything except ASCII control chars,
- space, and DEL.
+ matches graphic characters--everything except space, ASCII
+ and non-ASCII control characters, surrogates, and codepoints
+ unassigned by Unicode.
`printing', `print'
- matches printing characters--everything except ASCII and non-ASCII
- control characters, surrogates, and codepoints unassigned by Unicode.
+ matches space and graphic characters.
`alphanumeric', `alnum'
matches alphabetic characters and digits. (For multibyte characters,
return gen_cat == UNICODE_CATEGORY_Nd;
}
+/* Return 'true' if C is a graphic character as defined by its
+ Unicode properties, i.e. a printable character other than space. */
+bool
+graphicp (int c)
+{
+ return c != ' ' && printablep (c);
+}
+
/* Return 'true' if C is a printable character as defined by its
Unicode properties. */
bool
extern bool alphabeticp (int);
extern bool decimalnump (int);
+extern bool graphicp (int);
extern bool printablep (int);
/* Return a translation table of id number ID. */
# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
- : 1)
+ : graphicp (c))
# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
#define BIT_MULTIBYTE 0x20
#define BIT_ALPHA 0x40
#define BIT_ALNUM 0x80
-#define BIT_PRINT 0x100
+#define BIT_GRAPH 0x100
+#define BIT_PRINT 0x200
\f
/* Set the bit for character C in a list. */
{
switch (cc)
{
- case RECC_NONASCII: case RECC_GRAPH:
+ case RECC_NONASCII:
case RECC_MULTIBYTE: return BIT_MULTIBYTE;
case RECC_ALPHA: return BIT_ALPHA;
case RECC_ALNUM: return BIT_ALNUM;
case RECC_UPPER: return BIT_UPPER;
case RECC_PUNCT: return BIT_PUNCT;
case RECC_SPACE: return BIT_SPACE;
+ case RECC_GRAPH: return BIT_GRAPH;
case RECC_PRINT: return BIT_PRINT;
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
| (class_bits & BIT_UPPER && ISUPPER (c))
| (class_bits & BIT_WORD && ISWORD (c))
| (class_bits & BIT_ALPHA && ISALPHA (c))
- | (class_bits & BIT_ALNUM && ISALNUM (c)))
+ | (class_bits & BIT_ALNUM && ISALNUM (c))
+ | (class_bits & BIT_GRAPH && ISGRAPH (c))
+ | (class_bits & BIT_PRINT && ISPRINT (c)))
not = !not;
else
CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);