From 0c5eb1c7e798fdf16c3f2694285fe0d18367c6ea Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Mon, 2 Nov 2020 23:37:16 +0100 Subject: [PATCH] Reduce integer-output-format to print-integers-as-characters The variable now only controls whether characters are printed, not the radix. Control chars are printed in human-readable syntax only when special escapes such as ?\n are available. Spaces, formatting and combining chars are excluded (bug#44155). Done in collaboration with Juri Linkov. * src/character.c (graphic_base_p): * src/print.c (named_escape): New functions. (print_object): Change semantics as described above. (syms_of_print): Rename integer-output-format. Update doc string. * doc/lispref/streams.texi (Output Variables): * etc/NEWS: * test/src/print-tests.el (print-integers-as-characters): Rename and update according to new semantics. The test now passes. --- doc/lispref/streams.texi | 18 +++++++---- etc/NEWS | 11 ++++--- src/character.c | 21 +++++++++++++ src/character.h | 1 + src/print.c | 64 ++++++++++++++++++++++++++-------------- test/src/print-tests.el | 39 +++++++++++++----------- 6 files changed, 102 insertions(+), 52 deletions(-) diff --git a/doc/lispref/streams.texi b/doc/lispref/streams.texi index f171f137790..0534afb67fa 100644 --- a/doc/lispref/streams.texi +++ b/doc/lispref/streams.texi @@ -903,10 +903,16 @@ in the C function @code{sprintf}. For further restrictions on what you can use, see the variable's documentation string. @end defvar -@defvar integer-output-format -This variable specifies how to print integer numbers. The default is -@code{nil}, meaning use the decimal format. When bound to @code{t}, -print integers as characters when an integer represents a character -(@pxref{Basic Char Syntax}). When bound to the number @code{16}, -print non-negative integers in the hexadecimal format. 
+@defvar print-integers-as-characters +When this variable is non-@code{nil}, integers that represent +graphic base characters will be printed using Lisp character syntax +(@pxref{Basic Char Syntax}). Other numbers are printed the usual way. +For example, the list @code{(4 65 -1 10)} would be printed as +@samp{(4 ?A -1 ?\n)}. + +More precisely, values printed in character syntax are those +representing characters belonging to the Unicode general categories +Letter, Number, Punctuation, Symbol and Private-use +(@pxref{Character Properties}), as well as the control characters +having their own escape syntax such as newline. @end defvar diff --git a/etc/NEWS b/etc/NEWS index 1a1cfc3751d..06a2864b7e3 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1711,12 +1711,6 @@ ledit.el, lmenu.el, lucid.el and old-whitespace.el. * Lisp Changes in Emacs 28.1 -** New variable 'integer-output-format' determines how to print integer values. -When this variable is bound to the value 't', integers are printed by -printing functions as characters when an integer represents a character. -When bound to the number 16, non-negative integers are printed in the -hexadecimal format. - +++ ** 'define-globalized-minor-mode' now takes a ':predicate' parameter. This can be used to control which major modes the minor mode should be @@ -1909,6 +1903,11 @@ file can affect code in another. For details, see the manual section 'replace-regexp-in-string', 'catch', 'throw', 'error', 'signal' and 'play-sound-file'. ++++ +** New variable 'print-integers-as-characters' modifies integer printing. +If this variable is non-nil, character syntax is used for printing +numbers when this makes sense, such as '?A' for 65. 
+ * Changes in Emacs 28.1 on Non-Free Operating Systems diff --git a/src/character.c b/src/character.c index 5860f6a0c8c..00b73293a3f 100644 --- a/src/character.c +++ b/src/character.c @@ -982,6 +982,27 @@ printablep (int c) || gen_cat == UNICODE_CATEGORY_Cn)); /* unassigned */ } +/* Return true if C is graphic character that can be printed independently. */ +bool +graphic_base_p (int c) +{ + Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c); + if (! FIXNUMP (category)) + return false; + EMACS_INT gen_cat = XFIXNUM (category); + + return (!(gen_cat == UNICODE_CATEGORY_Mn /* mark, nonspacing */ + || gen_cat == UNICODE_CATEGORY_Mc /* mark, combining */ + || gen_cat == UNICODE_CATEGORY_Me /* mark, enclosing */ + || gen_cat == UNICODE_CATEGORY_Zs /* separator, space */ + || gen_cat == UNICODE_CATEGORY_Zl /* separator, line */ + || gen_cat == UNICODE_CATEGORY_Zp /* separator, paragraph */ + || gen_cat == UNICODE_CATEGORY_Cc /* other, control */ + || gen_cat == UNICODE_CATEGORY_Cs /* other, surrogate */ + || gen_cat == UNICODE_CATEGORY_Cf /* other, format */ + || gen_cat == UNICODE_CATEGORY_Cn)); /* other, unassigned */ +} + /* Return true if C is a horizontal whitespace character, as defined by https://www.unicode.org/reports/tr18/tr18-19.html#blank. */ bool diff --git a/src/character.h b/src/character.h index af5023f77cc..cbf43097ae2 100644 --- a/src/character.h +++ b/src/character.h @@ -583,6 +583,7 @@ extern bool alphanumericp (int); extern bool graphicp (int); extern bool printablep (int); extern bool blankp (int); +extern bool graphic_base_p (int); /* Look up the element in char table OBJ at index CH, and return it as an integer. If the element is not a character, return CH itself. 
/* Return the letter that names character I in a backslash escape,
   e.g. 'n' for newline (printed as ?\n), or 0 if I has no such escape.
   \a, \v, \e and \d are excluded from printing as escapes since they
   are somewhat rare as characters and more likely to be plain
   integers.  */
static char
named_escape (int i)
{
  /* CODES[k] is the character whose escape is a backslash followed
     by NAMES[k].  The loop stops at the end of NAMES, so the
     terminating NUL of CODES is never matched against I.  */
  static const char codes[] = "\b\t\n\f\r ";
  static const char names[] = "btnfrs";

  for (int k = 0; names[k] != '\0'; k++)
    if (i == codes[k])
      return names[k];

  return 0;
}
printcharfun); } } @@ -2270,12 +2289,13 @@ A value of nil means to use the shortest notation that represents the number without losing information. */); Vfloat_output_format = Qnil; - DEFVAR_LISP ("integer-output-format", Vinteger_output_format, - doc: /* The format used to print integers. -When t, print characters from integers that represent a character. -When a number 16, print non-negative integers in the hexadecimal format. -Otherwise, by default print integers in the decimal format. */); - Vinteger_output_format = Qnil; + DEFVAR_BOOL ("print-integers-as-characters", print_integers_as_characters, + doc: /* Non-nil means integers are printed using characters syntax. +Only independent graphic characters, and control characters with named +escape sequences such as newline, are printed this way. Other +integers, including those corresponding to raw bytes, are printed +as numbers the usual way. */); + print_integers_as_characters = Qnil; DEFVAR_LISP ("print-length", Vprint_length, doc: /* Maximum length of list to print before abbreviating. diff --git a/test/src/print-tests.el b/test/src/print-tests.el index 7b026b6b21f..202555adb3b 100644 --- a/test/src/print-tests.el +++ b/test/src/print-tests.el @@ -383,25 +383,28 @@ otherwise, use a different charset." (let ((print-length 1)) (format "%S" h)))))) -(print-tests--deftest print-integer-output-format () +(print-tests--deftest print-integers-as-characters () ;; Bug#44155. - (let ((integer-output-format t) - (syms (list ?? 
?\; ?\( ?\) ?\{ ?\} ?\[ ?\] ?\" ?\' ?\\ ?Á))) - (should (equal (read (print-tests--prin1-to-string syms)) syms)) - (should (equal (print-tests--prin1-to-string syms) - (concat "(" (mapconcat #'prin1-char syms " ") ")")))) - (let ((integer-output-format t) - (syms (list -1 0 1 ?\120 4194175 4194176 (max-char) (1+ (max-char))))) - (should (equal (read (print-tests--prin1-to-string syms)) syms))) - (let ((integer-output-format 16) - (syms (list -1 0 1 most-positive-fixnum (1+ most-positive-fixnum)))) - (should (equal (read (print-tests--prin1-to-string syms)) syms)) - (should (equal (print-tests--prin1-to-string syms) - (concat "(" (mapconcat - (lambda (i) - (if (and (>= i 0) (<= i most-positive-fixnum)) - (format "#x%x" i) (format "%d" i))) - syms " ") ")"))))) + (let* ((print-integers-as-characters t) + (chars '(?? ?\; ?\( ?\) ?\{ ?\} ?\[ ?\] ?\" ?\' ?\\ ?f ?~ ?Á 32 + ?\n ?\r ?\t ?\b ?\f ?\a ?\v ?\e ?\d)) + (nums '(-1 -65 0 1 31 #x80 #x9f #x110000 #x3fff80 #x3fffff)) + (nonprints '(#xd800 #xdfff #x030a #xffff #x2002 #x200c)) + (printed-chars (print-tests--prin1-to-string chars)) + (printed-nums (print-tests--prin1-to-string nums)) + (printed-nonprints (print-tests--prin1-to-string nonprints))) + (should (equal (read printed-chars) chars)) + (should (equal + printed-chars + (concat + "(?? ?\\; ?\\( ?\\) ?\\{ ?\\} ?\\[ ?\\] ?\\\" ?\\' ?\\\\" + " ?f ?~ ?Á ?\\s ?\\n ?\\r ?\\t ?\\b ?\\f 7 11 27 127)"))) + (should (equal (read printed-nums) nums)) + (should (equal printed-nums + "(-1 -65 0 1 31 128 159 1114112 4194176 4194303)")) + (should (equal (read printed-nonprints) nonprints)) + (should (equal printed-nonprints + "(55296 57343 778 65535 8194 8204)")))) (provide 'print-tests) ;;; print-tests.el ends here -- 2.39.2