From bed23cb24b269a01a437ca83e26bc0830ff5cfa5 Mon Sep 17 00:00:00 2001 From: "Richard M. Stallman" Date: Thu, 23 Apr 1998 21:22:51 +0000 Subject: [PATCH] (read_from_string_index_byte): New variable. (read_from_string_index): Now counts characters. (readchar, unreadchar, Fread_from_string): Changed accordingly. (readchar): Read a multibyte char all at once from a buffer, marker or string. (unreadchar): Unread a multibyte char all at once. (read1): Properly handle non-escaped multibyte chars. They force a string to be multibyte. When reading direct from a file, any multibyte sequence means a multibyte string. Insist on MAX_LENGTH_OF_MULTI_BYTE_FORM bytes when checking for read_buffer full; this way need not check specially for multibyte. --- src/lread.c | 225 ++++++++++++++++++++++++++-------------------------- 1 file changed, 114 insertions(+), 111 deletions(-) diff --git a/src/lread.c b/src/lread.c index 18fc07246ad..895fb6caa19 100644 --- a/src/lread.c +++ b/src/lread.c @@ -131,6 +131,7 @@ static int read_pure; /* For use within read-from-string (this reader is non-reentrant!!) */ static int read_from_string_index; +static int read_from_string_index_byte; static int read_from_string_limit; /* Number of bytes left to read in the buffer character @@ -169,64 +170,59 @@ readchar (readcharfun) Lisp_Object readcharfun; { Lisp_Object tem; - register struct buffer *inbuffer; register int c, mpos; if (BUFFERP (readcharfun)) { - inbuffer = XBUFFER (readcharfun); + register struct buffer *inbuffer = XBUFFER (readcharfun); - if (readchar_backlog == 0) - { - int pt_byte = BUF_PT_BYTE (inbuffer); - int orig_pt_byte = pt_byte; + int pt_byte = BUF_PT_BYTE (inbuffer); + int orig_pt_byte = pt_byte; - if (pt_byte >= BUF_ZV_BYTE (inbuffer)) - return -1; + if (pt_byte >= BUF_ZV_BYTE (inbuffer)) + return -1; - if (! NILP (inbuffer->enable_multibyte_characters)) - BUF_INC_POS (inbuffer, pt_byte); - else - pt_byte++; - SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte); - readchar_backlog = pt_byte - orig_pt_byte; + if (! NILP (inbuffer->enable_multibyte_characters)) + { + unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, pt_byte); + BUF_INC_POS (inbuffer, pt_byte); + c = STRING_CHAR (p, pt_byte - orig_pt_byte); + } + else + { + c = BUF_FETCH_BYTE (inbuffer, pt_byte); + pt_byte++; } + SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte); - /* We get the address of the byte just passed, - which is the last byte of the character. - The other bytes in this character are consecutive with it, - because the gap can't be in the middle of a character. */ - return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1) - - --readchar_backlog); + return c; } if (MARKERP (readcharfun)) { - inbuffer = XMARKER (readcharfun)->buffer; - - if (readchar_backlog == 0) - { - int bytepos = marker_byte_position (readcharfun); - int orig_bytepos = bytepos; + register struct buffer *inbuffer = XMARKER (readcharfun)->buffer; - if (bytepos >= BUF_ZV_BYTE (inbuffer)) - return -1; + int bytepos = marker_byte_position (readcharfun); + int orig_bytepos = bytepos; - if (! NILP (inbuffer->enable_multibyte_characters)) - INC_POS (bytepos); - else - bytepos++; - XMARKER (readcharfun)->bytepos = bytepos; - XMARKER (readcharfun)->charpos++; + if (bytepos >= BUF_ZV_BYTE (inbuffer)) + return -1; - readchar_backlog = bytepos - orig_bytepos; + if (! NILP (inbuffer->enable_multibyte_characters)) + { + unsigned char *p = BUF_BYTE_ADDRESS (inbuffer, bytepos); + BUF_INC_POS (inbuffer, bytepos); + c = STRING_CHAR (p, bytepos - orig_bytepos); + } + else + { + c = BUF_FETCH_BYTE (inbuffer, bytepos); + bytepos++; } - /* We get the address of the byte just passed, - which is the last byte of the character. - The other bytes in this character are consecutive with it, - because the gap can't be in the middle of a character. */ - return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1) - - --readchar_backlog); + XMARKER (readcharfun)->bytepos = bytepos; + XMARKER (readcharfun)->charpos++; + + return c; } if (EQ (readcharfun, Qget_file_char)) { @@ -244,13 +240,14 @@ readchar (readcharfun) if (STRINGP (readcharfun)) { - register int c; - /* This used to be return of a conditional expression, - but that truncated -1 to a char on VMS. */ - if (read_from_string_index < read_from_string_limit) - c = XSTRING (readcharfun)->data[read_from_string_index++]; - else + if (read_from_string_index >= read_from_string_limit) c = -1; + else if (STRING_MULTIBYTE (readcharfun)) + FETCH_STRING_CHAR_ADVANCE (c, readcharfun, + read_from_string_index, + read_from_string_index_byte); + else + c = XSTRING (readcharfun)->data[read_from_string_index++]; return c; } @@ -276,42 +273,36 @@ unreadchar (readcharfun, c) ; else if (BUFFERP (readcharfun)) { - if (!SINGLE_BYTE_CHAR_P (c)) - readchar_backlog++; - else - { - struct buffer *b = XBUFFER (readcharfun); - int bytepos = BUF_PT_BYTE (b); + struct buffer *b = XBUFFER (readcharfun); + int bytepos = BUF_PT_BYTE (b); - BUF_PT (b)--; - if (! NILP (b->enable_multibyte_characters)) - BUF_DEC_POS (b, bytepos); - else - bytepos--; + BUF_PT (b)--; + if (! NILP (b->enable_multibyte_characters)) + BUF_DEC_POS (b, bytepos); + else + bytepos--; - BUF_PT_BYTE (b) = bytepos; - } + BUF_PT_BYTE (b) = bytepos; } else if (MARKERP (readcharfun)) { - if (!SINGLE_BYTE_CHAR_P (c)) - readchar_backlog++; - else - { - struct buffer *b = XMARKER (readcharfun)->buffer; - int bytepos = XMARKER (readcharfun)->bytepos; + struct buffer *b = XMARKER (readcharfun)->buffer; + int bytepos = XMARKER (readcharfun)->bytepos; - XMARKER (readcharfun)->charpos--; - if (! NILP (b->enable_multibyte_characters)) - BUF_DEC_POS (b, bytepos); - else - bytepos--; + XMARKER (readcharfun)->charpos--; + if (! NILP (b->enable_multibyte_characters)) + BUF_DEC_POS (b, bytepos); + else + bytepos--; - XMARKER (readcharfun)->bytepos = bytepos; - } + XMARKER (readcharfun)->bytepos = bytepos; } else if (STRINGP (readcharfun)) - read_from_string_index--; + { + read_from_string_index--; + read_from_string_index_byte + = string_char_to_byte (readcharfun, read_from_string_index); + } else if (EQ (readcharfun, Qget_file_char)) ungetc (c, instream); else @@ -321,7 +312,7 @@ unreadchar (readcharfun, c) static Lisp_Object read0 (), read1 (), read_list (), read_vector (); static int read_multibyte (); -/* get a character from the tty */ +/* Get a character from the tty. */ extern Lisp_Object read_char (); @@ -1180,12 +1171,11 @@ START and END optionally delimit a substring of STRING from which to read;\n\ CHECK_STRING (string,0); if (NILP (end)) - endval = STRING_BYTES (XSTRING (string)); + endval = XSTRING (string)->size; else { CHECK_NUMBER (end, 2); - endval = string_char_to_byte (string, XINT (end)); - if (endval < 0 || endval > STRING_BYTES (XSTRING (string))) + if (endval < 0 || endval > XSTRING (string)->size) args_out_of_range (string, end); } @@ -1194,21 +1184,19 @@ START and END optionally delimit a substring of STRING from which to read;\n\ else { CHECK_NUMBER (start, 1); - startval = string_char_to_byte (string, XINT (start)); if (startval < 0 || startval > endval) args_out_of_range (string, start); } read_from_string_index = startval; + read_from_string_index_byte = string_char_to_byte (string, startval); read_from_string_limit = endval; new_backquote_flag = 0; read_objects = Qnil; tem = read0 (string); - endval = string_byte_to_char (string, - read_from_string_index); - return Fcons (tem, make_number (endval)); + return Fcons (tem, make_number (read_from_string_index)); } /* Use this for recursive reads, in contexts where internal tokens @@ -1744,49 +1732,45 @@ read1 (readcharfun, pch, first_in_list) while ((c = READCHAR) >= 0 && c != '\"') { - if (p == end) + if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM) { char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); p += new - read_buffer; read_buffer += new - read_buffer; end = read_buffer + read_buffer_size; } + if (c == '\\') { c = read_escape (readcharfun, 1); - if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META))) + + /* C is -1 if \ newline has just been seen */ + if (c == -1) { - unsigned char workbuf[4]; - unsigned char *str = workbuf; - int length; - - length = non_ascii_char_to_string (c, workbuf, &str); - if (length > 1) - force_multibyte = 1; - - if (p + length > end) - { - char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); - p += new - read_buffer; - read_buffer += new - read_buffer; - end = read_buffer + read_buffer_size; - } - - bcopy (str, p, length); - p += length; + if (p == read_buffer) + cancel = 1; continue; } + /* If an escape specifies a non-ASCII single-byte character, this must be a unibyte string. */ - else if (! ASCII_BYTE_P (c)) + if (SINGLE_BYTE_CHAR_P ((c & ~CHAR_META)) + && ! ASCII_BYTE_P (c)) force_singlebyte = 1; } - /* c is -1 if \ newline has just been seen */ - if (c == -1) + if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_META))) { - if (p == read_buffer) - cancel = 1; + unsigned char workbuf[4]; + unsigned char *str = workbuf; + int length; + + length = non_ascii_char_to_string (c, workbuf, &str); + if (length > 1) + force_multibyte = 1; + + bcopy (str, p, length); + p += length; } else { @@ -1814,7 +1798,7 @@ read1 (readcharfun, pch, first_in_list) return make_number (0); if (force_singlebyte && force_multibyte) - error ("Multibyte and single-byte escapes in one string constant"); + error ("Multibyte and unibyte characters in one string constant"); if (force_singlebyte) nchars = p - read_buffer; @@ -1831,7 +1815,14 @@ read1 (readcharfun, pch, first_in_list) return Fstring_make_unibyte (string); } } + else if (EQ (readcharfun, Qget_file_char)) + /* Nowadays, reading directly from a file + is used only for compiled Emacs Lisp files, + and those always use the Emacs internal encoding. */ + nchars = multibyte_chars_in_text (read_buffer, p - read_buffer); else + /* In all other cases, if we read these bytes as + separate characters, treat them as separate characters now. */ nchars = p - read_buffer; if (read_pure) @@ -1884,7 +1875,7 @@ read1 (readcharfun, pch, first_in_list) || c == '[' || c == ']' || c == '#' )) { - if (p == end) + if (end - p < MAX_LENGTH_OF_MULTI_BYTE_FORM) { register char *new = (char *) xrealloc (read_buffer, read_buffer_size *= 2); p += new - read_buffer; @@ -1897,7 +1888,19 @@ read1 (readcharfun, pch, first_in_list) quoted = 1; } - *p++ = c; + if (! SINGLE_BYTE_CHAR_P (c)) + { + unsigned char workbuf[4]; + unsigned char *str = workbuf; + int length; + + length = non_ascii_char_to_string (c, workbuf, &str); + + bcopy (str, p, length); + p += length; + } + else + *p++ = c; c = READCHAR; } @@ -2553,7 +2556,7 @@ init_obarray () Qvariable_documentation = intern ("variable-documentation"); staticpro (&Qvariable_documentation); - read_buffer_size = 100; + read_buffer_size = 100 + MAX_LENGTH_OF_MULTI_BYTE_FORM; read_buffer = (char *) malloc (read_buffer_size); } -- 2.39.2