From b8ead34f5df92b771520f4d090ff6cde49ca5705 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 7 Jul 2017 18:12:16 -0700 Subject: [PATCH] Fix more ungetc bugs with encoding errors * src/lread.c (infile): New variable, replacing ... (instream): ... this. All uses changed. (readbyte_from_stdio): New function, which deals with lookahead. (readbyte_from_file, Fget_file_char): Use it. (Fget_file_char): When misused, signal an error instead of relying on undefined behavior. (close_infile_unwind): New function. (Fload): Use it. (readevalloop): 2nd arg is now struct infile *, not FILE *. All callers changed. (read1): Handle lookahead when copying doc strings with encoding errors. --- src/lread.c | 100 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 29 deletions(-) diff --git a/src/lread.c b/src/lread.c index 44eaf13996a..8e7cd3c5510 100644 --- a/src/lread.c +++ b/src/lread.c @@ -103,8 +103,20 @@ static Lisp_Object read_objects_map; (to reduce allocations), or nil. */ static Lisp_Object read_objects_completed; -/* File for get_file_char to read from. Use by load. */ -static FILE *instream; +/* File and lookahead for get-file-char and get-emacs-mule-file-char + to read from. Used by Fload. */ +static struct infile +{ + /* The input stream. */ + FILE *stream; + + /* Lookahead byte count. */ + signed char lookahead; + + /* Lookahead bytes, in reverse order. Keep these here because it is + not portable to ungetc more than one byte at a time. */ + unsigned char buf[MAX_MULTIBYTE_LENGTH - 1]; +} *infile; /* For use within read-from-string (this reader is non-reentrant!!) */ static ptrdiff_t read_from_string_index; @@ -149,7 +161,7 @@ static Lisp_Object Vloads_in_progress; static int read_emacs_mule_char (int, int (*) (int, Lisp_Object), Lisp_Object); -static void readevalloop (Lisp_Object, FILE *, Lisp_Object, bool, +static void readevalloop (Lisp_Object, struct infile *, Lisp_Object, bool, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object); @@ -361,8 +373,9 @@ skip_dyn_bytes (Lisp_Object readcharfun, ptrdiff_t n) if (FROM_FILE_P (readcharfun)) { block_input (); /* FIXME: Not sure if it's needed. */ - fseek (instream, n, SEEK_CUR); + fseek (infile->stream, n - infile->lookahead, SEEK_CUR); unblock_input (); + infile->lookahead = 0; } else { /* We're not reading directly from a file. In that case, it's difficult @@ -384,8 +397,9 @@ skip_dyn_eof (Lisp_Object readcharfun) if (FROM_FILE_P (readcharfun)) { block_input (); /* FIXME: Not sure if it's needed. */ - fseek (instream, 0, SEEK_END); + fseek (infile->stream, 0, SEEK_END); unblock_input (); + infile->lookahead = 0; } else while (READCHAR >= 0); @@ -458,15 +472,13 @@ readbyte_for_lambda (int c, Lisp_Object readcharfun) static int -readbyte_from_file (int c, Lisp_Object readcharfun) +readbyte_from_stdio (void) { - if (c >= 0) - { - block_input (); - ungetc (c, instream); - unblock_input (); - return 0; - } + if (infile->lookahead) + return infile->buf[--infile->lookahead]; + + int c; + FILE *instream = infile->stream; block_input (); @@ -485,6 +497,19 @@ readbyte_from_file (int c, Lisp_Object readcharfun) return (c == EOF ? -1 : c); } +static int +readbyte_from_file (int c, Lisp_Object readcharfun) +{ + if (c >= 0) + { + eassert (infile->lookahead < sizeof infile->buf); + infile->buf[infile->lookahead++] = c; + return 0; + } + + return readbyte_from_stdio (); +} + static int readbyte_from_string (int c, Lisp_Object readcharfun) { @@ -507,7 +532,7 @@ readbyte_from_string (int c, Lisp_Object readcharfun) } -/* Read one non-ASCII character from INSTREAM. The character is +/* Read one non-ASCII character from INFILE. The character is encoded in `emacs-mule' and the first byte is already read in C. */ @@ -777,11 +802,9 @@ DEFUN ("get-file-char", Fget_file_char, Sget_file_char, 0, 0, 0, doc: /* Don't use this yourself. */) (void) { - register Lisp_Object val; - block_input (); - XSETINT (val, getc_unlocked (instream)); - unblock_input (); - return val; + if (!infile) + error ("get-file-char misused"); + return make_number (readbyte_from_stdio ()); } @@ -1026,6 +1049,15 @@ suffix_p (Lisp_Object string, const char *suffix) return string_len >= suffix_len && !strcmp (SSDATA (string) + string_len - suffix_len, suffix); } +static void +close_infile_unwind (void *arg) +{ + FILE *stream = arg; + eassert (infile->stream == stream); + infile = NULL; + fclose (stream); +} + DEFUN ("load", Fload, Sload, 1, 5, 0, doc: /* Execute a file of Lisp code named FILE. First try FILE with `.elc' appended, then try with `.el', then try @@ -1345,7 +1377,7 @@ Return t if the file exists and loads successfully. */) } if (! stream) report_file_error ("Opening stdio stream", file); - set_unwind_protect_ptr (fd_index, fclose_unwind, stream); + set_unwind_protect_ptr (fd_index, close_infile_unwind, stream); if (! NILP (Vpurify_flag)) Vpreloaded_file_list = Fcons (Fpurecopy (file), Vpreloaded_file_list); @@ -1368,19 +1400,23 @@ Return t if the file exists and loads successfully. */) specbind (Qinhibit_file_name_operation, Qnil); specbind (Qload_in_progress, Qt); - instream = stream; + struct infile input; + input.stream = stream; + input.lookahead = 0; + infile = &input; + if (lisp_file_lexically_bound_p (Qget_file_char)) Fset (Qlexical_binding, Qt); if (! version || version >= 22) - readevalloop (Qget_file_char, stream, hist_file_name, + readevalloop (Qget_file_char, &input, hist_file_name, 0, Qnil, Qnil, Qnil, Qnil); else { /* We can't handle a file which was compiled with byte-compile-dynamic by older version of Emacs. */ specbind (Qload_force_doc_strings, Qt); - readevalloop (Qget_emacs_mule_file_char, stream, hist_file_name, + readevalloop (Qget_emacs_mule_file_char, &input, hist_file_name, 0, Qnil, Qnil, Qnil, Qnil); } unbind_to (count, Qnil); @@ -1811,7 +1847,7 @@ readevalloop_eager_expand_eval (Lisp_Object val, Lisp_Object macroexpand) static void readevalloop (Lisp_Object readcharfun, - FILE *stream, + struct infile *infile0, Lisp_Object sourcename, bool printflag, Lisp_Object unibyte, Lisp_Object readfun, @@ -1911,7 +1947,7 @@ readevalloop (Lisp_Object readcharfun, if (b && first_sexp) whole_buffer = (BUF_PT (b) == BUF_BEG (b) && BUF_ZV (b) == BUF_Z (b)); - instream = stream; + infile = infile0; read_next: c = READCHAR; if (c == ';') @@ -2001,7 +2037,7 @@ readevalloop (Lisp_Object readcharfun, } build_load_history (sourcename, - stream || whole_buffer); + infile0 || whole_buffer); unbind_to (count, Qnil); } @@ -2941,11 +2977,17 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list) saved_doc_string_size = nskip + extra; } - saved_doc_string_position = file_tell (instream); + FILE *instream = infile->stream; + saved_doc_string_position = (file_tell (instream) + - infile->lookahead); - /* Copy that many characters into saved_doc_string. */ + /* Copy that many bytes into saved_doc_string. */ + i = 0; + for (int n = min (nskip, infile->lookahead); 0 < n; n--) + saved_doc_string[i++] + = c = infile->buf[--infile->lookahead]; block_input (); - for (i = 0; i < nskip && c >= 0; i++) + for (; i < nskip && 0 <= c; i++) saved_doc_string[i] = c = getc_unlocked (instream); unblock_input (); -- 2.39.2