From f8498081911e4c1381c4bed5ac3b664ceca57d64 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 8 Oct 2013 10:40:09 +0400 Subject: [PATCH] Do not allocate huge temporary memory areas and objects while encoding for file I/O, thus greatly reducing memory usage for large buffers. See http://lists.gnu.org/archive/html/emacs-devel/2013-10/msg00180.html. * coding.h (struct coding_system): New member raw_destination. * coding.c (setup_coding_system): Initialize it to zero. (encode_coding_object): If raw_destination is set, do not create dst_object. Add comment. * fileio.c (toplevel): New constant E_WRITE_MAX. (e_write): Do not encode more than E_WRITE_MAX characters per loop iteration. Use raw_destination if E_WRITE_MAX characters are encoded. --- src/ChangeLog | 13 +++++++++++++ src/coding.c | 6 ++++++ src/coding.h | 4 ++++ src/fileio.c | 42 ++++++++++++++++++++++++++++++++++-------- 4 files changed, 57 insertions(+), 8 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 217a8b3d269..06733a2455f 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,16 @@ +2013-10-08 Dmitry Antipov + + Do not allocate huge temporary memory areas and objects while encoding + for file I/O, thus greatly reducing memory usage for large buffers. + See http://lists.gnu.org/archive/html/emacs-devel/2013-10/msg00180.html. + * coding.h (struct coding_system): New member raw_destination. + * coding.c (setup_coding_system): Initialize it to zero. + (encode_coding_object): If raw_destination is set, do not create + dst_object. Add comment. + * fileio.c (toplevel): New constant E_WRITE_MAX. + (e_write): Do not encode more than E_WRITE_MAX characters per loop + iteration. Use raw_destination if E_WRITE_MAX characters are encoded. 
+ 2013-10-08 Jan Djärv * nsterm.m (windowDidExitFullScreen:): diff --git a/src/coding.c b/src/coding.c index c10fb375672..ac828a48683 100644 --- a/src/coding.c +++ b/src/coding.c @@ -5761,6 +5761,7 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) coding->safe_charsets = SDATA (val); coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs)); coding->carryover_bytes = 0; + coding->raw_destination = 0; coding_type = CODING_ATTR_TYPE (attrs); if (EQ (coding_type, Qundecided)) @@ -8352,6 +8353,11 @@ encode_coding_object (struct coding_system *coding, { if (BUFFERP (coding->dst_object)) coding->dst_object = Fbuffer_string (); + else if (coding->raw_destination) + /* This is used to avoid creating huge Lisp string. + NOTE: caller who sets `raw_destination' is also + responsible for freeing `destination' buffer. */ + coding->dst_object = Qnil; else { coding->dst_object diff --git a/src/coding.h b/src/coding.h index 2c01a05d197..0472bec99de 100644 --- a/src/coding.h +++ b/src/coding.h @@ -512,6 +512,10 @@ struct coding_system `charbuf', but at `src_object'. */ unsigned chars_at_source : 1; + /* Nonzero if the result of conversion is in `destination' + buffer rather than in `dst_object'. */ + unsigned raw_destination : 1; + /* Set to 1 if charbuf contains an annotation. */ unsigned annotated : 1; diff --git a/src/fileio.c b/src/fileio.c index 1a2bdfa237c..c7125534e63 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -5263,6 +5263,10 @@ a_write (int desc, Lisp_Object string, ptrdiff_t pos, return 1; } +/* Maximum number of characters that the next + function encodes per one loop iteration. */ + +enum { E_WRITE_MAX = 8 * 1024 * 1024 }; /* Write text in the range START and END into descriptor DESC, encoding them with coding system CODING. 
If STRING is nil, START @@ -5289,9 +5293,16 @@ e_write (int desc, Lisp_Object string, ptrdiff_t start, ptrdiff_t end, coding->src_multibyte = SCHARS (string) < SBYTES (string); if (CODING_REQUIRE_ENCODING (coding)) { - encode_coding_object (coding, string, - start, string_char_to_byte (string, start), - end, string_char_to_byte (string, end), Qt); + ptrdiff_t nchars = min (end - start, E_WRITE_MAX); + + /* Avoid creating huge Lisp string in encode_coding_object. */ + if (nchars == E_WRITE_MAX) + coding->raw_destination = 1; + + encode_coding_object + (coding, string, start, string_char_to_byte (string, start), + start + nchars, string_char_to_byte (string, start + nchars), + Qt); } else { @@ -5308,8 +5319,15 @@ e_write (int desc, Lisp_Object string, ptrdiff_t start, ptrdiff_t end, coding->src_multibyte = (end - start) < (end_byte - start_byte); if (CODING_REQUIRE_ENCODING (coding)) { - encode_coding_object (coding, Fcurrent_buffer (), - start, start_byte, end, end_byte, Qt); + ptrdiff_t nchars = min (end - start, E_WRITE_MAX); + + /* Likewise. */ + if (nchars == E_WRITE_MAX) + coding->raw_destination = 1; + + encode_coding_object + (coding, Fcurrent_buffer (), start, start_byte, + start + nchars, CHAR_TO_BYTE (start + nchars), Qt); } else { @@ -5330,11 +5348,19 @@ e_write (int desc, Lisp_Object string, ptrdiff_t start, ptrdiff_t end, if (coding->produced > 0) { - char *buf = (STRINGP (coding->dst_object) - ? SSDATA (coding->dst_object) - : (char *) BYTE_POS_ADDR (coding->dst_pos_byte)); + char *buf = (coding->raw_destination ? (char *) coding->destination + : (STRINGP (coding->dst_object) + ? SSDATA (coding->dst_object) + : (char *) BYTE_POS_ADDR (coding->dst_pos_byte))); coding->produced -= emacs_write_sig (desc, buf, coding->produced); + if (coding->raw_destination) + { + /* We're responsible for freeing this, see + encode_coding_object to check why. */ + xfree (coding->destination); + coding->raw_destination = 0; + } if (coding->produced) return 0; } -- 2.39.2