From: Kenichi Handa Date: Fri, 19 May 2000 23:59:50 +0000 (+0000) Subject: (concat): Handle 8-bit characters correctly. X-Git-Tag: emacs-pretest-21.0.90~3876 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=2efdd1b92e19b2db5ae036673f4031d55115aa79;p=emacs.git (concat): Handle 8-bit characters correctly. (Fstring_as_unibyte): Be sure to make all 8-bit characters in unibyte form in the result. (Fstring_as_multibyte): Be sure to make all 8-bit characters in valid multibyte form in the result. (map_char_table): Use MAKE_CHAR instead of MAKE_NON_ASCII_CHAR. (Fbase64_encode_region, Fbase64_encode_string): If base64_encode_1 returns -1, signal an error. (base64_encode_1): New arg MULTIBYTE. Get each character by STRING_CHAR_AND_LENGTH if MULTIBYTE is nonzero. If a multibyte character is found, return -1. (Fbase64_decode_region): Delete code for handling byte-combining. Treat each decoded byte as a unibyte character. (Fbase64_decode_string): Return unibyte string. (Fcompare_strings, concat, string_char_to_byte, string_byte_to_char): Use FETCH_STRING_CHAR_ADVANCE_NO_CHECK instead of FETCH_STRING_CHAR_ADVANCE. (Fstring_lessp): Use FETCH_STRING_CHAR_ADVANCE unconditionally. (mapcar1): If SEQ is a string, always use FETCH_STRING_CHAR_ADVANCE.
--- diff --git a/src/fns.c b/src/fns.c index ed513098ee3..639ee5d95ed 100644 --- a/src/fns.c +++ b/src/fns.c @@ -290,7 +290,7 @@ If string STR1 is greater, the value is a positive number N;\n\ int c1, c2; if (STRING_MULTIBYTE (str1)) - FETCH_STRING_CHAR_ADVANCE (c1, str1, i1, i1_byte); + FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c1, str1, i1, i1_byte); else { c1 = XSTRING (str1)->data[i1++]; @@ -298,7 +298,7 @@ If string STR1 is greater, the value is a positive number N;\n\ } if (STRING_MULTIBYTE (str2)) - FETCH_STRING_CHAR_ADVANCE (c2, str2, i2, i2_byte); + FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c2, str2, i2, i2_byte); else { c2 = XSTRING (str2)->data[i2++]; @@ -367,15 +367,8 @@ Symbols are also allowed; their print names are used instead.") characters, not just the bytes. */ int c1, c2; - if (STRING_MULTIBYTE (s1)) - FETCH_STRING_CHAR_ADVANCE (c1, s1, i1, i1_byte); - else - c1 = XSTRING (s1)->data[i1++]; - - if (STRING_MULTIBYTE (s2)) - FETCH_STRING_CHAR_ADVANCE (c2, s2, i2, i2_byte); - else - c2 = XSTRING (s2)->data[i2++]; + FETCH_STRING_CHAR_ADVANCE (c1, s1, i1, i1_byte); + FETCH_STRING_CHAR_ADVANCE (c2, s2, i2, i2_byte); if (c1 != c2) return c1 < c2 ? 
Qt : Qnil; @@ -625,7 +618,7 @@ concat (nargs, args, target_type, last_special) wrong_type_argument (Qintegerp, ch); this_len_byte = CHAR_BYTES (XINT (ch)); result_len_byte += this_len_byte; - if (this_len_byte > 1) + if (!SINGLE_BYTE_CHAR_P (XINT (ch))) some_multibyte = 1; } else if (BOOL_VECTOR_P (this) && XBOOL_VECTOR (this)->size > 0) @@ -638,7 +631,7 @@ concat (nargs, args, target_type, last_special) wrong_type_argument (Qintegerp, ch); this_len_byte = CHAR_BYTES (XINT (ch)); result_len_byte += this_len_byte; - if (this_len_byte > 1) + if (!SINGLE_BYTE_CHAR_P (XINT (ch))) some_multibyte = 1; } else if (STRINGP (this)) @@ -753,9 +746,9 @@ concat (nargs, args, target_type, last_special) int c; if (STRING_MULTIBYTE (this)) { - FETCH_STRING_CHAR_ADVANCE (c, this, - thisindex, - thisindex_byte); + FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, this, + thisindex, + thisindex_byte); XSETFASTINT (elt, c); } else @@ -799,7 +792,12 @@ concat (nargs, args, target_type, last_special) CHECK_NUMBER (elt, 0); if (SINGLE_BYTE_CHAR_P (XINT (elt))) { - XSTRING (val)->data[toindex_byte++] = XINT (elt); + if (some_multibyte) + toindex_byte + += CHAR_STRING (XINT (elt), + XSTRING (val)->data + toindex_byte); + else + XSTRING (val)->data[toindex_byte++] = XINT (elt); if (some_multibyte && toindex_byte > 0 && count_combining (XSTRING (val)->data, @@ -886,7 +884,8 @@ string_char_to_byte (string, char_index) while (best_below < char_index) { int c; - FETCH_STRING_CHAR_ADVANCE (c, string, best_below, best_below_byte); + FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, + best_below, best_below_byte); } i = best_below; i_byte = best_below_byte; @@ -958,7 +957,8 @@ string_byte_to_char (string, byte_index) while (best_below_byte < byte_index) { int c; - FETCH_STRING_CHAR_ADVANCE (c, string, best_below, best_below_byte); + FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, + best_below, best_below_byte); } i = best_below; i_byte = best_below_byte; @@ -1070,7 +1070,9 @@ DEFUN ("string-as-unibyte", 
Fstring_as_unibyte, Sstring_as_unibyte, 1, 1, 0, "Return a unibyte string with the same individual bytes as STRING.\n\ If STRING is unibyte, the result is STRING itself.\n\ -Otherwise it is a newly created string, with no text properties.") +Otherwise it is a newly created string, with no text properties.\n\ +If STRING is multibyte and contains a character of charset `binary',\n\ +it is converted to the corresponding single byte.") (string) Lisp_Object string; { @@ -1078,10 +1080,13 @@ Otherwise it is a newly created string, with no text properties.") if (STRING_MULTIBYTE (string)) { - string = Fcopy_sequence (string); - XSTRING (string)->size = STRING_BYTES (XSTRING (string)); - XSTRING (string)->intervals = NULL_INTERVAL; - SET_STRING_BYTES (XSTRING (string), -1); + int bytes = STRING_BYTES (XSTRING (string)); + unsigned char *str = (unsigned char *) xmalloc (bytes); + + bcopy (XSTRING (string)->data, str, bytes); + bytes = str_as_unibyte (str, bytes); + string = make_unibyte_string (str, bytes); + xfree (str); } return string; } @@ -1090,7 +1095,10 @@ DEFUN ("string-as-multibyte", Fstring_as_multibyte, Sstring_as_multibyte, 1, 1, 0, "Return a multibyte string with the same individual bytes as STRING.\n\ If STRING is multibyte, the result is STRING itself.\n\ -Otherwise it is a newly created string, with no text properties.") +Otherwise it is a newly created string, with no text properties.\n\ +If STRING is unibyte and contains an individual 8-bit byte (i.e. not\n\ +part of multibyte form), it is converted to the corresponding\n\ +multibyte character of charset `binary'.") (string) Lisp_Object string; { @@ -1098,12 +1106,19 @@ Otherwise it is a newly created string, with no text properties.") if (! 
STRING_MULTIBYTE (string)) { - int nbytes = STRING_BYTES (XSTRING (string)); - int newlen = multibyte_chars_in_text (XSTRING (string)->data, nbytes); - - string = Fcopy_sequence (string); - XSTRING (string)->size = newlen; - XSTRING (string)->size_byte = nbytes; + Lisp_Object new_string; + int nchars, nbytes; + + parse_str_as_multibyte (XSTRING (string)->data, + STRING_BYTES (XSTRING (string)), + &nchars, &nbytes); + new_string = make_uninit_multibyte_string (nchars, nbytes); + bcopy (XSTRING (string)->data, XSTRING (new_string)->data, + STRING_BYTES (XSTRING (string))); + if (nbytes != STRING_BYTES (XSTRING (string))) + str_as_multibyte (XSTRING (new_string)->data, nbytes, + STRING_BYTES (XSTRING (string)), NULL); + string = new_string; XSTRING (string)->intervals = NULL_INTERVAL; } return string; @@ -2374,7 +2389,7 @@ map_char_table (c_function, function, subtable, arg, depth, indices) elt = XCHAR_TABLE (subtable)->defalt; c1 = depth >= 1 ? XFASTINT (indices[1]) : 0; c2 = depth >= 2 ? XFASTINT (indices[2]) : 0; - c = MAKE_NON_ASCII_CHAR (charset, c1, c2); + c = MAKE_CHAR (charset, c1, c2); if (c_function) (*c_function) (arg, make_number (c), elt); else @@ -2513,20 +2528,8 @@ mapcar1 (leni, vals, fn, seq) vals[i] = dummy; } } - else if (STRINGP (seq) && ! STRING_MULTIBYTE (seq)) - { - /* Single-byte string. */ - for (i = 0; i < leni; i++) - { - XSETFASTINT (dummy, XSTRING (seq)->data[i]); - dummy = call1 (fn, dummy); - if (vals) - vals[i] = dummy; - } - } else if (STRINGP (seq)) { - /* Multi-byte string. */ int i_byte; for (i = 0, i_byte = 0; i < leni;) @@ -3100,7 +3103,7 @@ static short base64_char_to_value[128] = base64 characters. 
*/ -static int base64_encode_1 P_ ((const char *, char *, int, int)); +static int base64_encode_1 P_ ((const char *, char *, int, int, int)); static int base64_decode_1 P_ ((const char *, char *, int)); DEFUN ("base64-encode-region", Fbase64_encode_region, Sbase64_encode_region, @@ -3135,10 +3138,19 @@ into shorter lines.") else encoded = (char *) xmalloc (allength); encoded_length = base64_encode_1 (BYTE_POS_ADDR (ibeg), encoded, length, - NILP (no_line_break)); + NILP (no_line_break), + !NILP (current_buffer->enable_multibyte_characters)); if (encoded_length > allength) abort (); + if (encoded_length < 0) + { + /* The encoding wasn't possible. */ + if (length > MAX_ALLOCA) + xfree (encoded); + error ("Base64 encoding failed"); + } + /* Now we have encoded the region, so we insert the new contents and delete the old. (Insert first in order to preserve markers.) */ SET_PT_BOTH (XFASTINT (beg), ibeg); @@ -3187,10 +3199,19 @@ into shorter lines.") encoded = (char *) xmalloc (allength); encoded_length = base64_encode_1 (XSTRING (string)->data, - encoded, length, NILP (no_line_break)); + encoded, length, NILP (no_line_break), + STRING_MULTIBYTE (string)); if (encoded_length > allength) abort (); + if (encoded_length < 0) + { + /* The encoding wasn't possible. 
*/ + if (length > MAX_ALLOCA) + xfree (encoded); + error ("Base64 encoding failed"); + } + encoded_string = make_unibyte_string (encoded, encoded_length); if (allength > MAX_ALLOCA) xfree (encoded); @@ -3199,20 +3220,30 @@ into shorter lines.") } static int -base64_encode_1 (from, to, length, line_break) +base64_encode_1 (from, to, length, line_break, multibyte) const char *from; char *to; int length; int line_break; + int multibyte; { int counter = 0, i = 0; char *e = to; unsigned char c; unsigned int value; + int bytes; while (i < length) { - c = from[i++]; + if (multibyte) + { + c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes); + if (!SINGLE_BYTE_CHAR_P (c)) + return -1; + i += bytes; + } + else + c = from[i++]; /* Wrap line every 76 characters. */ @@ -3242,7 +3273,13 @@ base64_encode_1 (from, to, length, line_break) break; } - c = from[i++]; + if (multibyte) + { + c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes); + i += bytes; + } + else + c = from[i++]; *e++ = base64_value_to_char[value | (0x0f & c >> 4)]; value = (0x0f & c) << 2; @@ -3256,7 +3293,13 @@ base64_encode_1 (from, to, length, line_break) break; } - c = from[i++]; + if (multibyte) + { + c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes); + i += bytes; + } + else + c = from[i++]; *e++ = base64_value_to_char[value | (0x03 & c >> 6)]; *e++ = base64_value_to_char[0x3f & c]; @@ -3305,27 +3348,19 @@ If the region can't be decoded, signal an error and don't modify the buffer.") error ("Base64 decoding failed"); } + inserted_chars = decoded_length; + if (!NILP (current_buffer->enable_multibyte_characters)) + decoded_length = str_to_multibyte (decoded, length, decoded_length); + /* Now we have decoded the region, so we insert the new contents and delete the old. (Insert first in order to preserve markers.) */ - /* We insert two spaces, then insert the decoded text in between - them, at last, delete those extra two spaces. This is to avoid - byte combining while inserting. 
*/ - TEMP_SET_PT_BOTH (XFASTINT (beg), ibeg); - insert_1_both (" ", 2, 2, 0, 1, 0); - TEMP_SET_PT_BOTH (XFASTINT (beg) + 1, ibeg + 1); - insert (decoded, decoded_length); - inserted_chars = PT - (XFASTINT (beg) + 1); + TEMP_SET_PT_BOTH (XFASTINT (beg), ibeg); + insert_1_both (decoded, inserted_chars, decoded_length, 0, 1, 0); if (length > MAX_ALLOCA) xfree (decoded); - /* At first delete the original text. This never causes byte - combining. */ - del_range_both (PT + 1, PT_BYTE + 1, XFASTINT (end) + inserted_chars + 2, - iend + decoded_length + 2, 1); - /* Next delete the extra spaces. This will cause byte combining - error. */ - del_range_both (PT, PT_BYTE, PT + 1, PT_BYTE + 1, 0); - del_range_both (XFASTINT (beg), ibeg, XFASTINT (beg) + 1, ibeg + 1, 0); - inserted_chars = PT - XFASTINT (beg); + /* Delete the original text. */ + del_range_both (PT, PT_BYTE, XFASTINT (end) + inserted_chars, + iend + decoded_length, 1); /* If point was outside of the region, restore it exactly; else just move to the beginning of the region. */ @@ -3361,7 +3396,7 @@ DEFUN ("base64-decode-string", Fbase64_decode_string, Sbase64_decode_string, if (decoded_length > length) abort (); else if (decoded_length >= 0) - decoded_string = make_string (decoded, decoded_length); + decoded_string = make_unibyte_string (decoded, decoded_length); else decoded_string = Qnil;