}
-/* Convert unibyte text at STR of BYTES bytes to a multibyte text
- that contains the same single-byte characters. It actually
- converts all 8-bit characters to multibyte forms. It is assured
- that we can use LEN bytes at STR as a work area and that is
- enough. */
-
-ptrdiff_t
-str_to_multibyte (unsigned char *str, ptrdiff_t len, ptrdiff_t bytes)
+/* Convert unibyte text at SRC of NCHARS bytes to a multibyte text
+ at DST of NBYTES bytes, that contains the same single-byte characters.
+ Each source byte <= 0x7f is copied unchanged; each byte >= 0x80 is
+ written as two bytes, 0xc0 + bit 6 of the byte followed by
+ 0x80 + its low six bits. The text is processed from the last byte
+ to the first, filling DST from its end. The final eassert checks
+ that exactly NBYTES bytes were produced, so the caller must pass
+ NBYTES equal to NCHARS plus the number of bytes >= 0x80 in SRC. */
+void
+str_to_multibyte (unsigned char *dst, const unsigned char *src,
+ ptrdiff_t nchars, ptrdiff_t nbytes)
{
- unsigned char *p = str, *endp = str + bytes;
- unsigned char *to;
-
- while (p < endp && *p < 0x80) p++;
- if (p == endp)
- return bytes;
- to = p;
- bytes = endp - p;
- endp = str + len;
- memmove (endp - bytes, p, bytes);
- p = endp - bytes;
- while (p < endp)
+ const unsigned char *s = src + nchars;
+ unsigned char *d = dst + nbytes;
+ for (ptrdiff_t i = 0; i < nchars; i++)
{
- int c = *p++;
-
- if (c >= 0x80)
- c = BYTE8_TO_CHAR (c);
- to += CHAR_STRING (c, to);
+ unsigned char c = *--s;
+ if (c <= 0x7f)
+ *--d = c;
+ else
+ {
+ *--d = 0x80 + (c & 0x3f);
+ *--d = 0xc0 + ((c >> 6) & 1);
+ }
}
- return (to - str);
+ eassert (d == dst && s == src);
}
/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
ptrdiff_t *);
-extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
+extern void str_to_multibyte (unsigned char *dst, const unsigned char *src,
+ ptrdiff_t nchars, ptrdiff_t nbytes);
extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
extern ptrdiff_t strwidth (const char *, ptrdiff_t);
extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
/* Convert STRING (if unibyte) to a multibyte string without changing
- the number of characters. Characters 0200 through 0237 are
- converted to eight-bit characters. */
+ the number of characters. Bytes 0x80..0xff of a unibyte STRING are
+ converted to the corresponding raw-byte characters; a multibyte
+ STRING is returned unchanged. */
Lisp_Object
string_to_multibyte (Lisp_Object string)
{
- unsigned char *buf;
- ptrdiff_t nbytes;
- Lisp_Object ret;
- USE_SAFE_ALLOCA;
-
if (STRING_MULTIBYTE (string))
return string;
- nbytes = count_size_as_multibyte (SDATA (string), SBYTES (string));
+ ptrdiff_t nchars = SCHARS (string); /* Unibyte here: also the byte count. */
+ ptrdiff_t nbytes = count_size_as_multibyte (SDATA (string), nchars);
/* If all the chars are ASCII, they won't need any more bytes once
converted. */
- if (nbytes == SBYTES (string))
+ if (nbytes == nchars)
return make_multibyte_string (SSDATA (string), nbytes, nbytes);
- buf = SAFE_ALLOCA (nbytes);
- memcpy (buf, SDATA (string), SBYTES (string));
- str_to_multibyte (buf, nbytes, SBYTES (string));
-
- ret = make_multibyte_string ((char *) buf, SCHARS (string), nbytes);
- SAFE_FREE ();
-
+ Lisp_Object ret = make_uninit_multibyte_string (nchars, nbytes);
+ str_to_multibyte (SDATA (ret), SDATA (string), nchars, nbytes); /* Convert directly into RET's data. */
return ret;
}
if (chars < bytes)
{
newstr = make_uninit_multibyte_string (chars, bytes);
- memcpy (SDATA (newstr), SDATA (string), chars);
- str_to_multibyte (SDATA (newstr), bytes, chars);
+ str_to_multibyte (SDATA (newstr), SDATA (string), chars, bytes);
string = newstr;
}
}
(should (equal (plist-member plist (copy-sequence "a") #'equal)
'("a" "c")))))
-(ert-deftest fns--string-to-unibyte ()
- (dolist (str '("" "a" "abc" "a\x00\x7fz" "a\xaa\xbbz ""\x80\xdd\xff"))
+(ert-deftest fns--string-to-unibyte-multibyte ()
+ (dolist (str (list "" "a" "abc" "a\x00\x7fz" "a\xaa\xbbz" "\x80\xdd\xff"
+ (apply #'unibyte-string (number-sequence 0 255)))) ; every byte value
(ert-info ((prin1-to-string str) :prefix "str: ")
(should-not (multibyte-string-p str))
(let* ((u (string-to-unibyte str)) ; should be identity
(m (string-to-multibyte u)) ; lossless conversion
- (uu (string-to-unibyte m))) ; also lossless
+ (mm (string-to-multibyte m)) ; should be identity
+ (uu (string-to-unibyte m)) ; also lossless
+ (ml (mapcar (lambda (c) (if (<= c #x7f) c (+ c #x3fff00))) u))) ; expected chars of m: byte b > #x7f becomes b + #x3fff00
(should-not (multibyte-string-p u))
(should (multibyte-string-p m))
+ (should (multibyte-string-p mm))
(should-not (multibyte-string-p uu))
(should (equal str u))
- (should (equal str uu)))))
+ (should (equal m mm))
+ (should (equal str uu))
+ (should (equal (append m nil) ml)))))
(should-error (string-to-unibyte "å"))
(should-error (string-to-unibyte "ABC∀BC")))