return (to - str);
}
-/* Convert eight-bit chars in SRC (in multibyte form) to the
- corresponding byte and store in DST. CHARS is the number of
- characters in SRC. The value is the number of bytes stored in DST.
- Usually, the value is the same as CHARS, but is less than it if SRC
- contains a non-ASCII, non-eight-bit character. */
-
-ptrdiff_t
-str_to_unibyte (const unsigned char *src, unsigned char *dst, ptrdiff_t chars)
-{
- ptrdiff_t i;
-
- for (i = 0; i < chars; i++)
- {
- int c = string_char_advance (&src);
-
- if (CHAR_BYTE8_P (c))
- c = CHAR_TO_BYTE8 (c);
- else if (! ASCII_CHAR_P (c))
- return i;
- *dst++ = c;
- }
- return i;
-}
-
-
static ptrdiff_t
string_count_byte8 (Lisp_Object string)
{
ptrdiff_t *);
extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
-extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
- ptrdiff_t);
extern ptrdiff_t strwidth (const char *, ptrdiff_t);
extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
ptrdiff_t *, ptrdiff_t *);
(Lisp_Object string)
{
CHECK_STRING (string);
+ if (!STRING_MULTIBYTE (string))
+ return string;
- if (STRING_MULTIBYTE (string))
+ ptrdiff_t chars = SCHARS (string);
+ Lisp_Object ret = make_uninit_string (chars);
+ unsigned char *src = SDATA (string);
+ unsigned char *dst = SDATA (ret);
+ for (ptrdiff_t i = 0; i < chars; i++)
{
- ptrdiff_t chars = SCHARS (string);
- unsigned char *str = xmalloc (chars);
- ptrdiff_t converted = str_to_unibyte (SDATA (string), str, chars);
-
- if (converted < chars)
- error ("Can't convert the %"pD"dth character to unibyte", converted);
- string = make_unibyte_string ((char *) str, chars);
- xfree (str);
+ unsigned char b = *src++;
+ if (b <= 0x7f)
+ *dst++ = b; /* ASCII */
+ else if (CHAR_BYTE8_HEAD_P (b))
+ *dst++ = 0x80 | (b & 1) << 6 | (*src++ & 0x3f); /* raw byte */
+ else
+ error ("Cannot convert character at index %"pD"d to unibyte", i);
}
- return string;
+ return ret;
}
\f
(should (equal (plist-member plist (copy-sequence "a") #'equal)
'("a" "c")))))
+(ert-deftest fns--string-to-unibyte ()
+ (dolist (str '("" "a" "abc" "a\x00\x7fz" "a\xaa\xbbz ""\x80\xdd\xff"))
+ (ert-info ((prin1-to-string str) :prefix "str: ")
+ (should-not (multibyte-string-p str))
+ (let* ((u (string-to-unibyte str)) ; should be identity
+ (m (string-to-multibyte u)) ; lossless conversion
+ (uu (string-to-unibyte m))) ; also lossless
+ (should-not (multibyte-string-p u))
+ (should (multibyte-string-p m))
+ (should-not (multibyte-string-p uu))
+ (should (equal str u))
+ (should (equal str uu)))))
+ (should-error (string-to-unibyte "å"))
+ (should-error (string-to-unibyte "ABC∀BC")))
+
;;; fns-tests.el ends here