From 3ffc5f44b09e2b500584545389efb8db3be04f95 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Tue, 6 Sep 2016 17:20:23 +0200 Subject: [PATCH] STRING_CHAR does not unify characters; update documentation * src/character.h (STRING_CHAR): Update doc. * src/buffer.h (FETCH_MULTIBYTE_CHAR): Update doc. While at it, change the function to use BYTE_POS_ADDR instead of open-coding it. --- src/buffer.h | 15 ++------------- src/character.h | 5 +---- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/src/buffer.h b/src/buffer.h index 87b7cee4413..fa4866e8f72 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -1182,23 +1182,12 @@ buffer_has_overlays (void) /* Return character code of multi-byte form at byte position POS. If POS doesn't point the head of valid multi-byte form, only the byte at - POS is returned. No range checking. - - WARNING: The character returned by this macro could be "unified" - inside STRING_CHAR, if the original character in the buffer belongs - to one of the Private Use Areas (PUAs) of codepoints that Emacs - uses to support non-unified CJK characters. If that happens, - CHAR_BYTES will return a value that is different from the length of - the original multibyte sequence stored in the buffer. Therefore, - do _not_ use FETCH_MULTIBYTE_CHAR if you need to advance through - the buffer to the next character after fetching this one. Instead, - use either FETCH_CHAR_ADVANCE or STRING_CHAR_AND_LENGTH. */ + POS is returned. No range checking. */ INLINE int FETCH_MULTIBYTE_CHAR (ptrdiff_t pos) { - unsigned char *p = ((pos >= GPT_BYTE ? GAP_SIZE : 0) - + pos + BEG_ADDR - BEG_BYTE); + unsigned char *p = BYTE_POS_ADDR (pos); return STRING_CHAR (p); } diff --git a/src/character.h b/src/character.h index 0d0e31c4c92..7f01bc6a31e 100644 --- a/src/character.h +++ b/src/character.h @@ -308,10 +308,7 @@ enum } \ } while (false) -/* Return the character code of character whose multibyte form is at - P. Note that this macro unifies CJK characters whose codepoints - are in the Private Use Areas (PUAs), so it might return a different - codepoint from the one actually stored at P. */ +/* Return the character code of character whose multibyte form is at P. */ #define STRING_CHAR(p) \ (!((p)[0] & 0x80) \ -- 2.39.2