+2013-08-20 Kenichi Handa <handa@gnu.org>
+
+ * character.c (string_char): Improve commentary.
+
2013-08-20 Paul Eggert <eggert@cs.ucla.edu>
* image.c (SIGNATURE_DIGESTSIZE): Remove.
if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
{
+ /* 1-, 2-, and 3-byte sequences can be handled by the macro. */
c = STRING_CHAR_ADVANCE (p);
}
else if (! (*p & 0x08))
{
- c = ((((p)[0] & 0xF) << 18)
+ /* A 4-byte sequence of this form:
+ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+ c = ((((p)[0] & 0x7) << 18)
| (((p)[1] & 0x3F) << 12)
| (((p)[2] & 0x3F) << 6)
| ((p)[3] & 0x3F));
}
else
{
- c = ((((p)[1] & 0x3F) << 18)
+ /* A 5-byte sequence of this form:
+
+ 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+ Note that the top 4 `x's (two in p[0], the upper two in p[1]) are
+ always 0, so p[1] << 18 can never exceed the maximum valid character codepoint. */
+ c = (/* (((p)[0] & 0x3) << 24) ... always 0, so no need to shift. */
+ (((p)[1] & 0x3F) << 18)
| (((p)[2] & 0x3F) << 12)
| (((p)[3] & 0x3F) << 6)
| ((p)[4] & 0x3F));