Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
Copyright (C) 2001 Free Software Foundation, Inc.
Licensed to the Free Software Foundation.
+ Copyright (C) 2001, 2002
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H13PRO009
This file is part of GNU Emacs.
#ifdef emacs
#include "lisp.h"
+#include "character.h"
#include "charset.h"
#include "ccl.h"
#include "coding.h"
#endif /* not emacs */
+Lisp_Object Qccl, Qcclp;
+
/* This contains all code conversion map available to CCL. */
Lisp_Object Vcode_conversion_map_vector;
/* Encode one character CH to multibyte form and write to the current
output buffer. If CH is less than 256, CH is written as is. */
-#define CCL_WRITE_CHAR(ch) \
- do { \
- int bytes = SINGLE_BYTE_CHAR_P (ch) ? 1: CHAR_BYTES (ch); \
- if (!dst) \
- CCL_INVALID_CMD; \
- else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \
- { \
- if (bytes == 1) \
- { \
- *dst++ = (ch); \
- if ((ch) >= 0x80 && (ch) < 0xA0) \
- /* We may have to convert this eight-bit char to \
- multibyte form later. */ \
- extra_bytes++; \
- } \
- else if (CHAR_VALID_P (ch, 0)) \
- dst += CHAR_STRING (ch, dst); \
- else \
- CCL_INVALID_CMD; \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
- } while (0)
-
-/* Encode one character CH to multibyte form and write to the current
- output buffer. The output bytes always forms a valid multibyte
- sequence. */
-#define CCL_WRITE_MULTIBYTE_CHAR(ch) \
- do { \
- int bytes = CHAR_BYTES (ch); \
- if (!dst) \
- CCL_INVALID_CMD; \
- else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \
- { \
- if (CHAR_VALID_P ((ch), 0)) \
- dst += CHAR_STRING ((ch), dst); \
- else \
- CCL_INVALID_CMD; \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
+#define CCL_WRITE_CHAR(ch) \
+ do { \
+ if (! dst) \
+ CCL_INVALID_CMD; \
+ else if (dst < dst_end) \
+ *dst++ = (ch); \
+ else \
+ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
/* Write a string at ccl_prog[IC] of length LEN to the current output
buffer. */
#define CCL_WRITE_STRING(len) \
do { \
+ int i; \
if (!dst) \
CCL_INVALID_CMD; \
- else if (dst + len <= (dst_bytes ? dst_end : src)) \
+ else if (dst + len <= dst_end) \
for (i = 0; i < len; i++) \
*dst++ = ((XFASTINT (ccl_prog[ic + (i / 3)])) \
>> ((2 - (i % 3)) * 8)) & 0xFF; \
CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
-/* Read one byte from the current input buffer into REGth register. */
-#define CCL_READ_CHAR(REG) \
- do { \
- if (!src) \
- CCL_INVALID_CMD; \
- else if (src < src_end) \
- { \
- REG = *src++; \
- if (REG == '\n' \
- && ccl->eol_type != CODING_EOL_LF) \
- { \
- /* We are encoding. */ \
- if (ccl->eol_type == CODING_EOL_CRLF) \
- { \
- if (ccl->cr_consumed) \
- ccl->cr_consumed = 0; \
- else \
- { \
- ccl->cr_consumed = 1; \
- REG = '\r'; \
- src--; \
- } \
- } \
- else \
- REG = '\r'; \
- } \
- if (REG == LEADING_CODE_8_BIT_CONTROL \
- && ccl->multibyte) \
- REG = *src++ - 0x20; \
- } \
- else if (ccl->last_block) \
- { \
- ic = ccl->eof_ic; \
- goto ccl_repeat; \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \
- } while (0)
-
-
-/* Set C to the character code made from CHARSET and CODE. This is
- like MAKE_CHAR but check the validity of CHARSET and CODE. If they
- are not valid, set C to (CODE & 0xFF) because that is usually the
- case that CCL_ReadMultibyteChar2 read an invalid code and it set
- CODE to that invalid byte. */
-
-#define CCL_MAKE_CHAR(charset, code, c) \
- do { \
- if (charset == CHARSET_ASCII) \
- c = code & 0xFF; \
- else if (CHARSET_DEFINED_P (charset) \
- && (code & 0x7F) >= 32 \
- && (code < 256 || ((code >> 7) & 0x7F) >= 32)) \
- { \
- int c1 = code & 0x7F, c2 = 0; \
- \
- if (code >= 256) \
- c2 = c1, c1 = (code >> 7) & 0x7F; \
- c = MAKE_CHAR (charset, c1, c2); \
- } \
- else \
- c = code & 0xFF; \
- } while (0)
-
-
-/* Execute CCL code on SRC_BYTES length text at SOURCE. The resulting
- text goes to a place pointed by DESTINATION, the length of which
- should not exceed DST_BYTES. The bytes actually processed is
- returned as *CONSUMED. The return value is the length of the
- resulting text. As a side effect, the contents of CCL registers
- are updated. If SOURCE or DESTINATION is NULL, only operations on
- registers are permitted. */
+/* Read one byte from the current input buffer into Rth register. */
+#define CCL_READ_CHAR(r) \
+ do { \
+ if (! src) \
+ CCL_INVALID_CMD; \
+ else if (src < src_end) \
+ r = *src++; \
+ else if (ccl->last_block) \
+ { \
+ ic = ccl->eof_ic; \
+ goto ccl_repeat; \
+ } \
+ else \
+ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \
+ } while (0)
+
+
+/* Execute CCL code on characters at SOURCE (length SRC_SIZE). The
+ resulting text goes to a place pointed by DESTINATION, the length
+ of which should not exceed DST_SIZE. As a side effect, how many
+ characters are consumed and produced are recorded in CCL->consumed
+ and CCL->produced, and the contents of CCL registers are updated.
+ If SOURCE or DESTINATION is NULL, only operations on registers are
+ permitted. */
#ifdef CCL_DEBUG
#define CCL_DEBUG_BACKTRACE_LEN 256
/* For the moment, we only support depth 256 of stack. */
static struct ccl_prog_stack ccl_prog_stack_struct[256];
-int
-ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
+void
+ccl_driver (ccl, source, destination, src_size, dst_size)
struct ccl_program *ccl;
- unsigned char *source, *destination;
- int src_bytes, dst_bytes;
- int *consumed;
+ int *source, *destination;
+ int src_size, dst_size;
{
register int *reg = ccl->reg;
register int ic = ccl->ic;
register int code = 0, field1, field2;
register Lisp_Object *ccl_prog = ccl->prog;
- unsigned char *src = source, *src_end = src + src_bytes;
- unsigned char *dst = destination, *dst_end = dst + dst_bytes;
+ int *src = source, *src_end = src + src_size;
+ int *dst = destination, *dst_end = dst + dst_size;
int jump_address;
int i = 0, j, op;
int stack_idx = ccl->stack_idx;
/* Instruction counter of the current CCL code. */
int this_ic = 0;
- /* CCL_WRITE_CHAR will produce 8-bit code of range 0x80..0x9F. But,
- each of them will be converted to multibyte form of 2-byte
- sequence. For that conversion, we remember how many more bytes
- we must keep in DESTINATION in this variable. */
- int extra_bytes = 0;
+ struct charset *charset;
if (ic >= ccl->eof_ic)
ic = CCL_HEADER_MAIN;
- if (ccl->buf_magnification == 0) /* We can't produce any bytes. */
+ if (ccl->buf_magnification == 0) /* We can't read/produce any bytes. */
dst = NULL;
/* Set mapping stack pointer. */
/* We can't just signal Qquit, instead break the loop as if
the whole data is processed. Don't reset Vquit_flag, it
must be handled later at a safer place. */
- if (consumed)
- src = source + src_bytes;
+ if (src)
+ src = source + src_size;
ccl->status = CCL_STAT_QUIT;
break;
}
case CCL_LE: reg[rrr] = i <= j; break;
case CCL_GE: reg[rrr] = i >= j; break;
case CCL_NE: reg[rrr] = i != j; break;
- case CCL_DECODE_SJIS: DECODE_SJIS (i, j, reg[rrr], reg[7]); break;
- case CCL_ENCODE_SJIS: ENCODE_SJIS (i, j, reg[rrr], reg[7]); break;
+ case CCL_DECODE_SJIS:
+ {
+ i = (i << 8) | j;
+ SJIS_TO_JIS (i);
+ reg[rrr] = i >> 8;
+ reg[7] = i & 0xFF;
+ break;
+ }
+ case CCL_ENCODE_SJIS:
+ {
+ i = (i << 8) | j;
+ JIS_TO_SJIS (i);
+ reg[rrr] = i >> 8;
+ reg[7] = i & 0xFF;
+ break;
+ }
default: CCL_INVALID_CMD;
}
code &= 0x1F;
case CCL_ReadMultibyteChar2:
if (!src)
CCL_INVALID_CMD;
-
- if (src >= src_end)
- {
- src++;
- goto ccl_read_multibyte_character_suspend;
- }
-
- if (!ccl->multibyte)
- {
- int bytes;
- if (!UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
- {
- reg[RRR] = CHARSET_8_BIT_CONTROL;
- reg[rrr] = *src++;
- break;
- }
- }
- i = *src++;
- if (i == '\n' && ccl->eol_type != CODING_EOL_LF)
- {
- /* We are encoding. */
- if (ccl->eol_type == CODING_EOL_CRLF)
- {
- if (ccl->cr_consumed)
- ccl->cr_consumed = 0;
- else
- {
- ccl->cr_consumed = 1;
- i = '\r';
- src--;
- }
- }
- else
- i = '\r';
- reg[rrr] = i;
- reg[RRR] = CHARSET_ASCII;
- }
- else if (i < 0x80)
- {
- /* ASCII */
- reg[rrr] = i;
- reg[RRR] = CHARSET_ASCII;
- }
- else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2)
- {
- int dimension = BYTES_BY_CHAR_HEAD (i) - 1;
-
- if (dimension == 0)
- {
- /* `i' is a leading code for an undefined charset. */
- reg[RRR] = CHARSET_8_BIT_GRAPHIC;
- reg[rrr] = i;
- }
- else if (src + dimension > src_end)
- goto ccl_read_multibyte_character_suspend;
- else
- {
- reg[RRR] = i;
- i = (*src++ & 0x7F);
- if (dimension == 1)
- reg[rrr] = i;
- else
- reg[rrr] = ((i << 7) | (*src++ & 0x7F));
- }
- }
- else if ((i == LEADING_CODE_PRIVATE_11)
- || (i == LEADING_CODE_PRIVATE_12))
- {
- if ((src + 1) >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = *src++;
- reg[rrr] = (*src++ & 0x7F);
- }
- else if ((i == LEADING_CODE_PRIVATE_21)
- || (i == LEADING_CODE_PRIVATE_22))
- {
- if ((src + 2) >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = *src++;
- i = (*src++ & 0x7F);
- reg[rrr] = ((i << 7) | (*src & 0x7F));
- src++;
- }
- else if (i == LEADING_CODE_8_BIT_CONTROL)
- {
- if (src >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = CHARSET_8_BIT_CONTROL;
- reg[rrr] = (*src++ - 0x20);
- }
- else if (i >= 0xA0)
- {
- reg[RRR] = CHARSET_8_BIT_GRAPHIC;
- reg[rrr] = i;
- }
- else
- {
- /* INVALID CODE. Return a single byte character. */
- reg[RRR] = CHARSET_ASCII;
- reg[rrr] = i;
- }
- break;
-
- ccl_read_multibyte_character_suspend:
- if (src <= src_end && !ccl->multibyte && ccl->last_block)
- {
- reg[RRR] = CHARSET_8_BIT_CONTROL;
- reg[rrr] = i;
- break;
- }
- src--;
- if (ccl->last_block)
- {
- ic = ccl->eof_ic;
- goto ccl_repeat;
- }
- else
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC);
-
+ CCL_READ_CHAR (i);
+ charset = CHAR_CHARSET (i);
+ reg[rrr] = CHARSET_ID (charset);
+ reg[RRR] = ENCODE_CHAR (charset, i);
break;
case CCL_WriteMultibyteChar2:
- i = reg[RRR]; /* charset */
- if (i == CHARSET_ASCII
- || i == CHARSET_8_BIT_CONTROL
- || i == CHARSET_8_BIT_GRAPHIC)
- i = reg[rrr] & 0xFF;
- else if (CHARSET_DIMENSION (i) == 1)
- i = ((i - 0x70) << 7) | (reg[rrr] & 0x7F);
- else if (i < MIN_CHARSET_PRIVATE_DIMENSION2)
- i = ((i - 0x8F) << 14) | reg[rrr];
- else
- i = ((i - 0xE0) << 14) | reg[rrr];
-
- CCL_WRITE_MULTIBYTE_CHAR (i);
-
+ if (! dst)
+ CCL_INVALID_CMD;
+ charset = CHARSET_FROM_ID (reg[RRR]);
+ i = DECODE_CHAR (charset, reg[rrr]);
+ CCL_WRITE_CHAR (i);
break;
case CCL_TranslateCharacter:
- CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
- op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]),
- i, -1, 0, 0);
- SPLIT_CHAR (op, reg[RRR], i, j);
- if (j != -1)
- i = (i << 7) | j;
-
- reg[rrr] = i;
+ charset = CHARSET_FROM_ID (reg[RRR]);
+ i = DECODE_CHAR (charset, reg[rrr]);
+ op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]), i);
+ charset = CHAR_CHARSET (op);
+ reg[RRR] = CHARSET_ID (charset);
+ reg[rrr] = ENCODE_CHAR (charset, op);
break;
case CCL_TranslateCharacterConstTbl:
op = XINT (ccl_prog[ic]); /* table */
ic++;
- CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
- op = translate_char (GET_TRANSLATION_TABLE (op), i, -1, 0, 0);
- SPLIT_CHAR (op, reg[RRR], i, j);
- if (j != -1)
- i = (i << 7) | j;
-
- reg[rrr] = i;
+ charset = CHARSET_FROM_ID (reg[RRR]);
+ i = DECODE_CHAR (charset, reg[rrr]);
+ op = translate_char (GET_TRANSLATION_TABLE (op), i);
+ charset = CHAR_CHARSET (op);
+ reg[RRR] = CHARSET_ID (charset);
+ reg[rrr] = ENCODE_CHAR (charset, op);
break;
case CCL_IterateMultipleMap:
}
msglen = strlen (msg);
- if (dst + msglen <= (dst_bytes ? dst_end : src))
- {
- bcopy (msg, dst, msglen);
- dst += msglen;
- }
-
- if (ccl->status == CCL_STAT_INVALID_CMD)
+ if (dst + msglen <= dst_end)
{
-#if 0 /* If the remaining bytes contain 0x80..0x9F, copying them
- results in an invalid multibyte sequence. */
-
- /* Copy the remaining source data. */
- int i = src_end - src;
- if (dst_bytes && (dst_end - dst) < i)
- i = dst_end - dst;
- bcopy (src, dst, i);
- src += i;
- dst += i;
-#else
- /* Signal that we've consumed everything. */
- src = src_end;
-#endif
+ for (i = 0; i < msglen; i++)
+ *dst++ = msg[i];
}
}
ccl->ic = ic;
ccl->stack_idx = stack_idx;
ccl->prog = ccl_prog;
- ccl->eight_bit_control = (extra_bytes > 0);
- if (consumed)
- *consumed = src - source;
- return (dst ? dst - destination : 0);
+ ccl->consumed = src - source;
+ ccl->produced = dst - destination;
}
/* Resolve symbols in the specified CCL code (Lisp vector). This
ccl->private_state = 0;
ccl->status = 0;
ccl->stack_idx = 0;
- ccl->eol_type = CODING_EOL_LF;
ccl->suppress_error = 0;
return 0;
}
? XINT (AREF (reg, i))
: 0);
- ccl_driver (&ccl, (unsigned char *)0, (unsigned char *)0, 0, 0, (int *)0);
+ ccl_driver (&ccl, NULL, NULL, 0, 0);
QUIT;
if (ccl.status != CCL_STAT_SUCCESS)
error ("Error in CCL program at %dth code", ccl.ic);
{
Lisp_Object val;
struct ccl_program ccl;
- int i, produced;
+ int i;
int outbufsize;
- char *outbuf;
- struct gcpro gcpro1, gcpro2;
+ unsigned char *outbuf, *outp;
+ int str_chars, str_bytes;
+#define CCL_EXECUTE_BUF_SIZE 1024
+ int source[CCL_EXECUTE_BUF_SIZE], destination[CCL_EXECUTE_BUF_SIZE];
+ int consumed_chars, consumed_bytes, produced_chars;
if (setup_ccl_program (&ccl, ccl_prog) < 0)
error ("Invalid CCL program");
if (ASIZE (status) != 9)
error ("Length of vector STATUS is not 9");
CHECK_STRING (str);
-
- GCPRO2 (status, str);
+ str_chars = XSTRING (str)->size;
+ str_bytes = STRING_BYTES (XSTRING (str));
for (i = 0; i < 8; i++)
{
if (ccl.ic < i && i < ccl.size)
ccl.ic = i;
}
- outbufsize = STRING_BYTES (XSTRING (str)) * ccl.buf_magnification + 256;
- outbuf = (char *) xmalloc (outbufsize);
- ccl.last_block = NILP (contin);
- ccl.multibyte = STRING_MULTIBYTE (str);
- produced = ccl_driver (&ccl, XSTRING (str)->data, outbuf,
- STRING_BYTES (XSTRING (str)), outbufsize, (int *) 0);
- for (i = 0; i < 8; i++)
- XSET (AREF (status, i), Lisp_Int, ccl.reg[i]);
- XSETINT (AREF (status, 8), ccl.ic);
- UNGCPRO;
- if (NILP (unibyte_p))
+ outbufsize = (ccl.buf_magnification
+ ? str_bytes * ccl.buf_magnification + 256
+ : str_bytes + 256);
+ outp = outbuf = (unsigned char *) xmalloc (outbufsize);
+
+ consumed_chars = consumed_bytes = 0;
+ produced_chars = 0;
+ while (consumed_bytes < str_bytes)
{
- int nchars;
+ unsigned char *p = XSTRING (str)->data + consumed_bytes;
+ unsigned char *endp = XSTRING (str)->data + str_bytes;
+ int i = 0;
+ int *src, src_size;
+
+ if (endp - p == str_chars - consumed_chars)
+ while (i < CCL_EXECUTE_BUF_SIZE && p < endp)
+ source[i++] = *p++;
+ else
+ while (i < CCL_EXECUTE_BUF_SIZE && p < endp)
+ source[i++] = STRING_CHAR_ADVANCE (p);
+ consumed_chars += i;
+ consumed_bytes = p - XSTRING (str)->data;
+
+ if (consumed_bytes == str_bytes)
+ ccl.last_block = NILP (contin);
+ src = source;
+ src_size = i;
+ while (1)
+ {
+ ccl_driver (&ccl, src, destination, src_size, CCL_EXECUTE_BUF_SIZE);
+ if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
+ break;
+ produced_chars += ccl.produced;
+ if (NILP (unibyte_p))
+ {
+ if (outp - outbuf + MAX_MULTIBYTE_LENGTH * ccl.produced
+ > outbufsize)
+ {
+ int offset = outp - outbuf;
+ outbufsize += MAX_MULTIBYTE_LENGTH * ccl.produced;
+ outbuf = (unsigned char *) xrealloc (outbuf, outbufsize);
+ outp = outbuf + offset;
+ }
+ for (i = 0; i < ccl.produced; i++)
+ CHAR_STRING_ADVANCE (destination[i], outp);
+ }
+ else
+ {
+ if (outp - outbuf + ccl.produced > outbufsize)
+ {
+ int offset = outp - outbuf;
+ outbufsize += ccl.produced;
+ outbuf = (unsigned char *) xrealloc (outbuf, outbufsize);
+ outp = outbuf + offset;
+ }
+ for (i = 0; i < ccl.produced; i++)
+ *outp++ = destination[i];
+ }
+ src += ccl.consumed;
+ src_size -= ccl.consumed;
+ }
+
+ if (ccl.status != CCL_STAT_SUSPEND_BY_SRC)
+ break;
+ }
- produced = str_as_multibyte (outbuf, outbufsize, produced, &nchars);
- val = make_multibyte_string (outbuf, nchars, produced);
- }
- else
- val = make_unibyte_string (outbuf, produced);
- xfree (outbuf);
- QUIT;
- if (ccl.status == CCL_STAT_SUSPEND_BY_DST)
- error ("Output buffer for the CCL programs overflow");
if (ccl.status != CCL_STAT_SUCCESS
&& ccl.status != CCL_STAT_SUSPEND_BY_SRC)
error ("Error in CCL program at %dth code", ccl.ic);
+
+ for (i = 0; i < 8; i++)
+ XSET (XVECTOR (status)->contents[i], Lisp_Int, ccl.reg[i]);
+ XSETINT (XVECTOR (status)->contents[8], ccl.ic);
+
+ if (NILP (unibyte_p))
+ val = make_multibyte_string ((char *) outbuf, produced_chars,
+ outp - outbuf);
+ else
+ val = make_unibyte_string ((char *) outbuf, produced_chars);
+ xfree (outbuf);
return val;
}
staticpro (&Vccl_program_table);
Vccl_program_table = Fmake_vector (make_number (32), Qnil);
+ Qccl = intern ("ccl");
+ staticpro (&Qccl);
+
+ Qcclp = intern ("cclp");
+ staticpro (&Qcclp);
+
Qccl_program = intern ("ccl-program");
staticpro (&Qccl_program);