;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
;; Licensed to the Free Software Foundation.
;; Copyright (C) 2001 Free Software Foundation, Inc.
+;; Copyright (C) 2001, 2002
+;; National Institute of Advanced Industrial Science and Technology (AIST)
+;; Registration Number H13PRO009
;; Keywords: mule, multilingual, character set, coding system
;;; Code:
-(defconst mule-version "5.0 (SAKAKI)" "\
+(defconst mule-version "7.0 (SAKAKI)" "\
Version number and name of this version of MULE (multilingual environment).")
-(defconst mule-version-date "1999.12.7" "\
+(defconst mule-version-date "2002.2.28" "\
Distribution date of this version of MULE (multilingual environment).")
+
+\f
+;;; CHARACTER
+(defalias 'char-valid-p 'characterp)
+(make-obsolete 'char-valid-p 'characterp "22.1")
+
+\f
+;;; CHARSET
+
+(defun define-charset (name docstring &rest props)
+ "Define NAME (symbol) as a charset with DOCSTRING.
+The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
+may be any symbol. The followings have special meanings, and one of
+`:code-offset', `:map', `:parents' must be specified.
+
+`:short-name'
+
+VALUE must be a short string to identify the charset. If omitted,
+NAME is used.
+
+`:long-name'
+
+VALUE must be a string longer than `:short-name' to identify the
+charset. If omitted, the value of `:short-name' attribute is used.
+
+`:dimension'
+
+VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
+code-points of the charsets. If omitted, it is calculated from a
+value of `:code-space' attribute.
+
+`:code-space'
+
+VALUE must be a vector of length at most 8 specifying the byte code
+range of each dimension in this format:
+ [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
+where, MIN-N is the minimum byte value of Nth dimension of code-point,
+MAX-N is the maximum byte value of that.
+
+`:iso-final-char'
+
+VALUE must be a character in the range 32 to 127 (inclusive)
+specifying the final char of the charset for ISO-2022 encoding. If
+omitted, the charset can't be encoded by ISO-2022 based
+coding-systems.
+
+`:iso-revision-number'
+
+VALUE must be an integer in the range 0..63, specifying the revision
+number of the charset for ISO-2022 encoding.
+
+`:emacs-mule-id'
+
+VALUE must be an integer of 0, 128..255. If omitted, the charset
+can't be encoded by coding-systems of type `emacs-mule'.
+
+`:ascii-compatible-p'
+
+VALUE must be nil or t. If the VALUE is nil, the charset is a not
+compatible with ASCII. The default value is nil.
+
+`:supplementary-p'
+
+VALUE must be nil or t. If the VALUE is t, the charset is
+supplementary, which means the charset is used only as a parent of
+some other charset.
+
+`:invalid-code'
+
+VALUE must be a nonnegative integer that can be used as an invalid
+code point of the charset. If the minimum code is 0 and the maximum
+code is greater than Emacs' maximum integer value, `:invalid-code'
+should not be omitted.
+
+`:code-offset'
+
+VALUE must be an integer added to an index number of character to get
+the corresponding character code.
+
+`:map'
+
+VALUE must be vector or string.
+
+If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
+where CODE-n is a code-point of the charset, and CHAR-n is the
+corresponding charcter code.
+
+If it is a string, it is a name of file that contains the above
+information.
+
+`:parents'
+
+VALUE must be a list of parent charsets. The charset inherits
+characters from them. Each element of the list may be a cons (PARENT
+. OFFSET), where PARENT is a parent charset, and OFFSET is an offset
+value to add to a code point of this charset to get the corresponding
+code point of PARENT.
+
+`:unify-map'
+
+VALUE must be vector or string.
+
+If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
+where CODE-n is a code-point of the charset, and CHAR-n is the
+corresponding unified charcter code.
+
+If it is a string, it is a name of file that contains the above
+information."
+ (let ((attrs (mapcar 'list '(:dimension
+ :code-space
+ :iso-final-char
+ :iso-revision-number
+ :emacs-mule-id
+ :ascii-compatible-p
+ :supplementary-p
+ :invalid-code
+ :code-offset
+ :map
+ :parents
+ :unify-map
+ :plist))))
+
+ ;; If :dimension is omitted, get the dimension from :code-space.
+ (let ((dimension (plist-get props :dimension)))
+ (or dimension
+ (progn
+ (setq dimension (/ (length (plist-get props :code-space)) 2))
+ (setq props (plist-put props :dimension dimension)))))
+
+ (dolist (slot attrs)
+ (setcdr slot (plist-get props (car slot))))
+
+ ;; Make sure that the value of :code-space is a vector of 8
+ ;; elements.
+ (let* ((slot (assq :code-space attrs))
+ (val (cdr slot))
+ (len (length val)))
+ (if (< len 8)
+ (setcdr slot
+ (vconcat val (make-vector (- 8 len) 0)))))
+
+ ;; Add :name and :docstring properties to PROPS.
+ (setq props
+ (cons :name (cons name (cons :docstring (cons docstring props)))))
+ (or (plist-get props :short-name)
+ (plist-put props :short-name (symbol-name name)))
+ (or (plist-get props :long-name)
+ (plist-put props :long-name (plist-get props :short-name)))
+ (setcdr (assq :plist attrs) props)
+
+ (apply 'define-charset-internal name (mapcar 'cdr attrs))))
+
+
(defun load-with-code-conversion (fullname file &optional noerror nomessage)
"Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
The file contents are decoded before evaluation if necessary.
;; API (Application Program Interface) for charsets.
-(defsubst charset-quoted-standard-p (obj)
- "Return t if OBJ is a quoted symbol, and is the name of a standard charset."
- (and (listp obj) (eq (car obj) 'quote)
- (symbolp (car-safe (cdr obj)))
- (let ((vector (get (car-safe (cdr obj)) 'charset)))
- (and (vectorp vector)
- (< (aref vector 0) 160)))))
-
-(defsubst charsetp (object)
- "T if OBJECT is a charset."
- (and (symbolp object) (vectorp (get object 'charset))))
-
-(defsubst charset-info (charset)
- "Return a vector of information of CHARSET.
-The elements of the vector are:
- CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
- LEADING-CODE-BASE, LEADING-CODE-EXT,
- ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
- REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
- PLIST,
-where
-CHARSET-ID (integer) is the identification number of the charset.
-BYTES (integer) is the length of multi-byte form of a character in
- the charset: one of 1, 2, 3, and 4.
-DIMENSION (integer) is the number of bytes to represent a character of
-the charset: 1 or 2.
-CHARS (integer) is the number of characters in a dimension: 94 or 96.
-WIDTH (integer) is the number of columns a character in the charset
- occupies on the screen: one of 0, 1, and 2.
-DIRECTION (integer) is the rendering direction of characters in the
- charset when rendering. If 0, render from left to right, else
- render from right to left.
-LEADING-CODE-BASE (integer) is the base leading-code for the
- charset.
-LEADING-CODE-EXT (integer) is the extended leading-code for the
- charset. All charsets of less than 0xA0 has the value 0.
-ISO-FINAL-CHAR (character) is the final character of the
- corresponding ISO 2022 charset. If the charset is not assigned
- any final character, the value is -1.
-ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
- while encoding to variants of ISO 2022 coding system, one of the
- following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
- If the charset is not assigned any final character, the value is -1.
-REVERSE-CHARSET (integer) is the charset which differs only in
- LEFT-TO-RIGHT value from the charset. If there's no such a
- charset, the value is -1.
-SHORT-NAME (string) is the short name to refer to the charset.
-LONG-NAME (string) is the long name to refer to the charset
-DESCRIPTION (string) is the description string of the charset.
-PLIST (property list) may contain any type of information a user
- want to put and get by functions `put-charset-property' and
- `get-charset-property' respectively."
- (get charset 'charset))
-
-;; It is better not to use backquote in this file,
-;; because that makes a bootstrapping problem
-;; if you need to recompile all the Lisp files using interpreted code.
-
-(defmacro charset-id (charset)
- "Return charset identification number of CHARSET."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 0)
- (list 'aref (list 'charset-info charset) 0)))
-
-(defmacro charset-bytes (charset)
- "Return bytes of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 1)
- (list 'aref (list 'charset-info charset) 1)))
-
-(defmacro charset-dimension (charset)
- "Return dimension of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 2)
- (list 'aref (list 'charset-info charset) 2)))
-
-(defmacro charset-chars (charset)
- "Return character numbers contained in a dimension of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 3)
- (list 'aref (list 'charset-info charset) 3)))
-
-(defmacro charset-width (charset)
- "Return width (how many column occupied on a screen) of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 4)
- (list 'aref (list 'charset-info charset) 4)))
-
-(defmacro charset-direction (charset)
- "Return direction of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 5)
- (list 'aref (list 'charset-info charset) 5)))
-
-(defmacro charset-iso-final-char (charset)
- "Return final char of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 8)
- (list 'aref (list 'charset-info charset) 8)))
-
-(defmacro charset-iso-graphic-plane (charset)
- "Return graphic plane of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 9)
- (list 'aref (list 'charset-info charset) 9)))
-
-(defmacro charset-reverse-charset (charset)
- "Return reverse charset of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 10)
- (list 'aref (list 'charset-info charset) 10)))
+;;; Charset property
+
+(defun get-charset-property (charset propname)
+ "Return the value of CHARSET's PROPNAME property.
+This is the last value stored with
+ (put-charset-property CHARSET PROPNAME VALUE)."
+ (plist-get (charset-plist charset) propname))
+
+(defun put-charset-property (charset propname value)
+ "Store CHARSETS's PROPNAME property with value VALUE.
+It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
+ (set-charset-plist charset
+ (plist-put (charset-plist charset) propname value)))
+
+
+(defun charset-description (charset)
+ "Return description string of CHARSET."
+ (plist-get (charset-plist charset) :docstring))
+
+(defun charset-dimension (charset)
+ "Return dimension string of CHARSET."
+ (plist-get (charset-plist charset) :dimension))
+
+(defun charset-chars (charset)
+ "Return character numbers contained in a dimension of CHARSET."
+ (let ((code-space (plist-get (cahrset-plist charset) :code-space)))
+ (1+ (- (aref code-space 1) (aref code-space 0)))))
+
+(defun charset-iso-final-char (charset)
+ "Return final char of CHARSET."
+ (or (plist-get (charset-plist charset) :iso-final-char)
+ -1))
(defmacro charset-short-name (charset)
- "Return short name of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 11)
- (list 'aref (list 'charset-info charset) 11)))
+ "Return short name of CHARSET."
+ (plist-get (charset-plist charset) :short-name))
(defmacro charset-long-name (charset)
- "Return long name of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 12)
- (list 'aref (list 'charset-info charset) 12)))
-
-(defmacro charset-description (charset)
- "Return description of CHARSET.
-See the function `charset-info' for more detail."
- (if (charset-quoted-standard-p charset)
- (aref (charset-info (nth 1 charset)) 13)
- (list 'aref (list 'charset-info charset) 13)))
-
-(defmacro charset-plist (charset)
- "Return list charset property of CHARSET.
-See the function `charset-info' for more detail."
- (list 'aref
- (if (charset-quoted-standard-p charset)
- (charset-info (nth 1 charset))
- (list 'charset-info charset))
- 14))
-
-(defun set-charset-plist (charset plist)
- "Set CHARSET's property list to PLIST, and return PLIST."
- (aset (charset-info charset) 14 plist))
-
-(defun make-char (charset &optional code1 code2)
- "Return a character of CHARSET whose position codes are CODE1 and CODE2.
-CODE1 and CODE2 are optional, but if you don't supply
-sufficient position codes, return a generic character which stands for
-all characters or group of characters in the character set.
-A generic character can be used to index a char table (e.g. syntax-table).
-
-Such character sets as ascii, eight-bit-control, and eight-bit-graphic
-don't have corresponding generic characters. If CHARSET is one of
-them and you don't supply CODE1, return the character of the smallest
-code in CHARSET.
-
-If CODE1 or CODE2 are invalid (out of range), this function signals an
-error. However, the eighth bit of both CODE1 and CODE2 is zeroed
-before they are used to index CHARSET. Thus you may use, say, the
-actual ISO 8859 character code rather than subtracting 128, as you
-would need to index the corresponding Emacs charset."
- (make-char-internal (charset-id charset) code1 code2))
-
-(put 'make-char 'byte-compile
- (function
- (lambda (form)
- (let ((charset (nth 1 form)))
- (if (charset-quoted-standard-p charset)
- (byte-compile-normal-call
- (cons 'make-char-internal
- (cons (charset-id (nth 1 charset)) (nthcdr 2 form))))
- (byte-compile-normal-call
- (cons 'make-char-internal
- (cons (list 'charset-id charset) (nthcdr 2 form)))))))))
+ "Return long name of CHARSET."
+ (plist-get (charset-plist charset) :long-name))
(defun charset-list ()
"Return list of charsets ever defined.
Now we have the variable `charset-list'."
charset-list)
-(defsubst generic-char-p (char)
- "Return t if and only if CHAR is a generic character.
-See also the documentation of `make-char'."
- (and (>= char 0400)
- (let ((l (split-char char)))
- (and (or (= (nth 1 l) 0) (eq (nth 2 l) 0))
- (not (eq (car l) 'composition))))))
-
-(defun decode-char (ccs code-point &optional restriction)
- "Return character specified by coded character set CCS and CODE-POINT in it.
-Return nil if such a character is not supported.
-Currently the only supported coded character set is `ucs' (ISO/IEC
-10646: Universal Multi-Octet Coded Character Set).
-
-Optional argument RESTRICTION specifies a way to map the pair of CCS
-and CODE-POINT to a character. Currently not supported and just ignored."
- (cond ((eq ccs 'ucs)
- (cond ((< code-point 160)
- code-point)
- ((< code-point 256)
- (make-char 'latin-iso8859-1 code-point))
- ((< code-point #x2500)
- (setq code-point (- code-point #x0100))
- (make-char 'mule-unicode-0100-24ff
- (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
- ((< code-point #x3400)
- (setq code-point (- code-point #x2500))
- (make-char 'mule-unicode-2500-33ff
- (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
- ((and (>= code-point #xe000) (< code-point #x10000))
- (setq code-point (- code-point #xe000))
- (make-char 'mule-unicode-e000-ffff
- (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
- ))))
-
-(defun encode-char (char ccs &optional restriction)
- "Return code-point in coded character set CCS that corresponds to CHAR.
-Return nil if CHAR is not included in CCS.
-Currently the only supported coded character set is `ucs' (ISO/IEC
-10646: Universal Multi-Octet Coded Character Set).
-
-CHAR should be in one of these charsets:
- ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff,
- mule-unicode-e000-ffff, eight-bit-control
-Otherwise, return nil.
-
-Optional argument RESTRICTION specifies a way to map CHAR to a
-code-point in CCS. Currently not supported and just ignored."
- (let* ((split (split-char char))
- (charset (car split)))
- (cond ((eq ccs 'ucs)
- (cond ((eq charset 'ascii)
- char)
- ((eq charset 'latin-iso8859-1)
- (+ (nth 1 split) 128))
- ((eq charset 'mule-unicode-0100-24ff)
- (+ #x0100 (+ (* (- (nth 1 split) 32) 96)
- (- (nth 2 split) 32))))
- ((eq charset 'mule-unicode-2500-33ff)
- (+ #x2500 (+ (* (- (nth 1 split) 32) 96)
- (- (nth 2 split) 32))))
- ((eq charset 'mule-unicode-e000-ffff)
- (+ #xe000 (+ (* (- (nth 1 split) 32) 96)
- (- (nth 2 split) 32))))
- ((eq charset 'eight-bit-control)
- char))))))
-
+(defun generic-char-p (char)
+ "Always return nil. This exists only for backward compatibility."
+ nil)
\f
;; Coding system stuff
-;; Coding system is a symbol that has the property `coding-system'.
-;;
-;; The value of the property `coding-system' is a vector of the
-;; following format:
-;; [TYPE MNEMONIC DOC-STRING PLIST FLAGS]
-;; We call this vector as coding-spec. See comments in src/coding.c
-;; for more detail.
-
-(defconst coding-spec-type-idx 0)
-(defconst coding-spec-mnemonic-idx 1)
-(defconst coding-spec-doc-string-idx 2)
-(defconst coding-spec-plist-idx 3)
-(defconst coding-spec-flags-idx 4)
-
-;; PLIST is a property list of a coding system. To share PLIST among
-;; alias coding systems, a coding system has PLIST in coding-spec
-;; instead of having it in normal property list of Lisp symbol.
-;; Here's a list of coding system properties currently being used.
-;;
-;; o coding-category
-;;
-;; The value is a coding category the coding system belongs to. The
-;; function `make-coding-system' sets this value automatically
-;; unless its argument PROPERTIES specifies this property.
-;;
-;; o alias-coding-systems
-;;
-;; The value is a list of coding systems of the same alias group. The
-;; first element is the coding system made at first, which we call as
-;; `base coding system'. The function `make-coding-system' sets this
-;; value automatically and `define-coding-system-alias' updates it.
-;;
-;; See the documentation of make-coding-system for the meanings of the
-;; following properties.
-;;
-;; o post-read-conversion
-;; o pre-write-conversion
-;; o translation-table-for-decode
-;; o translation-table-for-encode
-;; o safe-chars
-;; o safe-charsets
-;; o mime-charset
-;; o valid-codes (meaningful only for a coding system based on CCL)
+;; Coding system is a symbol that has been defined by the function
+;; `define-coding-system'.
+(defconst coding-system-iso-2022-flags
+ '(long-form
+ ascii-at-eol
+ ascii-at-cntl
+ 7-bit
+ locking-shift
+ single-shift
+ designation
+ revision
+ direction
+ init-at-bol
+ designate-at-bol
+ safe
+ latin-extra
+ composition
+ euc-tw-shift)
+ "List of symbols that control ISO-2022 encoder/decoder.
-(defsubst coding-system-spec (coding-system)
- "Return coding-spec of CODING-SYSTEM."
- (get (check-coding-system coding-system) 'coding-system))
+The value of `:flags' attribute in the argument of the function
+`define-coding-system' must be one of them.
-(defun coding-system-type (coding-system)
- "Return the coding type of CODING-SYSTEM.
-A coding type is an integer value indicating the encoding method
-of CODING-SYSTEM. See the function `make-coding-system' for more detail."
- (aref (coding-system-spec coding-system) coding-spec-type-idx))
+If `long-form' is specified, use a long designation sequence on
+encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
+and `japanese-jisx0208'. The long designation sequence doesn't
+conform to ISO 2022, but used by such a coding system as
+`compound-text'.
+
+If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
+on encoding.
+
+If `ascii-at-cntl' is specified, designate ASCII to g0 before control
+codes and SPC on encoding.
+
+If `7-bit' is specified, use 7-bit code only on encoding.
+
+If `locking-shift' is specified, decode locking-shift code correctly
+on decoding, and use locking-shift to invoke a graphic element on
+encoding.
+
+If `single-shift' is specified, decode single-shift code correctly on
+decoding, and use single-shift to invoke a graphic element on encoding.
+
+If `designation' is specified, decode designation code correctly on
+decoding, and use designation to designate a charset to a graphic
+element on encoding.
+
+If `revision' is specified, produce an escape sequence to specify
+revision number of a charset on encoding. Such an escape sequence is
+always correctly decoded on decoding.
+
+If `direction' is specified, decode ISO6429's code for specifying
+direction correctly, and produced the code on encoding.
+
+If `init-at-bol' is specified, on encoding, it is assumed that
+invocation and designation statuses are reset at each beginning of
+line even if `ascii-at-eol' is not specified thus no code for
+resetting them are produced.
+
+If `safe' is specified, on encoding, characters not supported by a
+coding are replaced with `?'.
+
+If `latin-extra' is specified, code-detection routine assumes that a
+code specified in `latin-extra-code-table' (which see) is valid.
+
+If `composition' is specified, an escape sequence to specify
+composition sequence is correctly decode on decoding, and is produced
+on encoding.
+
+If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
+correctly decoded on decoding, and is produced on encoding.")
+
+(defun define-coding-system (name docstring &rest props)
+ "Define NAME (symbol) as a coding system with DOCSTRING and attributes.
+The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
+may be any symbol.
+
+The following attributes have special meanings. If labeled as
+\"(required)\", it should not be omitted.
+
+`:mnemonic' (required)
+
+VALUE is a character to display on mode line for the coding system.
+
+`:coding-type' (required)
+
+VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022',
+`emacs-mule', `sjis', `big5', `ccl', `raw-text', `undecided'.
+
+`:eol-type' (optional)
+
+VALUE is an EOL (end-of-line) format of the coding system. It must be
+one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
+\(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF),
+and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on
+decoding by the coding system, Emacs automatically detects an EOL
+format of the source text.
+
+`:charset-list' (required)
+
+VALUE must be a list of charsets supported by the coding system. On
+encoding by the coding system, if a character belongs to multiple
+charsets in the list, a charset that comes earlier in the list is
+selected.
+
+`:ascii-compatible-p' (optional)
+
+If VALUE is non-nil, the coding system decodes all 7-bit bytes into
+the correponding ASCII characters, and encodes all ASCII characters
+back to the correponding 7-bit bytes. If omitted, the VALUE defaults
+to nil.
+
+`:decode-translation-table' (optional)
+
+VALUE must be a translation table to use on decoding.
+
+`:encode-translation-table' (optional)
+
+VALUE must be a translation table to use on encoding.
+
+`:post-read-conversion' (optional)
+
+VALUE must be a function to call after some text is inserted and
+decoded by the coding system itself and before any functions in
+`after-insert-functions' are called. The arguments to this function
+is the same as those of a function in `after-insert-functions',
+i.e. LENGTH of a text while putting point at the head of the text to
+be decoded
+
+`:pre-write-conversion'
+
+VALUE must be a function to call after all functions in
+`write-region-annotate-functions' and `buffer-file-format' are called,
+and before the text is encoded by the coding system itself. The
+arguments to this function is the same as those of a function in
+`write-region-annotate-functions', i.e. FROM and TO specifying region
+of a text.
+
+`:default-char'
+
+VALUE must be a character. On encoding, a character not supported by
+the coding system is replaced with VALUE.
+
+`:eol-type'
+
+VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like
+EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like
+EOL (CR). If omitted, on decoding, the coding system detect EOL
+format automatically, and on encoding, used Unix-like EOL.
+
+`:mime-charset'
+
+VALUE must be a symbol who has MIME-charset name.
+
+`:flags'
+
+VALUE must be a list of symbols that control ISO-2022 converter. Each
+symbol must be a member of the variable `coding-system-iso-2022-flags'
+\(which see). This attribute has a meaning only when `:coding-type'
+is `iso-2022'.
+
+`:designation'
+
+VALUE must be a vector [ G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
+GN-USAGE specifies the usage of graphic register GN as follows.
+
+If it is nil, no charset can be designated to GN.
+
+If it is a charset, the charset is initially designaged to GN, and
+never used by the other charsets.
+
+If it is a list, the elements must be charsets, nil, 94, or 96. GN
+can be used by all listed charsets. If the list contains 94, any
+charsets whose iso-chars is 94 can be designaged to GN. If the list
+contains 96, any charsets whose iso-chars is 96 can be designaged to
+GN. If the first element is a charset, the charset is initially
+designaged to GN.
+
+This attribute has a meaning only when `:coding-type' is `iso-2022'.
+
+`:bom'
+
+VALUE must nil, t, or cons of coding systems whose `:coding-type' is
+`utf-16'.
+
+This attribute has a meaning only when `:coding-type' is `utf-16'.
+
+`:endian'
+
+VALUE must be t or nil. See the above description for the detail.
+
+This attribute has a meaning only when `:coding-type' is `utf-16'.
+
+`:ccl-decoder'
+
+This attribute has a meaning only when `:coding-type' is `ccl'.
+
+`:ccl-encoder'
+
+This attribute has a meaning only when `:coding-type' is `ccl'."
+ (let* ((common-attrs (mapcar 'list
+ '(:mnemonic
+ :coding-type
+ :charset-list
+ :ascii-compatible-p
+ :docode-translation-table
+ :encode-translation-table
+ :post-read-conversion
+ :pre-write-conversion
+ :default-char
+ :plist
+ :eol-type)))
+ (coding-type (plist-get props :coding-type))
+ (spec-attrs (mapcar 'list
+ (cond ((eq coding-type 'iso-2022)
+ '(:initial
+ :reg-usage
+ :request
+ :flags))
+ ((eq coding-type 'utf-16)
+ '(:bom
+ :endian))
+ ((eq coding-type 'ccl)
+ '(:ccl-decoder
+ :ccl-encoder
+ :valids))))))
+
+ (dolist (slot common-attrs)
+ (setcdr slot (plist-get props (car slot))))
+
+ (dolist (slot spec-attrs)
+ (setcdr slot (plist-get props (car slot))))
+
+ (if (eq coding-type 'iso-2022)
+ (let ((designation (plist-get props :designation))
+ (flags (plist-get props :flags))
+ (initial (make-vector 4 nil))
+ (reg-usage (cons 4 4))
+ request elt)
+ (dotimes (i 4)
+ (setq elt (aref designation i))
+ (cond ((charsetp elt)
+ (aset initial i elt)
+ (setq request (cons (cons elt i) request)))
+ ((consp elt)
+ (aset initial i (car elt))
+ (if (charsetp (car elt))
+ (setq request (cons (cons (car elt) i) request)))
+ (dolist (e (cdr elt))
+ (cond ((charsetp e)
+ (setq request (cons (cons e i) request)))
+ ((eq e 94)
+ (setcar reg-usage i))
+ ((eq e 96)
+ (setcdr reg-usage i))
+ ((eq e t)
+ (setcar reg-usage i)
+ (setcdr reg-usage i)))))))
+ (setcdr (assq :initial spec-attrs) initial)
+ (setcdr (assq :reg-usage spec-attrs) reg-usage)
+ (setcdr (assq :request spec-attrs) request)
+
+ ;; Change :flags value from a list to a bit-mask.
+ (let ((bits 0)
+ (i 0))
+ (dolist (elt coding-system-iso-2022-flags)
+ (if (memq elt flags)
+ (setq bits (logior bits (lsh 1 i))))
+ (setq i (1+ i)))
+ (setcdr (assq :flags spec-attrs) bits))))
+
+ ;; Add :name and :docstring properties to PROPS.
+ (setq props
+ (cons :name (cons name (cons :docstring (cons docstring props)))))
+ (setcdr (assq :plist common-attrs) props)
+
+ (apply 'define-coding-system-internal
+ name (mapcar 'cdr (append common-attrs spec-attrs)))))
+
+(defun coding-system-doc-string (coding-system)
+ "Return the documentation string for CODING-SYSTEM."
+ (plist-get (coding-system-plist coding-system) :docstring))
(defun coding-system-mnemonic (coding-system)
"Return the mnemonic character of CODING-SYSTEM.
The mnemonic character of a coding system is used in mode line
to indicate the coding system. If the arg is nil, return ?-."
- (let ((spec (coding-system-spec coding-system)))
- (if spec (aref spec coding-spec-mnemonic-idx) ?-)))
-
-(defun coding-system-doc-string (coding-system)
- "Return the documentation string for CODING-SYSTEM."
- (aref (coding-system-spec coding-system) coding-spec-doc-string-idx))
+ (plist-get (coding-system-plist coding-system) :mnemonic))
-(defun coding-system-plist (coding-system)
- "Return the property list of CODING-SYSTEM."
- (aref (coding-system-spec coding-system) coding-spec-plist-idx))
+(defun coding-system-type (coding-system)
+ "Return the coding type of CODING-SYSTEM.
+A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
+See the function `define-coding-system' for more detail."
+ (plist-get (coding-system-plist coding-system) :coding-type))
-(defun coding-system-flags (coding-system)
- "Return `flags' of CODING-SYSTEM.
-A `flags' of a coding system is a vector of length 32 indicating detailed
-information of a coding system. See the function `make-coding-system'
-for more detail."
- (aref (coding-system-spec coding-system) coding-spec-flags-idx))
+(defun coding-system-charset-list (coding-system)
+ "Return list of charsets supported by COIDNG-SYSTEM.
+If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
+If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
+ (plist-get (coding-system-plist coding-system) :charset-list))
(defun coding-system-get (coding-system prop)
"Extract a value from CODING-SYSTEM's property list for property PROP."
(defun coding-system-put (coding-system prop val)
"Change value in CODING-SYSTEM's property list PROP to VAL."
- (let ((plist (coding-system-plist coding-system)))
- (if plist
- (plist-put plist prop val)
- (aset (coding-system-spec coding-system) coding-spec-plist-idx
- (list prop val)))))
-
-(defun coding-system-category (coding-system)
- "Return the coding category of CODING-SYSTEM.
-See also `coding-category-list'."
- (coding-system-get coding-system 'coding-category))
-
-(defun coding-system-base (coding-system)
- "Return the base coding system of CODING-SYSTEM.
-A base coding system is what made by `make-coding-system'.
-Any alias nor subsidiary coding systems are not base coding system."
- (car (coding-system-get coding-system 'alias-coding-systems)))
+ (plist-put (coding-system-plist coding-system) prop val))
(defalias 'coding-system-parent 'coding-system-base)
(make-obsolete 'coding-system-parent 'coding-system-base "20.3")
;; automatically. Nth element of the vector is the subsidiary coding
;; system whose `eol-type' property is N.
-(defun coding-system-eol-type (coding-system)
- "Return eol-type of CODING-SYSTEM.
-An eol-type is integer 0, 1, 2, or a vector of coding systems.
-
-Integer values 0, 1, and 2 indicate a format of end-of-line; LF,
-CRLF, and CR respectively.
-
-A vector value indicates that a format of end-of-line should be
-detected automatically. Nth element of the vector is the subsidiary
-coding system whose eol-type is N."
- (get coding-system 'eol-type))
-
(defun coding-system-lessp (x y)
(cond ((eq x 'no-conversion) t)
((eq y 'no-conversion) nil)
(setq tail (cdr tail)))))
codings))
-(defun map-charset-chars (func charset)
- "Use FUNC to map over all characters in CHARSET for side effects.
-FUNC is a function of two args, the start and end (inclusive) of a
-character code range. Thus FUNC should iterate over [START, END]."
- (let* ((dim (charset-dimension charset))
- (chars (charset-chars charset))
- (start (if (= chars 94)
- 33
- 32)))
- (if (= dim 1)
- (funcall func
- (make-char charset start)
- (make-char charset (+ start chars -1)))
- (dotimes (i chars)
- (funcall func
- (make-char charset (+ i start) start)
- (make-char charset (+ i start) (+ start chars -1)))))))
-
-(defun register-char-codings (coding-system safe-chars)
- "Add entries for CODING-SYSTEM to `char-coding-system-table'.
-If SAFE-CHARS is a char-table, its non-nil entries specify characters
-which CODING-SYSTEM encodes safely. If SAFE-CHARS is t, register
-CODING-SYSTEM as a general one which can encode all characters."
- (let ((general (char-table-extra-slot char-coding-system-table 0))
- ;; Charsets which have some members in the table, but not all
- ;; of them (i.e. not just a generic character):
- (partials (char-table-extra-slot char-coding-system-table 1)))
- (if (eq safe-chars t)
- (or (memq coding-system general)
- (set-char-table-extra-slot char-coding-system-table 0
- (cons coding-system general)))
- (map-char-table
- (lambda (key val)
- (if (and (>= key 128) val)
- (let ((codings (aref char-coding-system-table key))
- (charset (char-charset key)))
- (unless (memq coding-system codings)
- (if (and (generic-char-p key)
- (memq charset partials))
- ;; The generic char would clobber individual
- ;; entries already in the table. First save the
- ;; separate existing entries for all chars of the
- ;; charset (with the generic entry added, if
- ;; necessary).
- (let (entry existing)
- (map-charset-chars
- (lambda (start end)
- (while (<= start end)
- (setq entry (aref char-coding-system-table start))
- (when entry
- (push (cons
- start
- (if (memq coding-system entry)
- entry
- (cons coding-system entry)))
- existing))
- (setq start (1+ start))))
- charset)
- ;; Update the generic entry.
- (aset char-coding-system-table key
- (cons coding-system codings))
- ;; Override with the saved entries.
- (dolist (elt existing)
- (aset char-coding-system-table (car elt) (cdr elt))))
- (aset char-coding-system-table key
- (cons coding-system codings))
- (unless (or (memq charset partials)
- (generic-char-p key))
- (push charset partials)))))))
- safe-chars)
- (set-char-table-extra-slot char-coding-system-table 1 partials))))
-
-
-(defun make-subsidiary-coding-system (coding-system)
- "Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM."
- (let ((coding-spec (coding-system-spec coding-system))
- (subsidiaries (vector (intern (format "%s-unix" coding-system))
- (intern (format "%s-dos" coding-system))
- (intern (format "%s-mac" coding-system))))
- (i 0)
- temp)
- (while (< i 3)
- (put (aref subsidiaries i) 'coding-system coding-spec)
- (put (aref subsidiaries i) 'eol-type i)
- (add-to-coding-system-list (aref subsidiaries i))
- (setq coding-system-alist
- (cons (list (symbol-name (aref subsidiaries i)))
- coding-system-alist))
- (setq i (1+ i)))
- subsidiaries))
-
-(defun transform-make-coding-system-args (name type &optional doc-string props)
- "For internal use only.
-Transform XEmacs style args for `make-coding-system' to Emacs style.
-Value is a list of transformed arguments."
- (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
- (eol-type (plist-get props 'eol-type))
- properties tmp)
- (cond
- ((eq eol-type 'lf) (setq eol-type 'unix))
- ((eq eol-type 'crlf) (setq eol-type 'dos))
- ((eq eol-type 'cr) (setq eol-type 'mac)))
- (if (setq tmp (plist-get props 'post-read-conversion))
- (setq properties (plist-put properties 'post-read-conversion tmp)))
- (if (setq tmp (plist-get props 'pre-write-conversion))
- (setq properties (plist-put properties 'pre-write-conversion tmp)))
- (cond
- ((eq type 'shift-jis)
- `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
- ((eq type 'iso2022) ; This is not perfect.
- (if (plist-get props 'escape-quoted)
- (error "escape-quoted is not supported: %S"
- `(,name ,type ,doc-string ,props)))
- (let ((g0 (plist-get props 'charset-g0))
- (g1 (plist-get props 'charset-g1))
- (g2 (plist-get props 'charset-g2))
- (g3 (plist-get props 'charset-g3))
- (use-roman
- (and
- (eq (cadr (assoc 'latin-jisx0201
- (plist-get props 'input-charset-conversion)))
- 'ascii)
- (eq (cadr (assoc 'ascii
- (plist-get props 'output-charset-conversion)))
- 'latin-jisx0201)))
- (use-oldjis
- (and
- (eq (cadr (assoc 'japanese-jisx0208-1978
- (plist-get props 'input-charset-conversion)))
- 'japanese-jisx0208)
- (eq (cadr (assoc 'japanese-jisx0208
- (plist-get props 'output-charset-conversion)))
- 'japanese-jisx0208-1978))))
- (if (charsetp g0)
- (if (plist-get props 'force-g0-on-output)
- (setq g0 `(nil ,g0))
- (setq g0 `(,g0 t))))
- (if (charsetp g1)
- (if (plist-get props 'force-g1-on-output)
- (setq g1 `(nil ,g1))
- (setq g1 `(,g1 t))))
- (if (charsetp g2)
- (if (plist-get props 'force-g2-on-output)
- (setq g2 `(nil ,g2))
- (setq g2 `(,g2 t))))
- (if (charsetp g3)
- (if (plist-get props 'force-g3-on-output)
- (setq g3 `(nil ,g3))
- (setq g3 `(,g3 t))))
- `(,name 2 ,mnemonic ,doc-string
- (,g0 ,g1 ,g2 ,g3
- ,(plist-get props 'short)
- ,(not (plist-get props 'no-ascii-eol))
- ,(not (plist-get props 'no-ascii-cntl))
- ,(plist-get props 'seven)
- t
- ,(not (plist-get props 'lock-shift))
- ,use-roman
- ,use-oldjis
- ,(plist-get props 'no-iso6429)
- nil nil nil nil)
- ,properties ,eol-type)))
- ((eq type 'big5)
- `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
- ((eq type 'ccl)
- `(,name 4 ,mnemonic ,doc-string
- (,(plist-get props 'decode) . ,(plist-get props 'encode))
- ,properties ,eol-type))
- (t
- (error "unsupported XEmacs style make-coding-style arguments: %S"
- `(,name ,type ,doc-string ,props))))))
-
-(defun make-coding-system (coding-system type mnemonic doc-string
- &optional
- flags
- properties
- eol-type)
- "Define a new coding system CODING-SYSTEM (symbol).
-Remaining arguments are TYPE, MNEMONIC, DOC-STRING, FLAGS (optional),
-and PROPERTIES (optional) which construct a coding-spec of CODING-SYSTEM
-in the following format:
- [TYPE MNEMONIC DOC-STRING PLIST FLAGS]
-
-TYPE is an integer value indicating the type of the coding system as follows:
- 0: Emacs internal format,
- 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC,
- 2: ISO-2022 including many variants,
- 3: Big5 used mainly on Chinese PC,
- 4: private, CCL programs provide encoding/decoding algorithm,
- 5: Raw-text, which means that text contains random 8-bit codes.
-
-MNEMONIC is a character to be displayed on mode line for the coding system.
-
-DOC-STRING is a documentation string for the coding system.
-
-FLAGS specifies more detailed information of the coding system as follows:
-
- If TYPE is 2 (ISO-2022), FLAGS is a list of these elements:
- CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM,
- ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT,
- USE-ROMAN, USE-OLDJIS, NO-ISO6429, INIT-BOL, DESIGNATION-BOL,
- SAFE, ACCEPT-LATIN-EXTRA-CODE.
- CHARSETn are character sets initially designated to Gn graphic registers.
- If CHARSETn is nil, Gn is never used.
- If CHARSETn is t, Gn can be used but nothing designated initially.
- If CHARSETn is a list of character sets, those character sets are
- designated to Gn on output, but nothing designated to Gn initially.
- But, character set `ascii' can be designated only to G0.
- SHORT-FORM non-nil means use short designation sequence on output.
- ASCII-EOL non-nil means designate ASCII to g0 at end of line on output.
- ASCII-CNTL non-nil means designate ASCII to g0 before control codes and
- SPACE on output.
- SEVEN non-nil means use 7-bit code only on output.
- LOCKING-SHIFT non-nil means use locking-shift.
- SINGLE-SHIFT non-nil means use single-shift.
- USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII.
- USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983.
- NO-ISO6429 non-nil means not use ISO6429's direction specification.
- INIT-BOL non-nil means any designation state is assumed to be reset
- to initial at each beginning of line on output.
- DESIGNATION-BOL non-nil means designation sequences should be placed
- at beginning of line on output.
- SAFE non-nil means convert unsafe characters to `?' on output.
- Characters not specified in the property `safe-charsets' nor
- `safe-chars' are unsafe.
- ACCEPT-LATIN-EXTRA-CODE non-nil means code-detection routine accepts
- a code specified in `latin-extra-code-table' (which see) as a valid
- code of the coding system.
-
- If TYPE is 4 (private), FLAGS should be a cons of CCL programs, for
- decoding and encoding. CCL programs should be specified by their
- symbols.
-
-PROPERTIES is an alist of properties vs the corresponding values. The
-following properties are recognized:
-
- o post-read-conversion
-
- The value is a function to call after some text is inserted and
- decoded by the coding system itself and before any functions in
- `after-insert-functions' are called. The argument of this
- function is the same as for a function in
- `after-insert-file-functions', i.e. LENGTH of the text inserted,
- with point at the head of the text to be decoded.
-
- o pre-write-conversion
-
- The value is a function to call after all functions in
- `write-region-annotate-functions' and `buffer-file-format' are
- called, and before the text is encoded by the coding system itself.
- The arguments to this function are the same as those of a function
- in `write-region-annotate-functions', i.e. FROM and TO, specifying
- a region of text.
-
- o translation-table-for-decode
-
- The value is a translation table to be applied on decoding. See
- the function `make-translation-table' for the format of translation
- table. This is not applicable to type 4 (CCL-based) coding systems.
-
- o translation-table-for-encode
-
- The value is a translation table to be applied on encoding. This is
- not applicable to type 4 (CCL-based) coding systems.
-
- o safe-chars
-
- The value is a char table. If a character has non-nil value in it,
- the character is safely supported by the coding system. This
- overrides the specification of safe-charsets.
-
- o safe-charsets
-
- The value is a list of charsets safely supported by the coding
- system. The value t means that all charsets Emacs handles are
- supported. Even if some charset is not in this list, it doesn't
- mean that the charset can't be encoded in the coding system;
- it just means that some other receiver of text encoded
- in the coding system won't be able to handle that charset.
-
- o mime-charset
-
- The value is a symbol of which name is `MIME-charset' parameter of
- the coding system.
-
- o valid-codes (meaningful only for a coding system based on CCL)
-
- The value is a list to indicate valid byte ranges of the encoded
- file. Each element of the list is an integer or a cons of integer.
- In the former case, the integer value is a valid byte code. In the
- latter case, the integers specify the range of valid byte codes.
-
-These properties are set in PLIST, a property list. This function
-also sets properties `coding-category' and `alias-coding-systems'
-automatically.
-
-EOL-TYPE specifies the EOL type of the coding-system in one of the
-following formats:
-
- o symbol (unix, dos, or mac)
-
- The symbol `unix' means Unix-like EOL (LF), `dos' means
- DOS-like EOL (CRLF), and `mac' means MAC-like EOL (CR).
-
- o number (0, 1, or 2)
-
- The number 0, 1, and 2 mean UNIX, DOS, and MAC-like EOL
- respectively.
-
- o vector of coding-systems of length 3
-
- The EOL type is detected automatically for the coding system.
- And, according to the detected EOL type, one of the coding
- systems in the vector is selected. Elements of the vector
- corresponds to Unix-like EOL, DOS-like EOL, and Mac-like EOL
- in this order.
-
-Kludgy features for backward compatibility:
-
-1. If TYPE is 4 and car or cdr of FLAGS is a vector, the vector is
-treated as a compiled CCL code.
-
-2. If PROPERTIES is just a list of character sets, the list is set as
-a value of `safe-charsets' in PLIST."
-
- ;; For compatiblity with XEmacs, we check the type of TYPE. If it
- ;; is a symbol, perhaps, this function is called with XEmacs-style
- ;; arguments. Here, try to transform that kind of arguments to
- ;; Emacs style.
- (if (symbolp type)
- (let ((args (transform-make-coding-system-args coding-system type
- mnemonic doc-string)))
- (setq coding-system (car args)
- type (nth 1 args)
- mnemonic (nth 2 args)
- doc-string (nth 3 args)
- flags (nth 4 args)
- properties (nth 5 args)
- eol-type (nth 6 args))))
-
- ;; Set a value of `coding-system' property.
- (let ((coding-spec (make-vector 5 nil))
- (no-initial-designation t)
- (no-alternative-designation t)
- (accept-latin-extra-code nil)
- coding-category)
- (if (or (not (integerp type)) (< type 0) (> type 5))
- (error "TYPE argument must be 0..5"))
- (if (or (not (integerp mnemonic)) (<= mnemonic ? ) (> mnemonic 127))
- (error "MNEMONIC argument must be an ASCII printable character"))
- (aset coding-spec coding-spec-type-idx type)
- (aset coding-spec coding-spec-mnemonic-idx mnemonic)
- (aset coding-spec coding-spec-doc-string-idx
- (purecopy (if (stringp doc-string) doc-string "")))
- (cond ((= type 0)
- (setq coding-category 'coding-category-emacs-mule))
- ((= type 1)
- (setq coding-category 'coding-category-sjis))
- ((= type 2) ; ISO2022
- (let ((i 0)
- (vec (make-vector 32 nil))
- (g1-designation nil)
- (fl flags))
- (while (< i 4)
- (let ((charset (car fl)))
- (if (and no-initial-designation
- (> i 0)
- (or (charsetp charset)
- (and (consp charset)
- (charsetp (car charset)))))
- (setq no-initial-designation nil))
- (if (charsetp charset)
- (if (= i 1) (setq g1-designation charset))
- (if (consp charset)
- (let ((tail charset)
- elt)
- (while tail
- (setq elt (car tail))
- (if (eq elt t)
- (setq no-alternative-designation nil)
- (if (and elt (not (charsetp elt)))
- (error "Invalid charset: %s" elt)))
- (setq tail (cdr tail)))
- (setq g1-designation (car charset)))
- (if charset
- (if (eq charset t)
- (setq no-alternative-designation nil)
- (error "Invalid charset: %s" charset)))))
- (aset vec i charset))
- (setq fl (cdr fl) i (1+ i)))
- (while (and (< i 32) fl)
- (aset vec i (car fl))
- (if (and (= i 16) ; ACCEPT-LATIN-EXTRA-CODE
- (car fl))
- (setq accept-latin-extra-code t))
- (setq fl (cdr fl) i (1+ i)))
- (aset coding-spec 4 vec)
- (setq coding-category
- (if (aref vec 8) ; Use locking-shift.
- (or (and (aref vec 7) 'coding-category-iso-7-else)
- 'coding-category-iso-8-else)
- (if (aref vec 7) ; 7-bit only.
- (if (aref vec 9) ; Use single-shift.
- 'coding-category-iso-7-else
- (if no-alternative-designation
- 'coding-category-iso-7-tight
- 'coding-category-iso-7))
- (if (or no-initial-designation
- (not no-alternative-designation))
- 'coding-category-iso-8-else
- (if (and (charsetp g1-designation)
- (= (charset-dimension g1-designation) 2))
- 'coding-category-iso-8-2
- 'coding-category-iso-8-1)))))))
- ((= type 3)
- (setq coding-category 'coding-category-big5))
- ((= type 4) ; private
- (setq coding-category 'coding-category-ccl)
- (if (not (consp flags))
- (error "Invalid FLAGS argument for TYPE 4 (CCL)")
- (let ((decoder (check-ccl-program
- (car flags)
- (intern (format "%s-decoder" coding-system))))
- (encoder (check-ccl-program
- (cdr flags)
- (intern (format "%s-encoder" coding-system)))))
- (if (and decoder encoder)
- (aset coding-spec 4 (cons decoder encoder))
- (error "Invalid FLAGS argument for TYPE 4 (CCL)")))))
- (t ; i.e. (= type 5)
- (setq coding-category 'coding-category-raw-text)))
-
- (let ((plist (list 'coding-category coding-category
- 'alias-coding-systems (list coding-system))))
- (if no-initial-designation
- (plist-put plist 'no-initial-designation t))
- (if (and properties
- (or (eq properties t)
- (not (consp (car properties)))))
- ;; In the old version, the arg PROPERTIES is a list to be
- ;; set in PLIST as a value of property `safe-charsets'.
- (setq properties (list (cons 'safe-charsets properties))))
- ;; In the current version PROPERTIES is a property list.
- ;; Reflect it into PLIST one by one while handling safe-chars
- ;; specially.
- (let ((safe-charsets (cdr (assq 'safe-charsets properties)))
- (safe-chars (cdr (assq 'safe-chars properties)))
- (l properties)
- prop val)
- ;; If only safe-charsets is specified, make a char-table from
- ;; it, and store that char-table as the value of `safe-chars'.
- (if (and (not safe-chars) safe-charsets)
- (let (charset)
- (if (eq safe-charsets t)
- (setq safe-chars t)
- (setq safe-chars (make-char-table 'safe-chars))
- (while safe-charsets
- (setq charset (car safe-charsets)
- safe-charsets (cdr safe-charsets))
- (cond ((eq charset 'ascii)) ; just ignore
- ((eq charset 'eight-bit-control)
- (let ((i 128))
- (while (< i 160)
- (aset safe-chars i t)
- (setq i (1+ i)))))
- ((eq charset 'eight-bit-graphic)
- (let ((i 160))
- (while (< i 256)
- (aset safe-chars i t)
- (setq i (1+ i)))))
- (t
- (aset safe-chars (make-char charset) t))))
- (if accept-latin-extra-code
- (let ((i 128))
- (while (< i 160)
- (if (aref latin-extra-code-table i)
- (aset safe-chars i t))
- (setq i (1+ i))))))
- (setq l (cons (cons 'safe-chars safe-chars) l))))
- (while l
- (setq prop (car (car l)) val (cdr (car l)) l (cdr l))
- (if (eq prop 'safe-chars)
- (progn
- (if (and (symbolp val)
- (get val 'translation-table))
- (setq safe-chars (get val 'translation-table)))
- (register-char-codings coding-system safe-chars)
- (setq val safe-chars)))
- (plist-put plist prop val)))
- ;; The property `coding-category' may have been set differently
- ;; through PROPERTIES.
- (setq coding-category (plist-get plist 'coding-category))
- (aset coding-spec coding-spec-plist-idx plist))
- (put coding-system 'coding-system coding-spec)
- (put coding-category 'coding-systems
- (cons coding-system (get coding-category 'coding-systems))))
-
- ;; Next, set a value of `eol-type' property.
- (if (not eol-type)
- ;; If EOL-TYPE is nil, set a vector of subsidiary coding
- ;; systems, each corresponds to a coding system for the detected
- ;; EOL format.
- (setq eol-type (make-subsidiary-coding-system coding-system)))
- (setq eol-type
- (cond ((or (eq eol-type 'unix) (null eol-type))
- 0)
- ((eq eol-type 'dos)
- 1)
- ((eq eol-type 'mac)
- 2)
- ((or (and (vectorp eol-type)
- (= (length eol-type) 3))
- (and (numberp eol-type)
- (and (>= eol-type 0)
- (<= eol-type 2))))
- eol-type)
- (t
- (error "Invalid EOL-TYPE spec:%S" eol-type))))
- (put coding-system 'eol-type eol-type)
-
- ;; At last, register CODING-SYSTEM in `coding-system-list' and
- ;; `coding-system-alist'.
- (add-to-coding-system-list coding-system)
- (setq coding-system-alist (cons (list (symbol-name coding-system))
- coding-system-alist))
-
- ;; For a coding system of cateogory iso-8-1 and iso-8-2, create
- ;; XXX-with-esc variants.
- (let ((coding-category (coding-system-category coding-system)))
- (if (or (eq coding-category 'coding-category-iso-8-1)
- (eq coding-category 'coding-category-iso-8-2))
- (let ((esc (intern (concat (symbol-name coding-system) "-with-esc")))
- (doc (format "Same as %s but can handle any charsets by ISO's escape sequences." coding-system))
- (safe-charsets (assq 'safe-charsets properties))
- (mime-charset (assq 'mime-charset properties)))
- (if safe-charsets
- (setcdr safe-charsets t)
- (setq properties (cons (cons 'safe-charsets t) properties)))
- (if mime-charset
- (setcdr mime-charset nil))
- (make-coding-system esc type mnemonic doc
- (if (listp (car flags))
- (cons (append (car flags) '(t)) (cdr flags))
- (cons (list (car flags) t) (cdr flags)))
- properties))))
-
- coding-system)
-
-(defun define-coding-system-alias (alias coding-system)
- "Define ALIAS as an alias for coding system CODING-SYSTEM."
- (put alias 'coding-system (coding-system-spec coding-system))
- (nconc (coding-system-get alias 'alias-coding-systems) (list alias))
- (add-to-coding-system-list alias)
- (setq coding-system-alist (cons (list (symbol-name alias))
- coding-system-alist))
- (let ((eol-type (coding-system-eol-type coding-system)))
- (if (vectorp eol-type)
- (put alias 'eol-type (make-subsidiary-coding-system alias))
- (put alias 'eol-type eol-type))))
-
(defun set-buffer-file-coding-system (coding-system &optional force)
"Set the file coding-system of the current buffer to CODING-SYSTEM.
This means that when you save the buffer, it will be converted
(defun set-coding-priority (arg)
"Set priority of coding categories according to ARG.
-ARG is a list of coding categories ordered by priority."
+ARG is a list of coding categories ordered by priority.
+
+This function is provided for backward compatibility.
+Now we have more convenient function `set-coding-system-priority'."
(let ((l arg)
(current-list (copy-sequence coding-category-list)))
;; Check the validity of ARG while deleting coding categories in
;; Must return nil, as build_annotations_2 expects that.
nil)
+(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.0")
+
;;; FILE I/O
(defcustom auto-coding-alist
(when coding-system
(set-buffer-file-coding-system coding-system t)
(if (and enable-multibyte-characters
- (or (eq coding-system 'no-conversion)
- (eq (coding-system-type coding-system) 5))
+ (or (eq (coding-system-type coding-system) 'raw-text))
;; If buffer was unmodified and the size is the
;; same as INSERTED, we must be visiting it.
(not modified-p)
;; But eol-type is not yet set.
(setq local-eol nil))
(if (and buffer-file-coding-system
- (not (eq (coding-system-type buffer-file-coding-system) t)))
- ;; This is not `undecided'.
+ (not (eq (coding-system-type buffer-file-coding-system)
+ 'undecided)))
(setq local-coding (coding-system-base buffer-file-coding-system)))
(if (and (local-variable-p 'buffer-file-coding-system)
;; But eol-type is not found.
;; If EOL conversions are inhibited, force unix eol-type.
(setq found-eol (if inhibit-eol-conversion 0)))
- (if (eq (coding-system-type coding) t)
- (setq found-coding 'undecided)
- (setq found-coding (coding-system-base coding)))
+ (setq found-coding (coding-system-base coding))
(if (and (not found-eol) (eq found-coding 'undecided))
;; No valid coding information found.