"Return a list of Unicode data for unicode CHAR.
Each element is a list of a property description and the property value.
The list is null if CHAR isn't found in `unicodedata-file'."
- (if unicodedata-file
- (save-excursion
- (set-buffer (find-file-noselect unicodedata-file))
- (goto-char (point-min))
- (let ((hex (format "%04X" char))
- found first last)
- (if (re-search-forward (concat "^" hex) nil t)
- (setq found t)
- ;; It's not listed explicitly. Look for ranges, e.g. CJK
- ;; ideographs, and check whether it's in one of them.
- (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t)
- (>= char (setq first
- (string-to-number (match-string 1) 16)))
- (progn
- (forward-line 1)
- (looking-at "^\\([^;]+\\);[^;]+Last>;")
- (> char
- (setq last
- (string-to-number (match-string 1) 16))))))
- (if (and (>= char first)
- (<= char last))
- (setq found t)))
- (if found
- (let ((fields (mapcar (lambda (elt)
- (if (> (length elt) 0)
- elt))
- (cdr (split-string
- (buffer-substring
- (line-beginning-position)
- (line-end-position))
- ";")))))
- ;; The length depends on whether the last field was empty.
- (unless (or (= 13 (length fields))
- (= 14 (length fields)))
- (error "Invalid contents in %s" unicodedata-file))
- ;; The field names and values lists are slightly
- ;; modified from Mule-UCS unidata.el.
- (list
- (list "Name" (let ((name (nth 0 fields)))
- ;; Check for <..., First>, <..., Last>
- (if (string-match "\\`\\(<[^,]+\\)," name)
- (concat (match-string 1 name) ">")
- name)))
- (list "Category"
- (cdr (assoc
- (nth 1 fields)
- '(("Lu" . "uppercase letter")
- ("Ll" . "lowercase letter")
- ("Lt" . "titlecase letter")
- ("Mn" . "non-spacing mark")
- ("Mc" . "spacing-combining mark")
- ("Me" . "enclosing mark")
- ("Nd" . "decimal digit")
- ("Nl" . "letter number")
- ("No" . "other number")
- ("Zs" . "space separator")
- ("Zl" . "line separator")
- ("Zp" . "paragraph separator")
- ("Cc" . "other control")
- ("Cf" . "other format")
- ("Cs" . "surrogate")
- ("Co" . "private use")
- ("Cn" . "not assigned")
- ("Lm" . "modifier letter")
- ("Lo" . "other letter")
- ("Pc" . "connector punctuation")
- ("Pd" . "dash punctuation")
- ("Ps" . "open punctuation")
- ("Pe" . "close punctuation")
- ("Pi" . "initial-quotation punctuation")
- ("Pf" . "final-quotation punctuation")
- ("Po" . "other punctuation")
- ("Sm" . "math symbol")
- ("Sc" . "currency symbol")
- ("Sk" . "modifier symbol")
- ("So" . "other symbol")))))
- (list "Combining class"
- (cdr (assoc
- (string-to-number (nth 2 fields))
- '((0 . "Spacing")
- (1 . "Overlays and interior")
- (7 . "Nuktas")
- (8 . "Hiragana/Katakana voicing marks")
- (9 . "Viramas")
- (10 . "Start of fixed position classes")
- (199 . "End of fixed position classes")
- (200 . "Below left attached")
- (202 . "Below attached")
- (204 . "Below right attached")
- (208 . "Left attached (reordrant around \
+ (when unicodedata-file
+ (unless (file-exists-p unicodedata-file)
+ (error "`unicodedata-file' %s not found" unicodedata-file))
+ (save-excursion
+ (set-buffer (find-file-noselect unicodedata-file))
+ (goto-char (point-min))
+ (let ((hex (format "%04X" char))
+ found first last)
+ (if (re-search-forward (concat "^" hex) nil t)
+ (setq found t)
+ ;; It's not listed explicitly. Look for ranges, e.g. CJK
+ ;; ideographs, and check whether it's in one of them.
+ (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t)
+ (>= char (setq first
+ (string-to-number (match-string 1) 16)))
+ (progn
+ (forward-line 1)
+ (looking-at "^\\([^;]+\\);[^;]+Last>;")
+ (> char
+ (setq last
+ (string-to-number (match-string 1) 16))))))
+ (if (and (>= char first)
+ (<= char last))
+ (setq found t)))
+ (if found
+ (let ((fields (mapcar (lambda (elt)
+ (if (> (length elt) 0)
+ elt))
+ (cdr (split-string
+ (buffer-substring
+ (line-beginning-position)
+ (line-end-position))
+ ";")))))
+ ;; The length depends on whether the last field was empty.
+ (unless (or (= 13 (length fields))
+ (= 14 (length fields)))
+ (error "Invalid contents in %s" unicodedata-file))
+ ;; The field names and values lists are slightly
+ ;; modified from Mule-UCS unidata.el.
+ (list
+ (list "Name" (let ((name (nth 0 fields)))
+ ;; Check for <..., First>, <..., Last>
+ (if (string-match "\\`\\(<[^,]+\\)," name)
+ (concat (match-string 1 name) ">")
+ name)))
+ (list "Category"
+ (cdr (assoc
+ (nth 1 fields)
+ '(("Lu" . "uppercase letter")
+ ("Ll" . "lowercase letter")
+ ("Lt" . "titlecase letter")
+ ("Mn" . "non-spacing mark")
+ ("Mc" . "spacing-combining mark")
+ ("Me" . "enclosing mark")
+ ("Nd" . "decimal digit")
+ ("Nl" . "letter number")
+ ("No" . "other number")
+ ("Zs" . "space separator")
+ ("Zl" . "line separator")
+ ("Zp" . "paragraph separator")
+ ("Cc" . "other control")
+ ("Cf" . "other format")
+ ("Cs" . "surrogate")
+ ("Co" . "private use")
+ ("Cn" . "not assigned")
+ ("Lm" . "modifier letter")
+ ("Lo" . "other letter")
+ ("Pc" . "connector punctuation")
+ ("Pd" . "dash punctuation")
+ ("Ps" . "open punctuation")
+ ("Pe" . "close punctuation")
+ ("Pi" . "initial-quotation punctuation")
+ ("Pf" . "final-quotation punctuation")
+ ("Po" . "other punctuation")
+ ("Sm" . "math symbol")
+ ("Sc" . "currency symbol")
+ ("Sk" . "modifier symbol")
+ ("So" . "other symbol")))))
+ (list "Combining class"
+ (cdr (assoc
+ (string-to-number (nth 2 fields))
+ '((0 . "Spacing")
+ (1 . "Overlays and interior")
+ (7 . "Nuktas")
+ (8 . "Hiragana/Katakana voicing marks")
+ (9 . "Viramas")
+ (10 . "Start of fixed position classes")
+ (199 . "End of fixed position classes")
+ (200 . "Below left attached")
+ (202 . "Below attached")
+ (204 . "Below right attached")
+ (208 . "Left attached (reordrant around \
single base character)")
- (210 . "Right attached")
- (212 . "Above left attached")
- (214 . "Above attached")
- (216 . "Above right attached")
- (218 . "Below left")
- (220 . "Below")
- (222 . "Below right")
- (224 . "Left (reordrant around single base \
+ (210 . "Right attached")
+ (212 . "Above left attached")
+ (214 . "Above attached")
+ (216 . "Above right attached")
+ (218 . "Below left")
+ (220 . "Below")
+ (222 . "Below right")
+ (224 . "Left (reordrant around single base \
character)")
- (226 . "Right")
- (228 . "Above left")
- (230 . "Above")
- (232 . "Above right")
- (233 . "Double below")
- (234 . "Double above")
- (240 . "Below (iota subscript)")))))
- (list "Bidi category"
- (cdr (assoc
- (nth 3 fields)
- '(("L" . "Left-to-Right")
- ("LRE" . "Left-to-Right Embedding")
- ("LRO" . "Left-to-Right Override")
- ("R" . "Right-to-Left")
- ("AL" . "Right-to-Left Arabic")
- ("RLE" . "Right-to-Left Embedding")
- ("RLO" . "Right-to-Left Override")
- ("PDF" . "Pop Directional Format")
- ("EN" . "European Number")
- ("ES" . "European Number Separator")
- ("ET" . "European Number Terminator")
- ("AN" . "Arabic Number")
- ("CS" . "Common Number Separator")
- ("NSM" . "Non-Spacing Mark")
- ("BN" . "Boundary Neutral")
- ("B" . "Paragraph Separator")
- ("S" . "Segment Separator")
- ("WS" . "Whitespace")
- ("ON" . "Other Neutrals")))))
- (list "Decomposition"
- (if (nth 4 fields)
- (let* ((parts (split-string (nth 4 fields)))
- (info (car parts)))
- (if (string-match "\\`<\\(.+\\)>\\'" info)
- (setq info (match-string 1 info))
- (setq info nil))
- (if info (setq parts (cdr parts)))
- (setq parts (mapconcat
- (lambda (arg)
- (string (string-to-number arg 16)))
- parts " "))
- (concat info parts))))
- (list "Decimal digit value"
- (if (nth 5 fields)
- (string-to-number (nth 5 fields))))
- (list "Digit value"
- (if (nth 6 fields)
- (string-to-number (nth 6 fields))))
- (list "Numeric value"
- (if (nth 7 fields)
- (string-to-number (nth 6 fields))))
- (list "Mirrored"
- (if (equal "Y" (nth 8 fields))
- "yes"))
- (list "Old name" (nth 9 fields))
- (list "ISO 10646 comment" (nth 10 fields))
- (list "Uppercase" (and (nth 11 fields)
- (string (string-to-number
- (nth 11 fields) 16))))
- (list "Lowercase" (and (nth 12 fields)
- (string (string-to-number
- (nth 12 fields) 16))))
- (list "Titlecase" (and (nth 13 fields)
- (string (string-to-number
- (nth 13 fields) 16)))))))))))
+ (226 . "Right")
+ (228 . "Above left")
+ (230 . "Above")
+ (232 . "Above right")
+ (233 . "Double below")
+ (234 . "Double above")
+ (240 . "Below (iota subscript)")))))
+ (list "Bidi category"
+ (cdr (assoc
+ (nth 3 fields)
+ '(("L" . "Left-to-Right")
+ ("LRE" . "Left-to-Right Embedding")
+ ("LRO" . "Left-to-Right Override")
+ ("R" . "Right-to-Left")
+ ("AL" . "Right-to-Left Arabic")
+ ("RLE" . "Right-to-Left Embedding")
+ ("RLO" . "Right-to-Left Override")
+ ("PDF" . "Pop Directional Format")
+ ("EN" . "European Number")
+ ("ES" . "European Number Separator")
+ ("ET" . "European Number Terminator")
+ ("AN" . "Arabic Number")
+ ("CS" . "Common Number Separator")
+ ("NSM" . "Non-Spacing Mark")
+ ("BN" . "Boundary Neutral")
+ ("B" . "Paragraph Separator")
+ ("S" . "Segment Separator")
+ ("WS" . "Whitespace")
+ ("ON" . "Other Neutrals")))))
+ (list "Decomposition"
+ (if (nth 4 fields)
+ (let* ((parts (split-string (nth 4 fields)))
+ (info (car parts)))
+ (if (string-match "\\`<\\(.+\\)>\\'" info)
+ (setq info (match-string 1 info))
+ (setq info nil))
+ (if info (setq parts (cdr parts)))
+ (setq parts (mapconcat
+ (lambda (arg)
+ (string (string-to-number arg 16)))
+ parts " "))
+ (concat info parts))))
+ (list "Decimal digit value"
+ (if (nth 5 fields)
+ (string-to-number (nth 5 fields))))
+ (list "Digit value"
+ (if (nth 6 fields)
+ (string-to-number (nth 6 fields))))
+ (list "Numeric value"
+ (if (nth 7 fields)
+ (string-to-number (nth 6 fields))))
+ (list "Mirrored"
+ (if (equal "Y" (nth 8 fields))
+ "yes"))
+ (list "Old name" (nth 9 fields))
+ (list "ISO 10646 comment" (nth 10 fields))
+ (list "Uppercase" (and (nth 11 fields)
+ (string (string-to-number
+ (nth 11 fields) 16))))
+ (list "Lowercase" (and (nth 12 fields)
+ (string (string-to-number
+ (nth 12 fields) 16))))
+ (list "Titlecase" (and (nth 13 fields)
+ (string (string-to-number
+ (nth 13 fields) 16)))))))))))
;;; mule-diag.el ends here