From 9d7973530f912c6001445ba9b83b7893b466aee8 Mon Sep 17 00:00:00 2001 From: Noam Postavsky Date: Sat, 1 Jul 2017 22:39:16 -0400 Subject: [PATCH] Optimize skkdic conversion (Bug#28043) The primary speedup comes from the optimizing lookup-nested-alist and set-nested-alist for the case where the key is a string. This brings the time down to less than half the original. * lisp/international/mule-util.el (lookup-nested-alist) (set-nested-alist): Use `assq' instead of `assoc' when KEYSEQ is a string. * lisp/international/ja-dic-cnv.el (skkdic-collect-okuri-nasi) (skkdic-convert-okuri-nasi): Use progress-reporter functions instead of calculating ratio of work done inline. (skkdic-reduced-candidates): Call `char-category-set' on the first character of the string directly, instead of using a regexp for the character category. (skkdic--japanese-category-set): New constant. (skkdic-collect-okuri-nasi): Just set `skkdic-okuri-nasi-entries-count' at once at the end rather than updating it throughout the loop. (skkdic-convert-postfix skkdic-convert-prefix) skkdic-get-candidate-list, skkdic-collect-okuri-nasi) (skkdic-extract-conversion-data): Use `match-string-no-properties' instead of `match-string'. --- lisp/international/ja-dic-cnv.el | 61 ++++++++++++------------- lisp/international/mule-util.el | 77 ++++++++++++++++++++++---------- 2 files changed, 83 insertions(+), 55 deletions(-) diff --git a/lisp/international/ja-dic-cnv.el b/lisp/international/ja-dic-cnv.el index e80b1b28810..63eede093d5 100644 --- a/lisp/international/ja-dic-cnv.el +++ b/lisp/international/ja-dic-cnv.el @@ -125,10 +125,10 @@ ;; Search postfix entries. (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|ー\\)+\\) " nil t) - (let ((kana (match-string 1)) + (let ((kana (match-string-no-properties 1)) str candidates) (while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/") - (setq str (match-string 1)) + (setq str (match-string-no-properties 1)) (if (not (member str candidates)) (setq candidates (cons str candidates))) (goto-char (match-end 1))) @@ -158,10 +158,10 @@ "(skkdic-set-prefix\n")) (save-excursion (while (re-search-forward "^\\(\\(\\cH\\|ー\\)+\\)[<>?] " nil t) - (let ((kana (match-string 1)) + (let ((kana (match-string-no-properties 1)) str candidates) (while (looking-at "/\\([^/\n]+\\)/") - (setq str (match-string 1)) + (setq str (match-string-no-properties 1)) (if (not (member str candidates)) (setq candidates (cons str candidates))) (goto-char (match-end 1))) @@ -180,8 +180,8 @@ (let (candidates) (goto-char from) (while (re-search-forward "/[^/ \n]+" to t) - (setq candidates (cons (buffer-substring (1+ (match-beginning 0)) - (match-end 0)) + (setq candidates (cons (buffer-substring-no-properties + (1+ (match-beginning 0)) (match-end 0)) candidates))) candidates)) @@ -251,12 +251,16 @@ ;; Return list of candidates which excludes some from CANDIDATES. ;; Excluded candidates can be derived from another entry. +(defconst skkdic--japanese-category-set (make-category-set "j")) + (defun skkdic-reduced-candidates (skkbuf kana candidates) (let (elt l) (while candidates (setq elt (car candidates)) (if (or (= (length elt) 1) - (and (string-match "^\\cj" elt) + (and (bool-vector-subsetp + skkdic--japanese-category-set + (char-category-set (aref elt 0))) (not (skkdic-breakup-string skkbuf kana elt 0 (length elt) 'first)))) (setq l (cons elt l))) @@ -267,24 +271,18 @@ (defvar skkdic-okuri-nasi-entries-count 0) (defun skkdic-collect-okuri-nasi () - (message "Collecting OKURI-NASI entries ...") (save-excursion - (let ((prev-ratio 0) - ratio) + (let ((progress (make-progress-reporter "Collecting OKURI-NASI entries" + (point) (point-max) + nil 10))) (while (re-search-forward "^\\(\\(\\cH\\|ー\\)+\\) \\(/\\cj.*\\)/$" nil t) - (let ((kana (match-string 1)) + (let ((kana (match-string-no-properties 1)) (candidates (skkdic-get-candidate-list (match-beginning 3) (match-end 3)))) (setq skkdic-okuri-nasi-entries - (cons (cons kana candidates) skkdic-okuri-nasi-entries) - skkdic-okuri-nasi-entries-count - (1+ skkdic-okuri-nasi-entries-count)) - (setq ratio (floor (* (point) 100.0) (point-max))) - (if (/= (/ prev-ratio 10) (/ ratio 10)) - (progn - (message "collected %2d%% ..." ratio) - (setq prev-ratio ratio))) + (cons (cons kana candidates) skkdic-okuri-nasi-entries)) + (progress-reporter-update progress (point)) (while candidates (let ((entry (lookup-nested-alist (car candidates) skkdic-word-list nil nil t))) @@ -292,26 +290,24 @@ (setcar entry (cons kana (car entry))) (set-nested-alist (car candidates) (list kana) skkdic-word-list))) - (setq candidates (cdr candidates)))))))) + (setq candidates (cdr candidates))))) + (setq skkdic-okuri-nasi-entries-count (length skkdic-okuri-nasi-entries)) + (progress-reporter-done progress)))) (defun skkdic-convert-okuri-nasi (skkbuf buf) - (message "Processing OKURI-NASI entries ...") (with-current-buffer buf (insert ";; Setting okuri-nasi entries.\n" "(skkdic-set-okuri-nasi\n") (let ((l (nreverse skkdic-okuri-nasi-entries)) - (count 0) - (prev-ratio 0) - ratio) + (progress (make-progress-reporter "Processing OKURI-NASI entries" + 0 skkdic-okuri-nasi-entries-count + nil 10)) + (count 0)) (while l (let ((kana (car (car l))) (candidates (cdr (car l)))) - (setq ratio (floor (* count 100.0) skkdic-okuri-nasi-entries-count) - count (1+ count)) - (if (/= (/ prev-ratio 10) (/ ratio 10)) - (progn - (message "processed %2d%% ..." ratio) - (setq prev-ratio ratio))) + (setq count (1+ count)) + (progress-reporter-update progress count) (if (setq candidates (skkdic-reduced-candidates skkbuf kana candidates)) (progn @@ -320,7 +316,8 @@ (insert " " (car candidates)) (setq candidates (cdr candidates))) (insert "\"\n")))) - (setq l (cdr l)))) + (setq l (cdr l))) + (progress-reporter-done progress)) (insert ")\n\n"))) (defun skkdic-convert (filename &optional dirname) @@ -467,7 +464,7 @@ To get complete usage, invoke: (i (match-end 0)) candidates) (while (string-match "[^ ]+" entry i) - (setq candidates (cons (match-string 0 entry) candidates)) + (setq candidates (cons (match-string-no-properties 0 entry) candidates)) (setq i (match-end 0))) (cons (skkdic-get-kana-compact-codes kana) candidates))) diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el index e34b01c3064..257f8854c38 100644 --- a/lisp/international/mule-util.el +++ b/lisp/international/mule-util.el @@ -143,20 +143,43 @@ longer than KEYSEQ. See the documentation of `nested-alist-p' for more detail." (or (nested-alist-p alist) (error "Invalid argument %s" alist)) - (let ((islist (listp keyseq)) - (len (or len (length keyseq))) - (i 0) - key-elt slot) - (while (< i len) - (if (null (nested-alist-p alist)) - (error "Keyseq %s is too long for this nested alist" keyseq)) - (setq key-elt (if islist (nth i keyseq) (aref keyseq i))) - (setq slot (assoc key-elt (cdr alist))) - (unless slot - (setq slot (cons key-elt (list t))) - (setcdr alist (cons slot (cdr alist)))) - (setq alist (cdr slot)) - (setq i (1+ i))) + (let ((len (or len (length keyseq))) + (i 0)) + (cond + ((stringp keyseq) ; We can use `assq' for characters. + (while (< i len) + (if (null (nested-alist-p alist)) + (error "Keyseq %s is too long for this nested alist" keyseq)) + (let* ((key-elt (aref keyseq i)) + (slot (assq key-elt (cdr alist)))) + (unless slot + (setq slot (list key-elt t)) + (push slot (cdr alist))) + (setq alist (cdr slot))) + (setq i (1+ i)))) + ((arrayp keyseq) + (while (< i len) + (if (null (nested-alist-p alist)) + (error "Keyseq %s is too long for this nested alist" keyseq)) + (let* ((key-elt (aref keyseq i)) + (slot (assoc key-elt (cdr alist)))) + (unless slot + (setq slot (list key-elt t)) + (push slot (cdr alist))) + (setq alist (cdr slot))) + (setq i (1+ i)))) + ((listp keyseq) + (while (< i len) + (if (null (nested-alist-p alist)) + (error "Keyseq %s is too long for this nested alist" keyseq)) + (let* ((key-elt (pop keyseq)) + (slot (assoc key-elt (cdr alist)))) + (unless slot + (setq slot (list key-elt t)) + (push slot (cdr alist))) + (setq alist (cdr slot))) + (setq i (1+ i)))) + (t (signal 'wrong-type-argument (list keyseq)))) (setcar alist entry) (if branches (setcdr (last alist) branches)))) @@ -179,15 +202,23 @@ Optional 5th argument NIL-FOR-TOO-LONG non-nil means return nil (setq len (length keyseq))) (let ((i (or start 0))) (if (catch 'lookup-nested-alist-tag - (if (listp keyseq) - (while (< i len) - (if (setq alist (cdr (assoc (nth i keyseq) (cdr alist)))) - (setq i (1+ i)) - (throw 'lookup-nested-alist-tag t)))) - (while (< i len) - (if (setq alist (cdr (assoc (aref keyseq i) (cdr alist)))) - (setq i (1+ i)) - (throw 'lookup-nested-alist-tag t)))) + (cond ((stringp keyseq) ; We can use `assq' for characters. + (while (< i len) + (if (setq alist (cdr (assq (aref keyseq i) (cdr alist)))) + (setq i (1+ i)) + (throw 'lookup-nested-alist-tag t)))) + ((arrayp keyseq) + (while (< i len) + (if (setq alist (cdr (assoc (aref keyseq i) (cdr alist)))) + (setq i (1+ i)) + (throw 'lookup-nested-alist-tag t)))) + ((listp keyseq) + (setq keyseq (nthcdr i keyseq)) + (while (< i len) + (if (setq alist (cdr (assoc (pop keyseq) (cdr alist)))) + (setq i (1+ i)) + (throw 'lookup-nested-alist-tag t)))) + (t (signal 'wrong-type-argument (list keyseq))))) ;; KEYSEQ is too long. (if nil-for-too-long nil i) alist))) -- 2.39.2