Improve scoring algorithm for flex-style completions

author João Távora <joaotavora@gmail.com>

Sun, 27 Oct 2019 00:33:54 +0000 (01:33 +0100)

committer João Távora <joaotavora@gmail.com>

Sun, 27 Oct 2019 00:46:56 +0000 (01:46 +0100)
author João Távora <joaotavora@gmail.com>
Sun, 27 Oct 2019 00:33:54 +0000 (01:33 +0100)
committer João Távora <joaotavora@gmail.com>
Sun, 27 Oct 2019 00:46:56 +0000 (01:46 +0100)
diff --git a/lisp/minibuffer.el b/lisp/minibuffer.el

index 542e672400ac52454a76d9d7226f2adf7c2630c9..c92a91e76ced7ce14ddcd3e9d82f7fad86c5d7b4 100644 (file)
--- a/lisp/minibuffer.el
+++ b/lisp/minibuffer.el
@@ -3060,16 +3060,18 @@ PATTERN is as returned by `completion-pcm--string->pattern'."
             (when (string-match-p regex c) (push c poss)))
           (nreverse poss))))))
  
-(defvar flex-score-match-tightness 100
+(defvar flex-score-match-tightness 3
    "Controls how the `flex' completion style scores its matches.
  
-Value is a positive number.  Values smaller than one make the
-scoring formula value matches scattered along the string, while
-values greater than one make the formula value tighter matches.
-I.e \"foo\" matches both strings \"barbazfoo\" and \"fabrobazo\",
-which are of equal length, but only a value greater than one will
-score the former (which has one \"hole\") higher than the
-latter (which has two).")
+Value is a positive number.  A number smaller than 1 makes the
+scoring formula reward matches scattered along the string, while
+a number greater than one makes the formula reward matches that
+are clumped together.  For example, \"foo\" matches both strings
+\"fbarbazoo\" and \"fabrobazo\", which are of equal length, but
+only a value greater than one will score the former (which has
+one large \"hole\" and a clumped-together \"oo\" match) higher
+than the latter (which has two \"holes\" and three
+one-letter-long matches).")
  
  (defun completion-pcm--hilit-commonality (pattern completions)
    (when completions
@@ -3086,27 +3088,39 @@ latter (which has two).")
                  (end (pop md))
                  (len (length str))
                  ;; To understand how this works, consider these bad
-                ;; ascii(tm) diagrams showing how the pattern \"foo\"
-                ;; flex-matches \"fabrobazo" and
-                ;; \"barfoobaz\":
+                ;; ascii(tm) diagrams showing how the pattern "foo"
+                ;; flex-matches "fabrobazo", "fbarbazoo" and
+                ;; "barfoobaz":
  
                  ;;      f abr o baz o
                  ;;      + --- + --- +
  
+                ;;      f barbaz oo
+                ;;      + ------ ++
+
                  ;;      bar foo baz
-                ;;      --- +++ ---
+                ;;          +++
  
-                ;; Where + indicates parts where the pattern matched,
-                ;; - where it didn't match.  The score is a number
+                ;; "+" indicates parts where the pattern matched.  A
+                ;; "hole" in the middle of the string is indicated by
+                ;; "-".  Note that there are no "holes" near the edges
+                ;; of the string.  The completion score is a number
                  ;; bound by ]0..1]: the higher the better and only a
                  ;; perfect match (pattern equals string) will have
                  ;; score 1.  The formula takes the form of a quotient.
                  ;; For the numerator, we use the number of +, i.e. the
                  ;; length of the pattern.  For the denominator, it
-                ;; sums (1+ (/ (grouplen - 1)
-                ;; flex-score-match-tightness)) across all groups of
-                ;; -, sums one to that total, and then multiples by
-                ;; the length of the string.
+                ;; first computes
+                ;;
+                ;;     hole_i_contrib = 1 + (Li-1)^(1/tightness)
+                ;;
+                ;; for each hole "i" of length "Li", where tightness
+                ;; is given by `flex-score-match-tightness'.  The
+                ;; final value for the denominator is then given by:
+                ;;
+                ;;    (SUM_across_i(hole_i_contrib) + 1) * len
+                ;;
+                ;; where "len" is the string's length.
                  (score-numerator 0)
                  (score-denominator 0)
                  (last-b 0)
@@ -3115,13 +3129,15 @@ latter (which has two).")
                     "Update score variables given match range (A B)."
                     (setq
                      score-numerator   (+ score-numerator (- b a)))
-                   (unless (= a last-b)
+                   (unless (or (= a last-b)
+                               (zerop last-b)
+                               (= a (length str)))
                       (setq
                        score-denominator (+ score-denominator
                                             1
-                                           (/ (- a last-b 1)
-                                              flex-score-match-tightness
-                                              1.0))))
+                                           (expt (- a last-b 1)
+                                                 (/ 1.0
+                                                    flex-score-match-tightness)))))
                     (setq
                      last-b              b))))
             (funcall update-score start start)
author	João Távora <joaotavora@gmail.com>
	Sun, 27 Oct 2019 00:33:54 +0000 (01:33 +0100)
committer	João Távora <joaotavora@gmail.com>
	Sun, 27 Oct 2019 00:46:56 +0000 (01:46 +0100)