]> git.eshelyaron.com Git - emacs.git/commitdiff
Improve scoring algorithm for flex-style completions
authorJoão Távora <joaotavora@gmail.com>
Sun, 27 Oct 2019 00:33:54 +0000 (01:33 +0100)
committerJoão Távora <joaotavora@gmail.com>
Sun, 27 Oct 2019 00:46:56 +0000 (01:46 +0100)
The previous algorithm had two problems: it considered non-matches in
the beginning and end of the string as matching "holes" and failed to
penalize larger holes, making flex-score-match-tightness only
effective in some corner cases.

The new formula, which is described in code and in pseudo-code in the
comments, fixes these problems.

As a result, by default, C-h f flex now correctly bubbles up
"company-search-flex-regexp" to the top, in front of "file-exists-p".
With a flex-score-match-tightness smaller than 1.0, the situation is
reversed.

* lisp/minibuffer.el (flex-score-match-tightness): Adjust default
value.  Improve docstring example.
(completion-pcm--hilit-commonality): Improve example.  Remove unused
variable.  Improve algorithm.

lisp/minibuffer.el

index 542e672400ac52454a76d9d7226f2adf7c2630c9..c92a91e76ced7ce14ddcd3e9d82f7fad86c5d7b4 100644 (file)
@@ -3060,16 +3060,18 @@ PATTERN is as returned by `completion-pcm--string->pattern'."
            (when (string-match-p regex c) (push c poss)))
          (nreverse poss))))))
 
-(defvar flex-score-match-tightness 100
+(defvar flex-score-match-tightness 3
   "Controls how the `flex' completion style scores its matches.
 
-Value is a positive number.  Values smaller than one make the
-scoring formula value matches scattered along the string, while
-values greater than one make the formula value tighter matches.
-I.e \"foo\" matches both strings \"barbazfoo\" and \"fabrobazo\",
-which are of equal length, but only a value greater than one will
-score the former (which has one \"hole\") higher than the
-latter (which has two).")
+Value is a positive number.  A number smaller than 1 makes the
+scoring formula reward matches scattered along the string, while
+a number greater than one make the formula reward matches that
+are clumped together.  I.e \"foo\" matches both strings
+\"fbarbazoo\" and \"fabrobazo\", which are of equal length, but
+only a value greater than one will score the former (which has
+one large \"hole\" and a clumped-together \"oo\" match) higher
+than the latter (which has two \"holes\" and three
+one-letter-long matches).")
 
 (defun completion-pcm--hilit-commonality (pattern completions)
   (when completions
@@ -3086,27 +3088,39 @@ latter (which has two).")
                 (end (pop md))
                 (len (length str))
                 ;; To understand how this works, consider these bad
-                ;; ascii(tm) diagrams showing how the pattern \"foo\"
-                ;; flex-matches \"fabrobazo" and
-                ;; \"barfoobaz\":
+                ;; ascii(tm) diagrams showing how the pattern "foo"
+                ;; flex-matches "fabrobazo", "fbarbazoo" and
+                ;; "barfoobaz":
 
                 ;;      f abr o baz o
                 ;;      + --- + --- +
 
+                ;;      f barbaz oo
+                ;;      + ------ ++
+
                 ;;      bar foo baz
-                ;;      --- +++ ---
+                ;;          +++
 
-                ;; Where + indicates parts where the pattern matched,
-                ;; - where it didn't match.  The score is a number
+                ;; "+" indicates parts where the pattern matched.  A
+                ;; "hole" in the middle of the string is indicated by
+                ;; "-".  Note that there are no "holes" near the edges
+                ;; of the string.  The completion score is a number
                 ;; bound by ]0..1]: the higher the better and only a
                 ;; perfect match (pattern equals string) will have
                 ;; score 1.  The formula takes the form of a quotient.
                 ;; For the numerator, we use the number of +, i.e. the
                 ;; length of the pattern.  For the denominator, it
-                ;; sums (1+ (/ (grouplen - 1)
-                ;; flex-score-match-tightness)) across all groups of
-                ;; -, sums one to that total, and then multiples by
-                ;; the length of the string.
+                ;; first computes
+                ;;
+                ;;     hole_i_contrib = 1 + (Li-1)^(1/tightness)
+                ;;
+                ;; , for each hole "i" of length "Li", where tightness
+                ;; is given by `flex-score-match-tightness'.  The
+                ;; final value for the denominator is then given by:
+                ;;
+                ;;    (SUM_across_i(hole_i_contrib) + 1) * len
+                ;;
+                ;; , where "len" is the string's length.
                 (score-numerator 0)
                 (score-denominator 0)
                 (last-b 0)
@@ -3115,13 +3129,15 @@ latter (which has two).")
                    "Update score variables given match range (A B)."
                    (setq
                     score-numerator   (+ score-numerator (- b a)))
-                   (unless (= a last-b)
+                   (unless (or (= a last-b)
+                               (zerop last-b)
+                               (= a (length str)))
                      (setq
                       score-denominator (+ score-denominator
                                            1
-                                           (/ (- a last-b 1)
-                                              flex-score-match-tightness
-                                              1.0))))
+                                           (expt (- a last-b 1)
+                                                 (/ 1.0
+                                                    flex-score-match-tightness)))))
                    (setq
                     last-b              b))))
            (funcall update-score start start)