From 63fd71cd092de8daded15e32c268215b62c488b9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jo=C3=A3o=20T=C3=A1vora?= Date: Sun, 27 Oct 2019 01:33:54 +0100 Subject: [PATCH] Improve scoring algorithm for flex-style completions The previous algorithm had two problems: it considered non-matches in the beginning and end of the string as matching "holes" and failed to penalize larger holes, making flex-score-match-tightness only effective in some corner cases. The new formula, which is described in code and in pseudo-code in the comments, fixes these problems. As a result, by default, C-h f flex now correctly bubbles up "company-search-flex-regexp" to the top, in front of "file-exists-p". With a flex-score-match-tightness smaller than 1.0, the situation is reversed. * lisp/minibuffer.el (flex-score-match-tightness): Adjust default value. Improve docstring example. (completion-pcm--hilit-commonality): Improve example. Remove unused variable. Improve algorithm. --- lisp/minibuffer.el | 60 +++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/lisp/minibuffer.el b/lisp/minibuffer.el index 542e672400a..c92a91e76ce 100644 --- a/lisp/minibuffer.el +++ b/lisp/minibuffer.el @@ -3060,16 +3060,18 @@ PATTERN is as returned by `completion-pcm--string->pattern'." (when (string-match-p regex c) (push c poss))) (nreverse poss)))))) -(defvar flex-score-match-tightness 100 +(defvar flex-score-match-tightness 3 "Controls how the `flex' completion style scores its matches. -Value is a positive number. Values smaller than one make the -scoring formula value matches scattered along the string, while -values greater than one make the formula value tighter matches. -I.e \"foo\" matches both strings \"barbazfoo\" and \"fabrobazo\", -which are of equal length, but only a value greater than one will -score the former (which has one \"hole\") higher than the -latter (which has two).") +Value is a positive number. A number smaller than 1 makes the +scoring formula reward matches scattered along the string, while +a number greater than one make the formula reward matches that +are clumped together. I.e \"foo\" matches both strings +\"fbarbazoo\" and \"fabrobazo\", which are of equal length, but +only a value greater than one will score the former (which has +one large \"hole\" and a clumped-together \"oo\" match) higher +than the latter (which has two \"holes\" and three +one-letter-long matches).") (defun completion-pcm--hilit-commonality (pattern completions) (when completions @@ -3086,27 +3088,39 @@ latter (which has two).") (end (pop md)) (len (length str)) ;; To understand how this works, consider these bad - ;; ascii(tm) diagrams showing how the pattern \"foo\" - ;; flex-matches \"fabrobazo" and - ;; \"barfoobaz\": + ;; ascii(tm) diagrams showing how the pattern "foo" + ;; flex-matches "fabrobazo", "fbarbazoo" and + ;; "barfoobaz": ;; f abr o baz o ;; + --- + --- + + ;; f barbaz oo + ;; + ------ ++ + ;; bar foo baz - ;; --- +++ --- + ;; +++ - ;; Where + indicates parts where the pattern matched, - ;; - where it didn't match. The score is a number + ;; "+" indicates parts where the pattern matched. A + ;; "hole" in the middle of the string is indicated by + ;; "-". Note that there are no "holes" near the edges + ;; of the string. The completion score is a number ;; bound by ]0..1]: the higher the better and only a ;; perfect match (pattern equals string) will have ;; score 1. The formula takes the form of a quotient. ;; For the numerator, we use the number of +, i.e. the ;; length of the pattern. For the denominator, it - ;; sums (1+ (/ (grouplen - 1) - ;; flex-score-match-tightness)) across all groups of - ;; -, sums one to that total, and then multiples by - ;; the length of the string. + ;; first computes + ;; + ;; hole_i_contrib = 1 + (Li-1)^(1/tightness) + ;; + ;; , for each hole "i" of length "Li", where tightness + ;; is given by `flex-score-match-tightness'. The + ;; final value for the denominator is then given by: + ;; + ;; (SUM_across_i(hole_i_contrib) + 1) * len + ;; + ;; , where "len" is the string's length. (score-numerator 0) (score-denominator 0) (last-b 0) @@ -3115,13 +3129,15 @@ latter (which has two).") "Update score variables given match range (A B)." (setq score-numerator (+ score-numerator (- b a))) - (unless (= a last-b) + (unless (or (= a last-b) + (zerop last-b) + (= a (length str))) (setq score-denominator (+ score-denominator 1 - (/ (- a last-b 1) - flex-score-match-tightness - 1.0)))) + (expt (- a last-b 1) + (/ 1.0 + flex-score-match-tightness))))) (setq last-b b)))) (funcall update-score start start) -- 2.39.2