From: Eli Zaretskii
Date: Sat, 28 Apr 2018 07:27:53 +0000 (+0300)
Subject: Fix documentation and tests for 'string-distance'
X-Git-Tag: emacs-27.0.90~5082
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=a7a3918a16c85f89d797d48b09e751ab30e0f032;p=emacs.git

Fix documentation and tests for 'string-distance'

* src/fns.c (Fstring_distance): Doc fix.
* doc/lispref/strings.texi (Text Comparison): Document 'string-distance'.
* etc/NEWS: Fix wording and mark as documented in the manuals.
* test/src/fns-tests.el (test-string-distance): Move from subr-tests.el and rename.
---

diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi
index 8a9e27d00ec..70ba1aa613e 100644
--- a/doc/lispref/strings.texi
+++ b/doc/lispref/strings.texi
@@ -673,6 +673,28 @@ of the two strings. The sign is negative if @var{string1} (or its
 specified portion) is less.
 @end defun

+@cindex Levenshtein distance
+@cindex distance between strings
+@cindex edit distance between strings
+@defun string-distance string1 string2 &optional bytecompare
+This function returns the @dfn{Levenshtein distance} between the
+source string @var{string1} and the target string @var{string2}. The
+Levenshtein distance is the number of single-character
+changes---deletions, insertions, or replacements---required to
+transform the source string into the target string; it is one possible
+definition of the @dfn{edit distance} between strings.
+
+Letter-case of the strings is significant for the computed distance,
+but their text properties are ignored. If the optional argument
+@var{bytecompare} is non-@code{nil}, the function calculates the
+distance in terms of bytes instead of characters. The byte-wise
+comparison uses the internal Emacs representation of characters, so it
+will produce inaccurate results for multibyte strings that include raw
+bytes (@pxref{Text Representations}); make the strings unibyte by
+encoding them (@pxref{Explicit Encoding}) if you need accurate results
+with raw bytes.
+@end defun
+
 @defun assoc-string key alist &optional case-fold
 This function works like @code{assoc}, except that @var{key} must be a
 string or symbol, and comparison is done using @code{compare-strings}.
diff --git a/etc/NEWS b/etc/NEWS
index d40f7816b86..32fcdeff918 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -534,7 +534,8 @@ manual for more details.
 +++
 ** New function assoc-delete-all.

-** New function string-distance to calculate Levenshtein distance
++++
+** New function 'string-distance' to calculate the Levenshtein distance
 between two strings.

 ** 'print-quoted' now defaults to t, so if you want to see
diff --git a/src/fns.c b/src/fns.c
index 6e851c8555a..1d69f15b0ed 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -155,9 +155,11 @@ If STRING is multibyte, this may be greater than the length of STRING. */)

 DEFUN ("string-distance", Fstring_distance, Sstring_distance, 2, 3, 0,
 doc: /* Return Levenshtein distance between STRING1 and STRING2.
-If BYTECOMPARE is nil, compare character of strings.
-If BYTECOMPARE is t, compare byte of strings.
-Case is significant, but text properties are ignored. */)
+The distance is the number of deletions, insertions, and substitutions
+required to transform STRING1 into STRING2.
+If BYTECOMPARE is nil or omitted, compute distance in terms of characters.
+If BYTECOMPARE is non-nil, compute distance in terms of bytes.
+Letter-case is significant, but text properties are ignored. */)
 (Lisp_Object string1, Lisp_Object string2, Lisp_Object bytecompare)

 {
diff --git a/test/lisp/subr-tests.el b/test/lisp/subr-tests.el
index 6b80c743a05..52b61d9fb97 100644
--- a/test/lisp/subr-tests.el
+++ b/test/lisp/subr-tests.el
@@ -281,24 +281,6 @@ indirectly `mapbacktrace'."
 (should (equal (string-match-p "\\`[[:blank:]]\\'" "\u3000") 0))
 (should-not (string-match-p "\\`[[:blank:]]\\'" "\N{LINE SEPARATOR}")))

-(ert-deftest subr-tests--string-distance ()
- "Test `string-distance' behavior."
- ;; ASCII characters are always fine
- (should (equal 1 (string-distance "heelo" "hello")))
- (should (equal 2 (string-distance "aeelo" "hello")))
- (should (equal 0 (string-distance "ab" "ab" t)))
- (should (equal 1 (string-distance "ab" "abc" t)))
-
- ;; string containing hanzi character, compare by byte
- (should (equal 6 (string-distance "ab" "ab我她" t)))
- (should (equal 3 (string-distance "ab" "a我b" t)))
- (should (equal 3 (string-distance "我" "她" t)))
-
- ;; string containing hanzi character, compare by character
- (should (equal 2 (string-distance "ab" "ab我她")))
- (should (equal 1 (string-distance "ab" "a我b")))
- (should (equal 1 (string-distance "我" "她"))))
-
 (ert-deftest subr-tests--dolist--wrong-number-of-args ()
 "Test that `dolist' doesn't accept wrong types or length of SPEC,
 cf. Bug#25477."
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index f8554636bac..0301ceaad52 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -575,4 +575,22 @@
 :type 'wrong-type-argument)
 '(wrong-type-argument plistp (:foo 1 . :bar)))))

+(ert-deftest test-string-distance ()
+ "Test `string-distance' behavior."
+ ;; ASCII characters are always fine
+ (should (equal 1 (string-distance "heelo" "hello")))
+ (should (equal 2 (string-distance "aeelo" "hello")))
+ (should (equal 0 (string-distance "ab" "ab" t)))
+ (should (equal 1 (string-distance "ab" "abc" t)))
+
+ ;; string containing hanzi character, compare by byte
+ (should (equal 6 (string-distance "ab" "ab我她" t)))
+ (should (equal 3 (string-distance "ab" "a我b" t)))
+ (should (equal 3 (string-distance "我" "她" t)))
+
+ ;; string containing hanzi character, compare by character
+ (should (equal 2 (string-distance "ab" "ab我她")))
+ (should (equal 1 (string-distance "ab" "a我b")))
+ (should (equal 1 (string-distance "我" "她"))))
+
 (provide 'fns-tests)