From: Eli Zaretskii <eliz@gnu.org>
Date: Sun, 3 Nov 2019 16:08:45 +0000 (+0200)
Subject: Fix case-insensitive completion of non-ASCII file names
X-Git-Tag: emacs-27.0.90~753
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=0b21ecdb5ea9db8cf7a4a4ee59b29bf6273f2915;p=emacs.git

Fix case-insensitive completion of non-ASCII file names

* src/dired.c (scmp): Comment on (non)applicability to
comparisons of  non-ASCII strings case-insensitively.
(file_name_completion): Decode early the file names read from
the directory, and use the decoded names for comparison when
letter-case should be ignored.  (Bug#11339)
---

diff --git a/src/dired.c b/src/dired.c
index 3768b6dbb7c..0e1c88f03ce 100644
--- a/src/dired.c
+++ b/src/dired.c
@@ -481,18 +481,32 @@ file_name_completion (Lisp_Object file, Lisp_Object dirname, bool all_flag,
   record_unwind_protect_ptr (directory_files_internal_unwind, d);
 
   /* Loop reading directory entries.  */
+  Lisp_Object zero = make_fixnum (0);
+  ptrdiff_t enc_file_len = SCHARS (encoded_file);
+  Lisp_Object file_len = make_fixnum (SCHARS (file));
   for (struct dirent *dp; (dp = read_dirent (d, dirname)); )
     {
       ptrdiff_t len = dirent_namelen (dp);
       bool canexclude = 0;
 
       maybe_quit ();
-      if (len < SCHARS (encoded_file)
-	  || (scmp (dp->d_name, SSDATA (encoded_file),
-		    SCHARS (encoded_file))
-	      >= 0))
+
+      if (len < enc_file_len
+	  /* scmp cannot reliably compare non-ASCII strings while
+	     ignoring letter-case.  */
+	  || (!completion_ignore_case
+	      && scmp (dp->d_name, SSDATA (encoded_file), enc_file_len) >= 0))
 	continue;
 
+      name = make_unibyte_string (dp->d_name, len);
+      name = DECODE_FILE (name);
+      ptrdiff_t name_blen = SBYTES (name), name_len = SCHARS (name);
+      if (completion_ignore_case
+	  && !EQ (Fcompare_strings (name, zero, file_len, file, zero, file_len,
+				    Qt),
+		  Qt))
+	    continue;
+
       switch (dirent_type (dp))
 	{
 	case DT_DIR:
@@ -515,6 +529,7 @@ file_name_completion (Lisp_Object file, Lisp_Object dirname, bool all_flag,
       if (!all_flag)
 	{
 	  ptrdiff_t skip;
+	  Lisp_Object cmp_len = make_fixnum (name_len);
 
 #if 0 /* FIXME: The `scmp' call compares an encoded and a decoded string. */
 	  /* If this entry matches the current bestmatch, the only
@@ -538,7 +553,7 @@ file_name_completion (Lisp_Object file, Lisp_Object dirname, bool all_flag,
 		 actually in the way in a directory with only one file.  */
 	      if (TRIVIAL_DIRECTORY_ENTRY (dp->d_name))
 		canexclude = 1;
-	      else if (len > SCHARS (encoded_file))
+	      else if (len > enc_file_len)
 		/* Ignore directories if they match an element of
 		   completion-ignored-extensions which ends in a slash.  */
 		for (tem = Vcompletion_ignored_extensions;
@@ -550,21 +565,31 @@ file_name_completion (Lisp_Object file, Lisp_Object dirname, bool all_flag,
 		    elt = XCAR (tem);
 		    if (!STRINGP (elt))
 		      continue;
-		    /* Need to encode ELT, since scmp compares unibyte
-		       strings only.  */
-		    elt = ENCODE_FILE (elt);
-		    elt_len = SCHARS (elt) - 1; /* -1 for trailing / */
+		    elt_len = SBYTES (elt) - 1; /* -1 for trailing / */
 		    if (elt_len <= 0)
 		      continue;
 		    p1 = SSDATA (elt);
 		    if (p1[elt_len] != '/')
 		      continue;
-		    skip = len - elt_len;
+		    skip = name_blen - elt_len;
 		    if (skip < 0)
 		      continue;
 
-		    if (scmp (dp->d_name + skip, p1, elt_len) >= 0)
+		    if (!completion_ignore_case
+			&& scmp (SSDATA (name) + skip, p1, elt_len) >= 0)
 		      continue;
+		    if (completion_ignore_case)
+		      {
+			elt_len = SCHARS (elt) - 1;
+			skip = name_len - elt_len;
+			cmp_len = make_fixnum (elt_len);
+			if (skip < 0
+			    || !EQ (Fcompare_strings (name, make_fixnum (skip),
+						      cmp_len,
+						      elt, zero, cmp_len, Qt),
+				    Qt))
+			  continue;
+		      }
 		    break;
 		  }
 	    }
@@ -572,22 +597,33 @@ file_name_completion (Lisp_Object file, Lisp_Object dirname, bool all_flag,
 	    {
 	      /* Compare extensions-to-be-ignored against end of this file name */
 	      /* if name is not an exact match against specified string */
-	      if (len > SCHARS (encoded_file))
+	      if (len > enc_file_len)
 		/* and exit this for loop if a match is found */
 		for (tem = Vcompletion_ignored_extensions;
 		     CONSP (tem); tem = XCDR (tem))
 		  {
 		    elt = XCAR (tem);
 		    if (!STRINGP (elt)) continue;
-		    /* Need to encode ELT, since scmp compares unibyte
-		       strings only.  */
-		    elt = ENCODE_FILE (elt);
-		    skip = len - SCHARS (elt);
+		    ptrdiff_t elt_len = SBYTES (elt);
+		    skip = len - elt_len;
 		    if (skip < 0) continue;
 
-		    if (scmp (dp->d_name + skip, SSDATA (elt), SCHARS (elt))
-			>= 0)
+		    if (!completion_ignore_case
+			&& (scmp (SSDATA (name) + skip, SSDATA (elt), elt_len)
+			    >= 0))
 		      continue;
+		    if (completion_ignore_case)
+		      {
+			elt_len = SCHARS (elt);
+			skip = name_len - elt_len;
+			cmp_len = make_fixnum (elt_len);
+			if (skip < 0
+			    || !EQ (Fcompare_strings (name, make_fixnum (skip),
+						      cmp_len,
+						      elt, zero, cmp_len, Qt),
+				    Qt))
+			  continue;
+		      }
 		    break;
 		  }
 	    }
@@ -611,24 +647,18 @@ file_name_completion (Lisp_Object file, Lisp_Object dirname, bool all_flag,
 	      matchcount = 0;
 	    }
 	}
-      /* FIXME: If we move this `decode' earlier we can eliminate
-	 the repeated ENCODE_FILE on Vcompletion_ignored_extensions.  */
-      name = make_unibyte_string (dp->d_name, len);
-      name = DECODE_FILE (name);
 
-      {
-	Lisp_Object regexps, table = (completion_ignore_case
-				      ? Vascii_canon_table : Qnil);
+      Lisp_Object regexps, table = (completion_ignore_case
+				    ? Vascii_canon_table : Qnil);
 
-	/* Ignore this element if it fails to match all the regexps.  */
-	for (regexps = Vcompletion_regexp_list; CONSP (regexps);
-	     regexps = XCDR (regexps))
-	  if (fast_string_match_internal (XCAR (regexps), name, table) < 0)
-	    break;
+      /* Ignore this element if it fails to match all the regexps.  */
+      for (regexps = Vcompletion_regexp_list; CONSP (regexps);
+	   regexps = XCDR (regexps))
+	if (fast_string_match_internal (XCAR (regexps), name, table) < 0)
+	  break;
 
-	if (CONSP (regexps))
-	  continue;
-      }
+      if (CONSP (regexps))
+	continue;
 
       /* This is a possible completion */
       if (directoryp)
@@ -642,8 +672,6 @@ file_name_completion (Lisp_Object file, Lisp_Object dirname, bool all_flag,
       /* Reject entries where the encoded strings match, but the
          decoded don't.  For example, "a" should not match "a-ring" on
          file systems that store decomposed characters. */
-      Lisp_Object zero = make_fixnum (0);
-
       if (check_decoded && SCHARS (file) <= SCHARS (name))
 	{
 	  /* FIXME: This is a copy of the code below.  */
@@ -757,6 +785,9 @@ scmp (const char *s1, const char *s2, ptrdiff_t len)
 
   if (completion_ignore_case)
     {
+      /* WARNING: This only works for pure ASCII strings, as we
+	 compare bytes, not characters!  Use Fcompare_strings for
+	 comparing non-ASCII strings case-insensitively.  */
       while (l
 	     && (downcase ((unsigned char) *s1++)
 		 == downcase ((unsigned char) *s2++)))