From 07b47905d3b38ac77398213cdb76b2dca2217db7 Mon Sep 17 00:00:00 2001
From: Michael Albinus <michael.albinus@gmx.de>
Date: Sun, 24 Aug 2014 17:40:07 +0200
Subject: [PATCH] Add string collation.

* configure.ac: Check also for the uselocale function.

* src/fns.c (Fstring_collate_lessp, Fstring_collate_equalp): New DEFUNs.

* src/sysdep.c (str_collate): New function.  (Bug#18051)
---
 ChangeLog     |  4 +++
 configure.ac  |  2 +-
 src/ChangeLog |  6 ++++
 src/fns.c     | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/sysdep.c  | 74 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 168 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 34bfef5e4f8..bfa8d4fee51 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2014-08-24  Michael Albinus  <michael.albinus@gmx.de>
+
+	* configure.ac: Check also for the uselocale function.  (Bug#18051)
+
 2014-08-23  Karol Ostrovsky  <karol.ostrovsky@gmail.com>  (tiny change)
 
 	* configure.ac: Accept "*-mingw*", not just "*-mingw32", as
diff --git a/configure.ac b/configure.ac
index 1f8870e9808..30435d67508 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3553,7 +3553,7 @@ LIBS="$LIB_PTHREAD $LIB_MATH $LIBS"
 AC_CHECK_FUNCS(accept4 fchdir gethostname \
 getrusage get_current_dir_name \
 lrand48 random rint \
-select getpagesize setlocale \
+select getpagesize setlocale uselocale \
 getrlimit setrlimit shutdown getaddrinfo \
 pthread_sigmask strsignal setitimer \
 sendto recvfrom getsockname getpeername getifaddrs freeifaddrs \
diff --git a/src/ChangeLog b/src/ChangeLog
index 90c66eb4705..72d7d405f7a 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,9 @@
+2014-08-24  Michael Albinus  <michael.albinus@gmx.de>
+
+	* fns.c (Fstring_collate_lessp, Fstring_collate_equalp): New DEFUNs.
+
+	* sysdep.c (str_collate): New function.  (Bug#18051)
+
 2014-08-23  Karol Ostrovsky  <karol.ostrovsky@gmail.com>  (tiny change)
 
 	* Makefile.in (emacs$(EXEEXT)): Retry deletion of bootstrap-emacs
diff --git a/src/fns.c b/src/fns.c
index 33c02598359..fbcec4e659e 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -40,7 +40,7 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 #include "xterm.h"
 #endif
 
-Lisp_Object Qstring_lessp;
+Lisp_Object Qstring_lessp, Qstring_collate_lessp, Qstring_collate_equalp;
 static Lisp_Object Qprovide, Qrequire;
 static Lisp_Object Qyes_or_no_p_history;
 Lisp_Object Qcursor_in_echo_area;
@@ -343,6 +343,84 @@ Symbols are also allowed; their print names are used instead.  */)
     }
   return i1 < SCHARS (s2) ? Qt : Qnil;
 }
+
+#ifdef __STDC_ISO_10646__
+/* Defined in sysdep.c.  */
+extern ptrdiff_t str_collate (Lisp_Object, Lisp_Object);
+#endif /* __STDC_ISO_10646__ */
+
+DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 2, 0,
+       doc: /* Return t if first arg string is less than second in collation order.
+
+Case is significant.  Symbols are also allowed; their print names are
+used instead.
+
+This function obeys the conventions for collation order in your
+locale settings.  For example, punctuation and whitespace characters
+are considered less significant for sorting.
+
+\(sort '\("11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp)
+  => \("11" "1 1" "1.1" "12" "1 2" "1.2")
+
+If your system does not support a locale environment, this function
+behaves like `string-lessp'.
+
+If the environment variable \"LC_COLLATE\" is set in `process-environment',
+it overrides the setting of your current locale.  */)
+  (Lisp_Object s1, Lisp_Object s2)
+{
+#ifdef __STDC_ISO_10646__
+  /* Check parameters.  */
+  if (SYMBOLP (s1))
+    s1 = SYMBOL_NAME (s1);
+  if (SYMBOLP (s2))
+    s2 = SYMBOL_NAME (s2);
+  CHECK_STRING (s1);
+  CHECK_STRING (s2);
+
+  return (str_collate (s1, s2) < 0) ? Qt : Qnil;
+
+#else
+  return Fstring_lessp (s1, s2);
+#endif /* __STDC_ISO_10646__ */
+}
+
+DEFUN ("string-collate-equalp", Fstring_collate_equalp, Sstring_collate_equalp, 2, 2, 0,
+       doc: /* Return t if two strings have identical contents.
+
+Case is significant.  Symbols are also allowed; their print names are
+used instead.
+
+This function obeys the conventions for collation order in your locale
+settings.  For example, characters with different coding points but
+the same meaning are considered as equal, like different grave accent
+unicode characters.
+
+\(string-collate-equalp \(string ?\\uFF40) \(string ?\\u1FEF))
+  => t
+
+If your system does not support a locale environment, this function
+behaves like `string-equal'.
+
+If the environment variable \"LC_COLLATE\" is set in `process-environment',
+it overrides the setting of your current locale.  */)
+  (Lisp_Object s1, Lisp_Object s2)
+{
+#ifdef __STDC_ISO_10646__
+  /* Check parameters.  */
+  if (SYMBOLP (s1))
+    s1 = SYMBOL_NAME (s1);
+  if (SYMBOLP (s2))
+    s2 = SYMBOL_NAME (s2);
+  CHECK_STRING (s1);
+  CHECK_STRING (s2);
+
+  return (str_collate (s1, s2) == 0) ? Qt : Qnil;
+
+#else
+  return Fstring_equal (s1, s2);
+#endif /* __STDC_ISO_10646__ */
+}
 
 static Lisp_Object concat (ptrdiff_t nargs, Lisp_Object *args,
 			   enum Lisp_Type target_type, bool last_special);
@@ -4919,6 +4997,8 @@ syms_of_fns (void)
   defsubr (&Sdefine_hash_table_test);
 
   DEFSYM (Qstring_lessp, "string-lessp");
+  DEFSYM (Qstring_collate_lessp, "string-collate-lessp");
+  DEFSYM (Qstring_collate_equalp, "string-collate-equalp");
   DEFSYM (Qprovide, "provide");
   DEFSYM (Qrequire, "require");
   DEFSYM (Qyes_or_no_p_history, "yes-or-no-p-history");
@@ -4972,6 +5052,8 @@ this variable.  */);
   defsubr (&Sstring_equal);
   defsubr (&Scompare_strings);
   defsubr (&Sstring_lessp);
+  defsubr (&Sstring_collate_lessp);
+  defsubr (&Sstring_collate_equalp);
   defsubr (&Sappend);
   defsubr (&Sconcat);
   defsubr (&Svconcat);
diff --git a/src/sysdep.c b/src/sysdep.c
index d5cfd5b88cf..619361472e4 100644
--- a/src/sysdep.c
+++ b/src/sysdep.c
@@ -3513,3 +3513,77 @@ system_process_attributes (Lisp_Object pid)
 }
 
 #endif	/* !defined (WINDOWSNT) */
+
+/* Wide character string collation.  */
+
+#ifdef __STDC_ISO_10646__
+#include <wchar.h>
+
+#if defined (HAVE_USELOCALE) || defined (HAVE_SETLOCALE)
+#include <locale.h>
+#endif /* HAVE_USELOCALE || HAVE_SETLOCALE */
+
+ptrdiff_t
+str_collate (Lisp_Object s1, Lisp_Object s2)
+{
+  register ptrdiff_t res, len, i, i_byte;
+  wchar_t *p1, *p2;
+  Lisp_Object lc_collate;
+#ifdef HAVE_USELOCALE
+  locale_t loc = (locale_t) 0, oldloc = (locale_t) 0;
+#elif defined (HAVE_SETLOCALE)
+  char *oldloc = NULL;
+#endif /* HAVE_USELOCALE */
+
+  USE_SAFE_ALLOCA;
+
+  /* Convert byte stream to code points.  */
+  len = SCHARS (s1); i = i_byte = 0;
+  p1 = (wchar_t *) SAFE_ALLOCA ((len+1) * (sizeof *p1));
+  while (i < len)
+    FETCH_STRING_CHAR_ADVANCE (*(p1+i-1), s1, i, i_byte);
+  *(p1+len) = 0;
+
+  len = SCHARS (s2); i = i_byte = 0;
+  p2 = (wchar_t *) SAFE_ALLOCA ((len+1) * (sizeof *p2));
+  while (i < len)
+    FETCH_STRING_CHAR_ADVANCE (*(p2+i-1), s2, i, i_byte);
+  *(p2+len) = 0;
+
+#if defined (HAVE_USELOCALE) || defined (HAVE_SETLOCALE)
+  /* Create a new locale object, and set it.  */
+  lc_collate =
+    Fgetenv_internal (build_string ("LC_COLLATE"), Vprocess_environment);
+
+#ifdef HAVE_USELOCALE
+  if (STRINGP (lc_collate)
+      && (loc = newlocale (LC_COLLATE_MASK, SSDATA (lc_collate), (locale_t) 0)))
+    oldloc = uselocale (loc);
+#elif defined (HAVE_SETLOCALE)
+  if (STRINGP (lc_collate))
+    {
+      oldloc = xstrdup (setlocale (LC_COLLATE, NULL));
+      setlocale (LC_COLLATE, SSDATA (lc_collate));
+    }
+#endif /* HAVE_USELOCALE */
+#endif /* HAVE_USELOCALE || HAVE_SETLOCALE */
+
+  res = wcscoll (p1, p2);
+
+#ifdef HAVE_USELOCALE
+  /* Free the locale object, and reset.  */
+  if (loc)
+    freelocale (loc);
+  if (oldloc)
+    uselocale (oldloc);
+#elif defined (HAVE_SETLOCALE)
+  /* Restore the original locale. */
+  if (oldloc)
+    setlocale (LC_COLLATE, oldloc);
+#endif /* HAVE_USELOCALE */
+
+  /* Return result.  */
+  SAFE_FREE ();
+  return res;
+}
+#endif /* __STDC_ISO_10646__ */
-- 
2.39.5