From: Michael Albinus Date: Fri, 29 Aug 2014 17:57:36 +0000 (+0200) Subject: Add optional arguments LOCALE and IGNORE-CASE to collation functions. X-Git-Tag: emacs-25.0.90~2635^2~679^2~400 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=b579ae53e46fa9bc9a242e4d5ce524097b3150ef;p=emacs.git Add optional arguments LOCALE and IGNORE-CASE to collation functions. * fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Add optional arguments LOCALE and IGNORE-CASE. * lisp.h (str_collate): Adapt argument list. * sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l): Define substitutes for platforms that lack them. (str_collate): Add arguments locale and ignore_case. --- diff --git a/src/ChangeLog b/src/ChangeLog index b3c056edd07..66588bc3e67 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,14 @@ +2014-08-29 Michael Albinus + + * sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l): + Define substitutes for platforms that lack them. + (str_collate): Add arguments locale and ignore_case. + + * fns.c (Fstring_collate_lessp, Fstring_collate_equalp): + Add optional arguments LOCALE and IGNORE-CASE. + + * lisp.h (str_collate): Adapt argument list. + 2014-08-29 Dmitry Antipov Add vectors support to Fsort. diff --git a/src/fns.c b/src/fns.c index 2b1fb86419d..3cca40df50f 100644 --- a/src/fns.c +++ b/src/fns.c @@ -344,25 +344,28 @@ Symbols are also allowed; their print names are used instead. */) return i1 < SCHARS (s2) ? Qt : Qnil; } -DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 2, 0, +DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0, doc: /* Return t if first arg string is less than second in collation order. - -Case is significant. Symbols are also allowed; their print names are -used instead. +Symbols are also allowed; their print names are used instead. This function obeys the conventions for collation order in your locale settings. For example, punctuation and whitespace characters -are considered less significant for sorting. +are considered less significant for sorting: \(sort '\("11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp) => \("11" "1 1" "1.1" "12" "1 2" "1.2") -If your system does not support a locale environment, this function -behaves like `string-lessp'. +The optional argument LOCALE, a string, overrides the setting of your +current locale identifier for collation. The value is system +dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems, +while it would be \"English_USA.1252\" on MS Windows systems. -If the environment variable \"LC_COLLATE\" is set in `process-environment', -it overrides the setting of your current locale. */) - (Lisp_Object s1, Lisp_Object s2) +If IGNORE-CASE is non-nil, characters are converted to lower-case +before comparing them. + +If your system does not support a locale environment, this function +behaves like `string-lessp'. */) + (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case) { #if defined __STDC_ISO_10646__ || defined WINDOWSNT /* Check parameters. */ @@ -372,34 +375,39 @@ it overrides the setting of your current locale. */) s2 = SYMBOL_NAME (s2); CHECK_STRING (s1); CHECK_STRING (s2); + if (!NILP (locale)) + CHECK_STRING (locale); - return (str_collate (s1, s2) < 0) ? Qt : Qnil; + return (str_collate (s1, s2, locale, ignore_case) < 0) ? Qt : Qnil; #else /* !__STDC_ISO_10646__, !WINDOWSNT */ return Fstring_lessp (s1, s2); #endif /* !__STDC_ISO_10646__, !WINDOWSNT */ } -DEFUN ("string-collate-equalp", Fstring_collate_equalp, Sstring_collate_equalp, 2, 2, 0, +DEFUN ("string-collate-equalp", Fstring_collate_equalp, Sstring_collate_equalp, 2, 4, 0, doc: /* Return t if two strings have identical contents. - -Case is significant. Symbols are also allowed; their print names are -used instead. +Symbols are also allowed; their print names are used instead. This function obeys the conventions for collation order in your locale settings. For example, characters with different coding points but the same meaning are considered as equal, like different grave accent -unicode characters. +unicode characters: \(string-collate-equalp \(string ?\\uFF40) \(string ?\\u1FEF)) => t -If your system does not support a locale environment, this function -behaves like `string-equal'. +The optional argument LOCALE, a string, overrides the setting of your +current locale identifier for collation. The value is system +dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems, +while it would be \"English_USA.1252\" on MS Windows systems. -If the environment variable \"LC_COLLATE\" is set in `process-environment', -it overrides the setting of your current locale. */) - (Lisp_Object s1, Lisp_Object s2) +If IGNORE-CASE is non-nil, characters are converted to lower-case +before comparing them. + +If your system does not support a locale environment, this function +behaves like `string-equal'. */) + (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case) { #if defined __STDC_ISO_10646__ || defined WINDOWSNT /* Check parameters. */ @@ -409,8 +417,10 @@ it overrides the setting of your current locale. */) s2 = SYMBOL_NAME (s2); CHECK_STRING (s1); CHECK_STRING (s2); + if (!NILP (locale)) + CHECK_STRING (locale); - return (str_collate (s1, s2) == 0) ? Qt : Qnil; + return (str_collate (s1, s2, locale, ignore_case) == 0) ? Qt : Qnil; #else /* !__STDC_ISO_10646__, !WINDOWSNT */ return Fstring_equal (s1, s2); diff --git a/src/lisp.h b/src/lisp.h index 7cbbb299896..d31c5ae50c3 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -4301,7 +4301,7 @@ extern void lock_file (Lisp_Object); extern void unlock_file (Lisp_Object); extern void unlock_buffer (struct buffer *); extern void syms_of_filelock (void); -extern int str_collate (Lisp_Object, Lisp_Object); +extern int str_collate (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object); /* Defined in sound.c. */ extern void syms_of_sound (void); diff --git a/src/sysdep.c b/src/sysdep.c index c753f84831b..a730cb4a8ff 100644 --- a/src/sysdep.c +++ b/src/sysdep.c @@ -3605,6 +3605,7 @@ system_process_attributes (Lisp_Object pid) #ifdef __STDC_ISO_10646__ # include +# include # if defined HAVE_NEWLOCALE || defined HAVE_SETLOCALE # include @@ -3615,15 +3616,24 @@ system_process_attributes (Lisp_Object pid) # ifndef LC_COLLATE_MASK # define LC_COLLATE_MASK 0 # endif +# ifndef LC_CTYPE +# define LC_CTYPE 0 +# endif +# ifndef LC_CTYPE_MASK +# define LC_CTYPE_MASK 0 +# endif + # ifndef HAVE_NEWLOCALE # undef freelocale # undef locale_t # undef newlocale # undef wcscoll_l +# undef towlower_l # define freelocale emacs_freelocale # define locale_t emacs_locale_t # define newlocale emacs_newlocale # define wcscoll_l emacs_wcscoll_l +# define towlower_l emacs_towlower_l typedef char const *locale_t; @@ -3683,15 +3693,37 @@ wcscoll_l (wchar_t const *a, wchar_t const *b, locale_t loc) errno = err; return result; } + +static wint_t +towlower_l (wint_t wc, locale_t loc) +{ + wint_t result = wc; + char *oldloc = emacs_setlocale (LC_CTYPE, NULL); + + if (oldloc) + { + USE_SAFE_ALLOCA; + char *oldcopy = SAFE_ALLOCA (strlen (oldloc) + 1); + strcpy (oldcopy, oldloc); + if (emacs_setlocale (LC_CTYPE, loc)) + { + result = towlower (wc); + emacs_setlocale (LC_COLLATE, oldcopy); + } + SAFE_FREE (); + } + + return result; +} # endif int -str_collate (Lisp_Object s1, Lisp_Object s2) +str_collate (Lisp_Object s1, Lisp_Object s2, + Lisp_Object locale, Lisp_Object ignore_case) { int res, err; ptrdiff_t len, i, i_byte; wchar_t *p1, *p2; - Lisp_Object lc_collate; USE_SAFE_ALLOCA; @@ -3708,22 +3740,43 @@ str_collate (Lisp_Object s1, Lisp_Object s2) FETCH_STRING_CHAR_ADVANCE (*(p2+i-1), s2, i, i_byte); *(p2+len) = 0; - lc_collate = - Fgetenv_internal (build_string ("LC_COLLATE"), Vprocess_environment); - - if (STRINGP (lc_collate)) + if (STRINGP (locale)) { - locale_t loc = newlocale (LC_COLLATE_MASK, SSDATA (lc_collate), 0); + locale_t loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, + SSDATA (locale), 0); if (!loc) error ("Wrong locale: %s", strerror (errno)); errno = 0; - res = wcscoll_l (p1, p2, loc); + + if (! NILP (ignore_case)) + for (int i = 1; i < 3; i++) + { + wchar_t *p = (i == 1) ? p1 : p2; + for (; *p; p++) + { + *p = towlower_l (*p, loc); + if (errno) + break; + } + if (errno) + break; + } + + if (! errno) + res = wcscoll_l (p1, p2, loc); err = errno; freelocale (loc); } else { errno = 0; + if (! NILP (ignore_case)) + for (int i = 1; i < 3; i++) + { + wchar_t *p = (i == 1) ? p1 : p2; + for (; *p; p++) + *p = towlower (*p); + } res = wcscoll (p1, p2); err = errno; } @@ -3733,15 +3786,14 @@ str_collate (Lisp_Object s1, Lisp_Object s2) SAFE_FREE (); return res; } -#endif /* __STDC_ISO_10646__ */ +#endif /* __STDC_ISO_10646__ */ #ifdef WINDOWSNT int -str_collate (Lisp_Object s1, Lisp_Object s2) -{ - Lisp_Object lc_collate = - Fgetenv_internal (build_string ("LC_COLLATE"), Vprocess_environment); - char *loc = STRINGP (lc_collate) ? SSDATA (lc_collate) : NULL; +str_collate (Lisp_Object s1, Lisp_Object s2, +{ Lisp_Object locale, Lisp_Object ignore_case) + + char *loc = STRINGP (locale) ? SSDATA (locale) : NULL; return w32_compare_strings (SDATA (s1), SDATA (s2), loc); }