From: Juanma Barranquero Date: Thu, 22 May 2003 20:59:57 +0000 (+0000) Subject: (split-string): Implement specification that splitting on explicit separators X-Git-Tag: ttn-vms-21-2-B4~10028 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=6a646626960dda937b81febd43140946a1f64111;p=emacs.git (split-string): Implement specification that splitting on explicit separators retains null fields. Add new argument OMIT-NULLS. Special-case (split-string "a string"). --- diff --git a/lisp/subr.el b/lisp/subr.el index 78f311385a1..c03b2ff0a98 100644 --- a/lisp/subr.el +++ b/lisp/subr.el @@ -1820,19 +1820,45 @@ STRING should be given if the last search was by `string-match' on STRING." (buffer-substring-no-properties (match-beginning num) (match-end num))))) -(defun split-string (string &optional separators) - "Splits STRING into substrings where there are matches for SEPARATORS. -Each match for SEPARATORS is a splitting point. -The substrings between the splitting points are made into a list +(defconst split-string-default-separators "[ \f\t\n\r\v]+" + "The default value of separators for `split-string'. + +A regexp matching strings of whitespace. May be locale-dependent +\(as yet unimplemented). Should not match non-breaking spaces. + +Warning: binding this to a different value and using it as default is +likely to have undesired semantics.") + +;; The specification says that if both SEPARATORS and OMIT-NULLS are +;; defaulted, OMIT-NULLS should be treated as t. Simplifying the logical +;; expression leads to the equivalent implementation that if SEPARATORS +;; is defaulted, OMIT-NULLS is treated as t. +(defun split-string (string &optional separators omit-nulls) + "Splits STRING into substrings bounded by matches for SEPARATORS. + +The beginning and end of STRING, and each match for SEPARATORS, are +splitting points. The substrings matching SEPARATORS are removed, and +the substrings between the splitting points are collected as a list, which is returned. 
-If SEPARATORS is absent, it defaults to \"[ \\f\\t\\n\\r\\v]+\". -If there is match for SEPARATORS at the beginning of STRING, we do not -include a null substring for that. Likewise, if there is a match -at the end of STRING, we don't include a null substring for that. +If SEPARATORS is non-nil, it should be a regular expression matching text +which separates, but is not part of, the substrings. If nil it defaults to +`split-string-default-separators', normally \"[ \\f\\t\\n\\r\\v]+\", and +OMIT-NULLS is forced to t. + +If OMIT-NULLS is t, zero-length substrings are omitted from the list \(so +that for the default value of SEPARATORS leading and trailing whitespace +are effectively trimmed). If nil, all zero-length substrings are retained, +which correctly parses CSV format, for example. + +Note that the effect of `(split-string STRING)' is the same as +`(split-string STRING split-string-default-separators t)'. In the rare +case that you wish to retain zero-length substrings when splitting on +whitespace, use `(split-string STRING split-string-default-separators)'. Modifies the match data; use `save-match-data' if necessary." - (let ((rexp (or separators "[ \f\t\n\r\v]+")) + (let ((keep-nulls (not (if separators omit-nulls t))) + (rexp (or separators split-string-default-separators)) (start 0) notfirst (list nil)) @@ -1841,16 +1867,14 @@ Modifies the match data; use `save-match-data' if necessary." 
(= start (match-beginning 0)) (< start (length string))) (1+ start) start)) - (< (match-beginning 0) (length string))) + (< start (length string))) (setq notfirst t) - (or (eq (match-beginning 0) 0) - (and (eq (match-beginning 0) (match-end 0)) - (eq (match-beginning 0) start)) + (if (or keep-nulls (< start (match-beginning 0))) (setq list (cons (substring string start (match-beginning 0)) list))) (setq start (match-end 0))) - (or (eq start (length string)) + (if (or keep-nulls (< start (length string))) (setq list (cons (substring string start) list))) @@ -1868,7 +1892,7 @@ Unless optional argument INPLACE is non-nil, return a new string." newstr)) (defun replace-regexp-in-string (regexp rep string &optional - fixedcase literal subexp start) + fixedcase literal subexp start) "Replace all matches for REGEXP with REP in STRING. Return a new string containing the replacements. @@ -1917,7 +1941,7 @@ and replace a sub-expression, e.g. rep (funcall rep (match-string 0 str))) fixedcase literal str subexp) - (cons (substring string start mb) ; unmatched prefix + (cons (substring string start mb) ; unmatched prefix matches))) (setq start me)) ;; Reconstruct a string from the pieces. @@ -2157,7 +2181,7 @@ included in the mode-line minor mode menu. If TOGGLE has a `:menu-tag', that is used for the menu item's label." (unless (memq toggle minor-mode-list) (push toggle minor-mode-list)) - + (unless toggle-fun (setq toggle-fun toggle)) ;; Add the name to the minor-mode-alist. (when name