From b95f53f0383b5df82e0b711f5027881950893f4f Mon Sep 17 00:00:00 2001 From: Stefan Monnier Date: Mon, 15 Jun 2015 18:48:08 -0400 Subject: [PATCH] * lisp/info.el: Cleanup bytepos/charpos issues * lisp/international/mule-util.el: Use lexical-binding. (filepos-to-bufferpos): New function. * lisp/info.el (Info-find-in-tag-table-1): Use 0-based file positions. (Info-find-node-2): Use filepos-to-bufferpos (bug#20704). (Info-read-subfile, Info-search): Use 0-based file positions. --- etc/NEWS | 2 ++ lisp/info.el | 41 ++++++++++++++-------------- lisp/international/mule-util.el | 47 ++++++++++++++++++++++++++++----- 3 files changed, 63 insertions(+), 27 deletions(-) diff --git a/etc/NEWS b/etc/NEWS index 571adadc61c..76b477244e3 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -851,6 +851,8 @@ behavior, set `diff-switches' to `-c'. * Lisp Changes in Emacs 25.1 +** New function `filepos-to-bufferpos'. + ** The default value of `load-read-function' is now `read'. ** New hook `pre-redisplay-functions', a bit easier to use than pre-redisplay-function. diff --git a/lisp/info.el b/lisp/info.el index 9602337e193..413928bcfbc 100644 --- a/lisp/info.el +++ b/lisp/info.el @@ -1009,7 +1009,7 @@ REGEXP is a regular expression matching nodes or references. Its first group should match `Node:' or `Ref:'. CASE-FOLD t means search for a case-insensitive match. If a match was found, value is a list (FOUND-ANCHOR POS MODE), where -FOUND-ANCHOR is non-nil if a `Ref:' was matched, POS is the position +FOUND-ANCHOR is non-nil if a `Ref:' was matched, POS is the file position where the match was found, and MODE is `major-mode' of the buffer in which the match was found." (let ((case-fold-search case-fold)) @@ -1020,7 +1020,7 @@ which the match was found." (beginning-of-line) (when (re-search-forward regexp nil t) (list (string-equal "Ref:" (match-string 1)) - (+ (point-min) (read (current-buffer))) + (read (current-buffer)) major-mode))))) (defun Info-find-in-tag-table (marker regexp &optional strict-case) @@ -1029,7 +1029,7 @@ MARKER specifies the buffer and position to start searching at. REGEXP is a regular expression matching nodes or references. Its first group should match `Node:' or `Ref:'. If a match was found, value is a list (FOUND-ANCHOR POS MODE), where -FOUND-ANCHOR is non-nil if a `Ref:' was matched, POS is the position +FOUND-ANCHOR is non-nil if a `Ref:' was matched, POS is the file position where the match was found, and MODE is `major-mode' of the buffer in which the match was found. This function tries to find a case-sensitive match first, then a @@ -1187,15 +1187,18 @@ is non-nil)." (when found ;; FOUND is (ANCHOR POS MODE). - (setq guesspos (nth 1 found)) + (let ((filepos (nth 1 found))) ;File position in bytes. - ;; If this is an indirect file, determine which - ;; file really holds this node and read it in. - (unless (eq (nth 2 found) 'Info-mode) - ;; Note that the current buffer must be the - ;; *info* buffer on entry to - ;; Info-read-subfile. Thus the hackery above. - (setq guesspos (Info-read-subfile guesspos))) + ;; If this is an indirect file, determine which + ;; file really holds this node and read it in. + (unless (eq (nth 2 found) 'Info-mode) + ;; Note that the current buffer must be the + ;; *info* buffer on entry to + ;; Info-read-subfile. Thus the hackery above. + (setq filepos (Info-read-subfile filepos))) + + (setq guesspos + (filepos-to-bufferpos filepos 'approximate))) ;; Handle anchor (when (nth 0 found) @@ -1203,8 +1206,7 @@ is non-nil)." (throw 'foo t))))) ;; Else we may have a node, which we search for: - (goto-char (max (point-min) - (- (byte-to-position guesspos) 1000))) + (goto-char (max (point-min) (- guesspos 1000))) ;; Now search from our advised position (or from beg of ;; buffer) to find the actual node. First, check @@ -1506,7 +1508,7 @@ is non-nil)." ;; Note that on entry to this function the current-buffer must be the ;; *info* buffer; not the info tags buffer. (defun Info-read-subfile (nodepos) - ;; NODEPOS is either a position (in the Info file as a whole, + ;; NODEPOS is either a position in bytes (in the Info file as a whole, ;; not relative to a subfile) or the name of a subfile. (let (lastfilepos lastfilename) @@ -1523,7 +1525,7 @@ is non-nil)." thisfilepos thisfilename) (search-forward ": ") (setq thisfilename (buffer-substring beg (- (point) 2))) - (setq thisfilepos (+ (point-min) (read (current-buffer)))) + (setq thisfilepos (read (current-buffer))) ;; read in version 19 stops at the end of number. ;; Advance to the next line. (forward-line 1) @@ -1554,7 +1556,7 @@ is non-nil)." ;; Don't add the length of the skipped summary segment to ;; the value returned to `Info-find-node-2'. (Bug#14125) (if (numberp nodepos) - (+ (- nodepos lastfilepos) (point-min))))) + (- nodepos lastfilepos)))) (defun Info-unescape-quotes (value) "Unescape double quotes and backslashes in VALUE." @@ -2013,10 +2015,9 @@ If DIRECTION is `backward', search in the reverse direction." (re-search-backward "\\(^.*\\): [0-9]+$") (re-search-forward "\\(^.*\\): [0-9]+$")) (goto-char (+ (match-end 1) 2)) - (setq list (cons (cons (+ (point-min) - (read (current-buffer))) - (match-string-no-properties 1)) - list)) + (push (cons (read (current-buffer)) + (match-string-no-properties 1)) + list) (goto-char (if backward (1- (match-beginning 0)) (1+ (match-end 0))))) diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el index eae787bbeb9..92c540cea7d 100644 --- a/lisp/international/mule-util.el +++ b/lisp/international/mule-util.el @@ -1,4 +1,4 @@ -;;; mule-util.el --- utility functions for multilingual environment (mule) +;;; mule-util.el --- utility functions for multilingual environment (mule) -*- lexical-binding:t -*- ;; Copyright (C) 1997-1998, 2000-2015 Free Software Foundation, Inc. ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, @@ -30,8 +30,7 @@ ;;; Code: -;;; String manipulations while paying attention to multibyte -;;; characters. +;;; String manipulations while paying attention to multibyte characters. ;;;###autoload (defsubst string-to-list (string) @@ -49,7 +48,6 @@ (if (integerp obj) (aset string idx obj) (let ((len1 (length obj)) - (len2 (length string)) (i 0)) (while (< i len1) (aset string (+ idx i) (aref obj i)) @@ -90,7 +88,6 @@ defaults to `truncate-string-ellipsis'." (setq ellipsis truncate-string-ellipsis)) (let ((str-len (length str)) (str-width (string-width str)) - (ellipsis-len (if ellipsis (length ellipsis) 0)) (ellipsis-width (if ellipsis (string-width ellipsis) 0)) (idx 0) (column 0) @@ -129,8 +126,8 @@ defaults to `truncate-string-ellipsis'." tail-padding ellipsis)))) -;;; Nested alist handler. Nested alist is alist whose elements are -;;; also nested alist. +;;; Nested alist handler. +;; Nested alist is alist whose elements are also nested alist. ;;;###autoload (defsubst nested-alist-p (obj) @@ -313,6 +310,42 @@ per-character basis, this may not be accurate." (throw 'tag3 charset))) charset-list) nil))))))))) + +;;;###autoload +(defun filepos-to-bufferpos (byte &optional quality coding-system) + "Try to return the buffer position corresponding to a particular file position. +The file position is given as a (0-based) BYTE count. +The function presumes the file is encoded with CODING-SYSTEM, which defaults +to `buffer-file-coding-system'. +QUALITY can be: + `approximate', in which case we may cut some corners to avoid + excessive work. + nil, in which case we may return nil rather than an approximation." + ;; `exact', in which case we may end up re-(en|de)coding a large + ;; part of the file. + (unless coding-system (setq coding-system buffer-file-coding-system)) + (let ((eol (coding-system-eol-type coding-system)) + (type (coding-system-type coding-system)) + (pm (save-restriction (widen) (point-min)))) + (pcase type + (`utf-8 + (when (coding-system-get coding-system :bom) + (setq byte (max 0 (- byte 3)))) + (let (pos lines (eol-offset 0)) + (while + (progn + (setq pos (byte-to-position (+ pm byte (- eol-offset)))) + (setq lines (1- (line-number-at-pos pos))) + (not (= lines eol-offset))) + (setq eol-offset (+ eol-offset lines))) + pos)) + ;; FIXME: What if it's a 2-byte charset? Are there such beasts? + (`charset (+ pm byte)) + (_ + (pcase quality + (`approximate (+ pm (byte-to-position byte))) + ;; (`exact ...) + ))))) (provide 'mule-util) -- 2.39.2