From d633db5189f335873a03544f9f41dcaf77c8e31d Mon Sep 17 00:00:00 2001 From: Daniel Nicolai Date: Thu, 27 Jan 2022 17:03:38 +0100 Subject: [PATCH] Add support for EPUB, CBZ, FB2 and (O)XPS extension to doc view * doc/emacs/misc.texi (Document View): Add requirements for new extensions (i.e. mutool). * lisp/doc-view.el (doc-view): Additionally update preliminary comment. (doc-view-custom-set-epub-font-size): Redraw image after setting. (doc-view-unoconv-program): Put code all on one line. (doc-view-doc-type): Update docstring. (doc-view-kill-proc): Fix comment indentation. (doc-view-mode-p): Add check for new extensions and alternative check for PDF. (doc-view-pdf/ps->png): Associate new extensions with PNG converter. (doc-view-convert-current-doc): Handle new extensions like PDFs. (doc-view-set-doc-type): Set correct doc-type for new extensions. * lisp/files.el (auto-mode-alist): Associate new extension types with doc-view. --- doc/emacs/misc.texi | 23 ++++--- lisp/doc-view.el | 151 ++++++++++++++++++++++++++++---------------- lisp/files.el | 2 +- 3 files changed, 113 insertions(+), 63 deletions(-) diff --git a/doc/emacs/misc.texi b/doc/emacs/misc.texi index df1e5ef2381..365c079e897 100644 --- a/doc/emacs/misc.texi +++ b/doc/emacs/misc.texi @@ -455,20 +455,27 @@ servers the user has connected to. If this variable is @code{t}, @cindex PostScript file @cindex OpenDocument file @cindex Microsoft Office file +@cindex EPUB file +@cindex CBZ file +@cindex FB2 file +@cindex XPS file +@cindex OXPS file @cindex DocView mode @cindex mode, DocView @cindex document viewer (DocView) @findex doc-view-mode DocView mode is a major mode for viewing DVI, PostScript (PS), PDF, -OpenDocument, and Microsoft Office documents. It provides features -such as slicing, zooming, and searching inside documents. 
It works by -converting the document to a set of images using the @command{gs} -(GhostScript) or @command{mudraw}/@command{pdfdraw} (MuPDF) commands -and other external tools @footnote{For PostScript files, GhostScript -is a hard requirement. For DVI files, @code{dvipdf} or @code{dvipdfm} -is needed. For OpenDocument and Microsoft Office documents, the -@code{unoconv} tool is needed.}, and displaying those images. +OpenDocument, Microsoft Office, EPUB, CBZ, FB2, XPS and OXPS +documents. It provides features such as slicing, zooming, and +searching inside documents. It works by converting the document to a +set of images using the @command{gs} (GhostScript) or +@command{pdfdraw}/@command{mutool draw} (MuPDF) commands and other +external tools @footnote{PostScript files require GhostScript, DVI +files require @code{dvipdf} or @code{dvipdfm}, OpenDocument and +Microsoft Office documents require the @code{unoconv} tool, and EPUB, +CBZ, FB2, XPS and OXPS files require @code{mutool} to be available.}, +and displaying those images. @findex doc-view-toggle-display @findex doc-view-minor-mode diff --git a/lisp/doc-view.el b/lisp/doc-view.el index 5e160f5dff1..4cc560755e4 100644 --- a/lisp/doc-view.el +++ b/lisp/doc-view.el @@ -3,7 +3,7 @@ ;; Copyright (C) 2007-2022 Free Software Foundation, Inc. ;; ;; Author: Tassilo Horn -;; Keywords: files, pdf, ps, dvi +;; Keywords: files, pdf, ps, dvi, djvu, epub, cbz, fb2, xps, openxps ;; This file is part of GNU Emacs. @@ -25,17 +25,19 @@ ;; Viewing PS/PDF/DVI files requires Ghostscript, `dvipdf' (comes with ;; Ghostscript) or `dvipdfm' (comes with teTeX or TeXLive) and ;; `pdftotext', which comes with xpdf (https://www.foolabs.com/xpdf/) -;; or poppler (https://poppler.freedesktop.org/). -;; Djvu documents require `ddjvu' (from DjVuLibre). -;; ODF files require `soffice' (from LibreOffice). +;; or poppler (https://poppler.freedesktop.org/). 
EPUB, CBZ, FB2, XPS +;; and OXPS documents require `mutool' which comes with mupdf +;; (https://mupdf.com/index.html). Djvu documents require `ddjvu' +;; (from DjVuLibre). ODF files require `soffice' (from LibreOffice). ;;; Commentary: ;; DocView is a document viewer for Emacs. It converts a number of -;; document formats (including PDF, PS, DVI, Djvu and ODF files) to a -;; set of PNG files, one PNG for each page, and displays the PNG -;; images inside an Emacs buffer. This buffer uses `doc-view-mode' -;; which provides convenient key bindings for browsing the document. +;; document formats (including PDF, PS, DVI, Djvu, ODF, EPUB, CBZ, +;; FB2, XPS and OXPS files) to a set of PNG (or TIFF for djvu) files, +;; one image for each page, and displays the images inside an Emacs +;; buffer. This buffer uses `doc-view-mode' which provides convenient +;; key bindings for browsing the document. ;; ;; To use it simply open a document file with ;; @@ -147,7 +149,10 @@ ;;;; Customization Options (defgroup doc-view nil - "In-buffer viewer for PDF, PostScript, DVI, and DJVU files." + "In-buffer document viewer. +The viewer handles PDF, PostScript, DVI, DJVU, ODF, EPUB, CBZ, +FB2, XPS and OXPS files, if the appropriate converter programs +are available (see Info node `(emacs)Document View')." :link '(function-link doc-view) :version "22.2" :group 'applications @@ -221,6 +226,11 @@ Higher values result in larger images." :type 'number) +(defcustom doc-view-epub-font-size nil + "Font size in points for EPUB layout." + :type 'integer + :set #'doc-view-custom-set-epub-font-size) + (defcustom doc-view-scale-internally t "Whether we should try to rescale images ourselves. If nil, the document is re-rendered every time the scaling factor is modified. @@ -256,9 +266,7 @@ If this and `doc-view-dvipdfm-program' are set, `doc-view-dvipdf-program' will be preferred." 
:type 'file) -(define-obsolete-variable-alias 'doc-view-unoconv-program - 'doc-view-odf->pdf-converter-program - "24.4") +(define-obsolete-variable-alias 'doc-view-unoconv-program 'doc-view-odf->pdf-converter-program "24.4") (defcustom doc-view-odf->pdf-converter-program (cond @@ -382,7 +390,8 @@ the (uncompressed, extracted) file residing in (defvar doc-view-doc-type nil "The type of document in the current buffer. -Can be `dvi', `pdf', `ps', `djvu' or `odf'.") +Can be `dvi', `pdf', `ps', `djvu', `odf', `epub', `cbz', `fb2', +`xps' or `oxps'.") (defvar doc-view-single-page-converter-function nil "Function to call to convert a single page of the document to a bitmap file. @@ -464,17 +473,17 @@ Typically \"page-%s.png\".") ;; It's normal for this operation to result in a very large undo entry. (setq-local undo-outer-limit (* 2 (buffer-size)))) (cl-labels ((revert () - (let ((revert-buffer-preserve-modes t)) - (apply orig-fun args) - ;; Update the cached version of the pdf file, - ;; too. This is the one that's used when - ;; rendering (bug#26996). - (unless (equal buffer-file-name - doc-view--buffer-file-name) - ;; FIXME: Lars says he needed to recreate - ;; the dir, we should figure out why. - (doc-view-make-safe-dir doc-view-cache-directory) - (write-region nil nil doc-view--buffer-file-name))))) + (let ((revert-buffer-preserve-modes t)) + (apply orig-fun args) + ;; Update the cached version of the pdf file, + ;; too. This is the one that's used when + ;; rendering (bug#26996). + (unless (equal buffer-file-name + doc-view--buffer-file-name) + ;; FIXME: Lars says he needed to recreate + ;; the dir, we should figure out why. 
+ (doc-view-make-safe-dir doc-view-cache-directory) + (write-region nil nil doc-view--buffer-file-name))))) (if (and (eq 'pdf doc-view-doc-type) (executable-find "pdfinfo")) ;; We don't want to revert if the PDF file is corrupted which @@ -586,6 +595,15 @@ Typically \"page-%s.png\".") (defmacro doc-view-current-image () '(image-mode-window-get 'image)) (defmacro doc-view-current-slice () '(image-mode-window-get 'slice)) +(defun doc-view-custom-set-epub-font-size (option-name new-value) + (set-default option-name new-value) + (dolist (x (buffer-list)) + (with-current-buffer x + (when (eq doc-view-doc-type 'epub) + (delete-directory doc-view--current-cache-dir t) + (doc-view-initiate-display) + (doc-view-goto-page (doc-view-current-page)))))) + (defun doc-view-last-page-number () (length doc-view--current-files)) @@ -738,7 +756,7 @@ at the top edge of the page moves to the previous page." (interactive) (while (consp doc-view--current-converter-processes) (ignore-errors ;; Some entries might not be processes, and maybe - ;; some are dead already? + ; some are dead already? (kill-process (pop doc-view--current-converter-processes)))) (when doc-view--current-timer (cancel-timer doc-view--current-timer) @@ -799,8 +817,8 @@ It's a subdirectory of `doc-view-cache-directory'." ;;;###autoload (defun doc-view-mode-p (type) "Return non-nil if document type TYPE is available for `doc-view'. -Document types are symbols like `dvi', `ps', `pdf', or `odf' (any -OpenDocument format)." +Document types are symbols like `dvi', `ps', `pdf', `epub', +`cbz', `fb2', `xps', `oxps', or `odf' (any OpenDocument format)." (and (display-graphic-p) (image-type-available-p 'png) (cond @@ -811,16 +829,22 @@ OpenDocument format)." 
(and doc-view-dvipdfm-program (executable-find doc-view-dvipdfm-program))))) ((memq type '(postscript ps eps pdf)) - (or (and doc-view-ghostscript-program + (or (and doc-view-ghostscript-program (executable-find doc-view-ghostscript-program)) - (and doc-view-pdfdraw-program - (executable-find doc-view-pdfdraw-program)))) + ;; for pdf also check for `doc-view-pdfdraw-program' + (when (eq type 'pdf) + (and doc-view-pdfdraw-program + (executable-find doc-view-pdfdraw-program))))) ((eq type 'odf) (and doc-view-odf->pdf-converter-program (executable-find doc-view-odf->pdf-converter-program) (doc-view-mode-p 'pdf))) ((eq type 'djvu) (executable-find "ddjvu")) + ((memq type '(epub cbz fb2 xps oxps)) + ;; first check if `doc-view-pdfdraw-program' is set to mutool + (and (string= doc-view-pdfdraw-program "mutool") + (executable-find "mutool"))) (t ;; unknown image type nil)))) @@ -1053,7 +1077,7 @@ Should be invoked when the cached images aren't up-to-date." ;; some file-name-handler-managed dir, for example). (let* ((default-directory (or (unhandled-file-name-directory default-directory) - (expand-file-name "~/"))) + (expand-file-name "~/"))) (proc (apply #'start-process name doc-view-conversion-buffer program args))) (push proc doc-view--current-converter-processes) @@ -1139,14 +1163,17 @@ The test is performed using `doc-view-pdfdraw-program'." 
(search-forward "error: cannot authenticate password" nil t))) (defun doc-view-pdf->png-converter-mupdf (pdf png page callback) - (let ((pdf-passwd (if (doc-view-pdf-password-protected-pdfdraw-p pdf) - (read-passwd "Enter password for PDF file: ")))) + (let* ((pdf-passwd (if (doc-view-pdf-password-protected-pdfdraw-p pdf) + (read-passwd "Enter password for PDF file: "))) + (options `(,(concat "-o" png) + ,(format "-r%d" (round doc-view-resolution)) + ,@(if pdf-passwd `("-p" ,pdf-passwd))))) + (when (and (eq doc-view-doc-type 'epub) doc-view-epub-font-size) + (setq options (append options (list (format "-S%s" doc-view-epub-font-size))))) (doc-view-start-process "pdf->png" doc-view-pdfdraw-program `(,@(doc-view-pdfdraw-program-subcommand) - ,(concat "-o" png) - ,(format "-r%d" (round doc-view-resolution)) - ,@(if pdf-passwd `("-p" ,pdf-passwd)) + ,@options ,pdf ,@(if page `(,(format "%d" page)))) callback))) @@ -1227,20 +1254,20 @@ Start by converting PAGES, and then the rest." (let ((rest (cdr pages))) (funcall doc-view-single-page-converter-function pdf (format png (car pages)) (car pages) - (lambda () - (if rest - (doc-view-document->bitmap pdf png rest) - ;; Yippie, the important pages are done, update the display. - (clear-image-cache) - ;; For the windows that have a message (like "Welcome to - ;; DocView") display property, clearing the image cache is - ;; not sufficient. - (dolist (win (get-buffer-window-list (current-buffer) nil 'visible)) - (with-selected-window win - (when (stringp (overlay-get (doc-view-current-overlay) 'display)) - (doc-view-goto-page (doc-view-current-page))))) - ;; Convert the rest of the pages. - (doc-view-pdf/ps->png pdf png))))))) + (lambda () + (if rest + (doc-view-document->bitmap pdf png rest) + ;; Yippie, the important pages are done, update the display. + (clear-image-cache) + ;; For the windows that have a message (like "Welcome to + ;; DocView") display property, clearing the image cache is + ;; not sufficient. 
+ (dolist (win (get-buffer-window-list (current-buffer) nil 'visible)) + (with-selected-window win + (when (stringp (overlay-get (doc-view-current-overlay) 'display)) + (doc-view-goto-page (doc-view-current-page))))) + ;; Convert the rest of the pages. + (doc-view-pdf/ps->png pdf png))))))) (defun doc-view-pdf->txt (pdf txt callback) "Convert PDF to TXT asynchronously and call CALLBACK when finished." @@ -1337,7 +1364,9 @@ Those files are saved in the directory given by the function ;; Rename to doc.pdf (rename-file opdf pdf) (doc-view-pdf/ps->png pdf png-file))))) - ((or 'pdf 'djvu) + ;; The doc-view-mode-p check ensures that epub, cbz, fb2 and + ;; (o)xps are handled with mutool + ((or 'pdf 'djvu 'epub 'cbz 'fb2 'xps 'oxps) (let ((pages (doc-view-active-pages))) ;; Convert doc to bitmap images starting with the active pages. (doc-view-document->bitmap doc-view--buffer-file-name png-file pages))) @@ -1432,7 +1461,7 @@ dragging it to its bottom-right corner. See also (defun doc-view-guess-paper-size (iw ih) "Guess the paper size according to the aspect ratio." (cl-labels ((div (x y) - (round (/ (* 100.0 x) y)))) + (round (/ (* 100.0 x) y)))) (let ((ar (div iw ih)) (al (mapcar (lambda (l) (list (div (nth 1 l) (nth 2 l)) (car l))) @@ -1869,6 +1898,8 @@ If BACKWARD is non-nil, jump to the previous match." ("dvi" dvi) ;; PDF ("pdf" pdf) ("epdf" pdf) + ;; EPUB + ("epub" epub) ;; PostScript ("ps" ps) ("eps" ps) ;; DjVu @@ -1880,7 +1911,13 @@ If BACKWARD is non-nil, jump to the previous match." ;; Microsoft Office formats (also handled by the odf ;; conversion chain). ("doc" odf) ("docx" odf) ("xls" odf) ("xlsx" odf) - ("ppt" odf) ("pps" odf) ("pptx" odf) ("rtf" odf)) + ("ppt" odf) ("pps" odf) ("pptx" odf) ("rtf" odf) + ;; CBZ + ("cbz" cbz) + ;; FB2 + ("fb2" fb2) + ;; (Open)XPS + ("xps" xps) ("oxps" oxps)) t)))) (content-types (save-excursion @@ -1889,7 +1926,13 @@ If BACKWARD is non-nil, jump to the previous match." 
((looking-at "%!") '(ps)) ((looking-at "%PDF") '(pdf)) ((looking-at "\367\002") '(dvi)) - ((looking-at "AT&TFORM") '(djvu)))))) + ((looking-at "AT&TFORM") '(djvu)) + ;; The following pattern actually is for recognizing + ;; zip-archives, so that this same association is used for + ;; cbz files. This is fine, as cbz files should be handled + ;; like epub anyway. + ((looking-at "PK") '(epub)) + )))) (setq-local doc-view-doc-type (car (or (nreverse (seq-intersection name-types content-types #'eq)) diff --git a/lisp/files.el b/lisp/files.el index 4ba71e61442..79c336f7825 100644 --- a/lisp/files.el +++ b/lisp/files.el @@ -2928,7 +2928,7 @@ ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|RAR\\|CBR\\|7Z\\|SQUASHFS\\)\\'" . ("\\.\\(diffs?\\|patch\\|rej\\)\\'" . diff-mode) ("\\.\\(dif\\|pat\\)\\'" . diff-mode) ; for MS-DOS ("\\.[eE]?[pP][sS]\\'" . ps-mode) - ("\\.\\(?:PDF\\|DVI\\|OD[FGPST]\\|DOCX\\|XLSX?\\|PPTX?\\|pdf\\|djvu\\|dvi\\|od[fgpst]\\|docx\\|xlsx?\\|pptx?\\)\\'" . doc-view-mode-maybe) + ("\\.\\(?:PDF\\|EPUB\\|CBZ\\|FB2\\|O?XPS\\|DVI\\|OD[FGPST]\\|DOCX\\|XLSX?\\|PPTX?\\|pdf\\|epub\\|cbz\\|fb2\\|o?xps\\|djvu\\|dvi\\|od[fgpst]\\|docx\\|xlsx?\\|pptx?\\)\\'" . doc-view-mode-maybe) ("configure\\.\\(ac\\|in\\)\\'" . autoconf-mode) ("\\.s\\(v\\|iv\\|ieve\\)\\'" . sieve-mode) ("BROWSE\\'" . ebrowse-tree-mode) -- 2.39.5