From 8edb942b9f5a1bd8615c3e5bbb018ab3b59e204f Mon Sep 17 00:00:00 2001 From: Glenn Morris Date: Wed, 11 Apr 2012 21:09:15 -0400 Subject: [PATCH] Doc and manual fixes related to mule * doc/emacs/mule.texi (International): Copyedits. (International Chars): Update C-x = example output. (Disabling Multibyte): Rename from "Enabling Multibyte". Clarify what "unibyte: t" does, and mode-line description. (Unibyte Mode): Update for "Disabling Multibyte" node name change. Use Texinfo recommended convention for quotes+punctuation. * doc/emacs/custom.texi (Specifying File Variables): Fix "unibyte" description. Update for "Disabling Multibyte" node name change. * doc/emacs/emacs.texi: Update for "Disabling Multibyte" node name change. * doc/lispref/loading.texi (Loading Non-ASCII): "unibyte:" can also be at the end. * lisp/international/mule.el (set-auto-coding-for-load): Doc fix. --- doc/emacs/ChangeLog | 12 +++++ doc/emacs/custom.texi | 4 +- doc/emacs/emacs.texi | 2 +- doc/emacs/mule.texi | 91 ++++++++++++++++++++++---------------- doc/lispref/ChangeLog | 4 ++ doc/lispref/loading.texi | 2 +- lisp/ChangeLog | 4 ++ lisp/international/mule.el | 5 ++- 8 files changed, 81 insertions(+), 43 deletions(-) diff --git a/doc/emacs/ChangeLog b/doc/emacs/ChangeLog index f81e2157e51..e69a0e90ec8 100644 --- a/doc/emacs/ChangeLog +++ b/doc/emacs/ChangeLog @@ -1,3 +1,15 @@ +2012-04-12 Glenn Morris + + * mule.texi (International): Copyedits. + (International Chars): Update C-x = example output. + (Disabling Multibyte): Rename from "Enabling Multibyte". + Clarify what "unibyte: t" does, and mode-line description. + (Unibyte Mode): Update for "Disabling Multibyte" node name change. + Use Texinfo recommended convention for quotes+punctuation. + * custom.texi (Specifying File Variables): Fix "unibyte" description. + Update for "Disabling Multibyte" node name change. + * emacs.texi: Update for "Disabling Multibyte" node name change. 
+ 2012-04-10 Glenn Morris * abbrevs.texi, arevert-xtra.texi, buffers.texi, building.texi: diff --git a/doc/emacs/custom.texi b/doc/emacs/custom.texi index a17eb54e337..6bc96bda9ca 100644 --- a/doc/emacs/custom.texi +++ b/doc/emacs/custom.texi @@ -1163,8 +1163,8 @@ returned by that expression is ignored). conversion of this file. @xref{Coding Systems}. @item -@code{unibyte} says to visit the file in a unibyte buffer, if the -value is @code{t}. @xref{Enabling Multibyte}. +@code{unibyte} says to load or compile a file of Emacs Lisp in unibyte +mode, if the value is @code{t}. @xref{Disabling Multibyte}. @end itemize @noindent diff --git a/doc/emacs/emacs.texi b/doc/emacs/emacs.texi index 152fa73edea..a842f412356 100644 --- a/doc/emacs/emacs.texi +++ b/doc/emacs/emacs.texi @@ -516,7 +516,7 @@ Frames and Graphical Displays International Character Set Support * International Chars:: Basic concepts of multibyte characters. -* Enabling Multibyte:: Controlling whether to use multibyte characters. +* Disabling Multibyte:: Controlling whether to use multibyte characters. * Language Environments:: Setting things up for the language you use. * Input Methods:: Entering text characters not on your keyboard. * Select Input Method:: Specifying your choice of input methods. diff --git a/doc/emacs/mule.texi b/doc/emacs/mule.texi index aeaec2c502e..16fec0961bf 100644 --- a/doc/emacs/mule.texi +++ b/doc/emacs/mule.texi @@ -41,7 +41,7 @@ including European and Vietnamese variants of the Latin alphabet, as well as Cyrillic, Devanagari (for Hindi and Marathi), Ethiopic, Greek, Han (for Chinese and Japanese), Hangul (for Korean), Hebrew, IPA, Kannada, Lao, Malayalam, Tamil, Thai, Tibetan, and Vietnamese scripts. -Emacs also supports various encodings of these characters used by +Emacs also supports various encodings of these characters that are used by other internationalized software, such as word processors and mailers. 
Emacs allows editing text with international characters by supporting @@ -74,14 +74,14 @@ others. @item You can insert non-@acronym{ASCII} characters or search for them. To do that, you can specify an input method (@pxref{Select Input Method}) suitable -for your language, or use the default input method set up when you set +for your language, or use the default input method set up when you chose your language environment. If your keyboard can produce non-@acronym{ASCII} characters, you can select an appropriate keyboard coding system (@pxref{Terminal Coding}), and Emacs will accept those characters. Latin-1 characters can also be input by using the @kbd{C-x 8} prefix, see @ref{Unibyte Mode}. -On the X Window System, your locale should be set to an appropriate +With the X Window System, your locale should be set to an appropriate value to make sure Emacs interprets keyboard input correctly; see @ref{Language Environments, locales}. @end itemize @@ -90,7 +90,7 @@ value to make sure Emacs interprets keyboard input correctly; see @menu * International Chars:: Basic concepts of multibyte characters. -* Enabling Multibyte:: Controlling whether to use multibyte characters. +* Disabling Multibyte:: Controlling whether to use multibyte characters. * Language Environments:: Setting things up for the language you use. * Input Methods:: Entering text characters not on your keyboard. * Select Input Method:: Specifying your choice of input methods. 
@@ -224,29 +224,30 @@ faces used to display the character, and any overlays containing it in a buffer whose coding system is @code{utf-8-unix}: @smallexample - character: @`A (192, #o300, #xc0) -preferred charset: unicode (Unicode (ISO10646)) - code point: 0xC0 - syntax: w which means: word - category: j:Japanese l:Latin v:Vietnamese - buffer code: #xC3 #x80 - file code: not encodable by coding system undecided-unix - display: by this font (glyph code) + position: 1 of 1 (0%), column: 0 + character: @`A (displayed as @`A) (codepoint 192, #o300, #xc0) + preferred charset: unicode (Unicode (ISO10646)) +code point in charset: 0xC0 + syntax: w which means: word + category: .:Base, L:Left-to-right (strong), + j:Japanese, l:Latin, v:Viet + buffer code: #xC3 #x80 + file code: not encodable by coding system undecided-unix + display: by this font (glyph code) xft:-unknown-DejaVu Sans Mono-normal-normal- normal-*-13-*-*-*-m-0-iso10646-1 (#x82) Character code properties: customize what to show name: LATIN CAPITAL LETTER A WITH GRAVE + old-name: LATIN CAPITAL LETTER A GRAVE general-category: Lu (Letter, Uppercase) decomposition: (65 768) ('A' '`') - old-name: LATIN CAPITAL LETTER A GRAVE - -There are text properties here: - auto-composed t @end smallexample -@node Enabling Multibyte -@section Enabling Multibyte Characters +@c FIXME? Does this section even belong in the user manual? +@c Seems more appropriate to the lispref? +@node Disabling Multibyte +@section Disabling Multibyte Characters By default, Emacs starts in multibyte mode: it stores the contents of buffers and strings using an internal encoding that represents @@ -275,32 +276,48 @@ Coding}. Unlike @code{find-file-literally}, finding a file as @samp{raw-text} doesn't disable format conversion, uncompression, or auto mode selection. +@c Not a single file in Emacs uses this feature. Is it really worth +@c mentioning in the _user_ manual? Also, this duplicates somewhat +@c "Loading Non-ASCII" from the lispref. 
@cindex Lisp files, and multibyte operation @cindex multibyte operation, and Lisp files @cindex unibyte operation, and Lisp files @cindex init file, and non-@acronym{ASCII} characters Emacs normally loads Lisp files as multibyte. This includes the Emacs initialization -file, @file{.emacs}, and the initialization files of Emacs packages +file, @file{.emacs}, and the initialization files of packages such as Gnus. However, you can specify unibyte loading for a -particular Lisp file, by putting @w{@samp{-*-unibyte: t;-*-}} in a -comment on the first line (@pxref{File Variables}). Then that file is -always loaded as unibyte text. The motivation for these conventions -is that it is more reliable to always load any particular Lisp file in -the same way. However, you can load a Lisp file as unibyte, on any -one occasion, by typing @kbd{C-x @key{RET} c raw-text @key{RET}} -immediately before loading it. - - The mode line indicates whether multibyte character support is -enabled in the current buffer. If it is, there are two or more -characters (most often two dashes) near the beginning of the mode -line, before the indication of the visited file's end-of-line -convention (colon, backslash, etc.). When multibyte characters -are not enabled, nothing precedes the colon except a single dash. -@xref{Mode Line}, for more details about this. +particular Lisp file, by adding an entry @samp{unibyte: t} in a file +local variables section (@pxref{File Variables}). Then that file is +always loaded as unibyte text. Note that this does not represent a +real @code{unibyte} variable; rather, it just acts as an indicator +to Emacs in the same way as @code{coding} does (@pxref{Specify Coding}). +@ignore +@c I don't see the point of this statement: +The motivation for these conventions is that it is more reliable to +always load any particular Lisp file in the same way. 
+@end ignore +Note also that this feature only applies to @emph{loading} Lisp files +for evaluation, not to visiting them for editing. You can also load a +Lisp file as unibyte, on any one occasion, by typing @kbd{C-x +@key{RET} c raw-text @key{RET}} immediately before loading it. + +@c See http://debbugs.gnu.org/11226 for lack of unibyte tooltip. +@vindex enable-multibyte-characters +The buffer-local variable @code{enable-multibyte-characters} is +non-@code{nil} in multibyte buffers, and @code{nil} in unibyte ones. +The mode line also indicates whether a buffer is multibyte or not. +@xref{Mode Line}. With a graphical display, in a multibyte buffer, +the portion of the mode line that indicates the character set has a +tooltip that (amongst other things) says that the buffer is multibyte. +In a unibyte buffer, the character set indicator is absent. Thus, in +a unibyte buffer (when using a graphical display) there is normally +nothing before the indication of the visited file's end-of-line +convention (colon, backslash, etc.), unless you are using an input +method. @findex toggle-enable-multibyte-characters -You can turn on multibyte support in a specific buffer by invoking the +You can turn off multibyte support in a specific buffer by invoking the command @code{toggle-enable-multibyte-characters} in that buffer. @node Language Environments @@ -1540,7 +1557,7 @@ can still handle these character codes as if they belonged to set-language-environment} and specify a suitable language environment such as @samp{Latin-@var{n}}. - For more information about unibyte operation, see @ref{Enabling + For more information about unibyte operation, see @ref{Disabling Multibyte}. Note particularly that you probably want to ensure that your initialization files are read as unibyte if they contain non-@acronym{ASCII} characters. @@ -1613,7 +1630,7 @@ a key sequence is allowed. 
library is loaded, the @key{ALT} modifier key, if the keyboard has one, serves the same purpose as @kbd{C-x 8}: use @key{ALT} together with an accent character to modify the following letter. In addition, -if the keyboard has keys for the Latin-1 ``dead accent characters,'' +if the keyboard has keys for the Latin-1 ``dead accent characters'', they too are defined to compose with the following character, once @code{iso-transl} is loaded. diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog index a496c5dcdd7..3990db3d602 100644 --- a/doc/lispref/ChangeLog +++ b/doc/lispref/ChangeLog @@ -1,3 +1,7 @@ +2012-04-12 Glenn Morris + + * loading.texi (Loading Non-ASCII): "unibyte:" can also be at the end. + 2012-04-10 Glenn Morris * strings.texi (Case Tables): diff --git a/doc/lispref/loading.texi b/doc/lispref/loading.texi index 7fc9535d88b..0c02f338c7b 100644 --- a/doc/lispref/loading.texi +++ b/doc/lispref/loading.texi @@ -375,7 +375,7 @@ strings are multibyte strings should not be noticeable, since inserting them in unibyte buffers converts them to unibyte automatically. However, if this does make a difference, you can force a particular Lisp file to be interpreted as unibyte by writing -@samp{-*-unibyte: t;-*-} in a comment on the file's first line. With +@samp{unibyte: t} in a local variables section. With that designator, the file will unconditionally be interpreted as unibyte, even in an ordinary multibyte Emacs session. This can matter when making keybindings to non-@acronym{ASCII} characters written as diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 00183d9ebb2..10dd1d5448f 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,7 @@ +2012-04-12 Glenn Morris + + * international/mule.el (set-auto-coding-for-load): Doc fix. + 2012-04-11 Stefan Monnier * imenu.el (imenu-add-to-menubar): `current-local-map' can be nil. 
diff --git a/lisp/international/mule.el b/lisp/international/mule.el index 7bf15009687..1f88df52fd4 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el @@ -1754,8 +1754,9 @@ functions, so they won't be called at all." :type '(repeat function)) (defvar set-auto-coding-for-load nil - "Non-nil means look for `load-coding' property instead of `coding'. -This is used for loading and byte-compiling Emacs Lisp files.") + "Non-nil means respect a \"unibyte: t\" entry in file local variables. +Emacs binds this variable to t when loading or byte-compiling Emacs Lisp +files.") (defun auto-coding-alist-lookup (filename) "Return the coding system specified by `auto-coding-alist' for FILENAME." -- 2.39.2