git.eshelyaron.com Git - emacs.git/commitdiff
Avoid U+FFFD in commit messages
author Paul Eggert <eggert@cs.ucla.edu>
Mon, 23 Dec 2024 21:38:51 +0000 (13:38 -0800)
committer Eshel Yaron <me@eshelyaron.com>
Fri, 27 Dec 2024 15:28:54 +0000 (16:28 +0100)
* build-aux/git-hooks/commit-msg:
Also check against U+FFFD REPLACEMENT CHARACTER in commit messages.

(cherry picked from commit 28c420afab6a0944a192c30ff2d5d9e40c88f14f)

build-aux/git-hooks/commit-msg

index 1eb2560bba254b1aae4da0e766ab9ebc24515e47..dace4c7fb66e793824d818c981d3b26cc560db88 100755 (executable)
@@ -31,6 +31,8 @@ fi
 # Use U+00A2 CENT SIGN to test whether the locale works.
 cent_sign_utf8_format='\302\242\n'
 cent_sign=`printf "$cent_sign_utf8_format"`
+replacement_character_utf8_format='\357\277\275\n'
+replacement_character=`printf "$replacement_character_utf8_format"`
 print_at_sign='BEGIN {print substr("'$cent_sign'@", 2)}'
 at_sign=`$awk "$print_at_sign" </dev/null 2>/dev/null`
 if test "$at_sign" != @; then
@@ -44,7 +46,12 @@ if test "$at_sign" != @; then
 fi
 
 # Check the log entry.
-exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" -v file="$1" '
+exec $awk \
+     -v at_sign="$at_sign" \
+     -v cent_sign="$cent_sign" \
+     -v file="$1" \
+     -v replacement_character="$replacement_character" \
+'
   BEGIN {
     # These regular expressions assume traditional Unix unibyte behavior.
     # They are needed for old or broken versions of awk, e.g.,
@@ -137,6 +144,10 @@ exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" -v file="$1" '
     print "Unprintable character in commit message"
     status = 1
   }
+  $0 ~ replacement_character {
+    print "Replacement character in commit message"
+    status = 1
+  }
 
   END {
     if (nlines == 0) {