From: Paul Eggert
Date: Mon, 23 Dec 2024 21:38:51 +0000 (-0800)
Subject: Avoid U+FFFD in commit messages
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=204f402776ae6c8c1847b0eac5640d21d5c381e0;p=emacs.git

Avoid U+FFFD in commit messages

* build-aux/git-hooks/commit-msg: Also check against
U+FFFD REPLACEMENT CHARACTER in commit messages.

(cherry picked from commit 28c420afab6a0944a192c30ff2d5d9e40c88f14f)
---

diff --git a/build-aux/git-hooks/commit-msg b/build-aux/git-hooks/commit-msg
index 1eb2560bba2..dace4c7fb66 100755
--- a/build-aux/git-hooks/commit-msg
+++ b/build-aux/git-hooks/commit-msg
@@ -31,6 +31,8 @@ fi
 # Use U+00A2 CENT SIGN to test whether the locale works.
 cent_sign_utf8_format='\302\242\n'
 cent_sign=`printf "$cent_sign_utf8_format"`
+replacement_character_utf8_format='\357\277\275\n'
+replacement_character=`printf "$replacement_character_utf8_format"`
 print_at_sign='BEGIN {print substr("'$cent_sign'@", 2)}'
 at_sign=`$awk "$print_at_sign" /dev/null`
 if test "$at_sign" != @; then
@@ -44,7 +46,12 @@ if test "$at_sign" != @; then
 fi
 
 # Check the log entry.
-exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" -v file="$1" '
+exec $awk \
+     -v at_sign="$at_sign" \
+     -v cent_sign="$cent_sign" \
+     -v file="$1" \
+     -v replacement_character="$replacement_character" \
+'
   BEGIN {
     # These regular expressions assume traditional Unix unibyte behavior.
     # They are needed for old or broken versions of awk, e.g.,
@@ -137,6 +144,10 @@ exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" -v file="$1" '
     print "Unprintable character in commit message"
     status = 1
   }
+  $0 ~ replacement_character {
+    print "Replacement character in commit message"
+    status = 1
+  }
 
   END {
     if (nlines == 0) {