port commit-msg to Gawk 3.0.4 (1999)

author Paul Eggert <eggert@cs.ucla.edu>

Sat, 11 Apr 2015 02:12:16 +0000 (19:12 -0700)

committer Paul Eggert <eggert@cs.ucla.edu>

Sat, 11 Apr 2015 02:12:16 +0000 (19:12 -0700)
author Paul Eggert <eggert@cs.ucla.edu>
Sat, 11 Apr 2015 02:12:16 +0000 (19:12 -0700)
committer Paul Eggert <eggert@cs.ucla.edu>
Sat, 11 Apr 2015 02:12:16 +0000 (19:12 -0700)
diff --git a/build-aux/git-hooks/commit-msg b/build-aux/git-hooks/commit-msg

index 6721d53a582363930c05ab30ecd4ad4edd9f2021..5b07b6c5d65e2bfcab2ee8454210ee535d8012f2 100755 (executable)
--- a/build-aux/git-hooks/commit-msg
+++ b/build-aux/git-hooks/commit-msg
@@ -29,34 +29,36 @@ fi
  
  # Use a UTF-8 locale if available, so that the UTF-8 check works.
  # Use U+00A2 CENT SIGN to test whether the locale works.
-cent_sign_utf8_octal='\302\242'
-at_sign=`
-  printf "${cent_sign_utf8_octal}@" |
-  $awk '{print substr($0, 2)}' 2>/dev/null
-`
+cent_sign_utf8_format='\302\242\n'
+cent_sign=`printf "$cent_sign_utf8_format"`
+print_at_sign='{print substr("'$cent_sign'@", 2)}'
+at_sign=`$awk "$print_at_sign" 2>/dev/null`
  if test "$at_sign" != @; then
-  at_sign=`
-    printf "${cent_sign_utf8_octal}@" |
-    LC_ALL=en_US.UTF-8 $awk '{print substr($0, 2)}' 2>/dev/null
-  `
+  at_sign=`LC_ALL=en_US.UTF-8 $awk "$print_at_sign" 2>/dev/null`
    if test "$at_sign" = @; then
      LC_ALL=en_US.UTF-8; export LC_ALL
    fi
  fi
  
  # Check the log entry.
-exec $awk '
+exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" '
    BEGIN {
+    # These regular expressions assume traditional Unix unibyte behavior.
+    # They are needed for old or broken versions of awk, e.g.,
+    # mawk 1.3.3 (1996), Gawk 3.0.4 (1999).
+    space = "[ \f\n\r\t\v]"
+    non_space = "[^ \f\n\r\t\v]"
+    non_print = "[\1-\37\177]"
+
+    # Prefer POSIX regular expressions if available, as they do a
+    # better job of checking.  Similarly, prefer POSIX negated
+    # expressions if UTF-8 also works.
      if (" " ~ /[[:space:]]/) {
        space = "[[:space:]]"
-      non_space = "[^[:space:]]"
-      non_print = "[^[:print:]]"
-    } else {
-      # mawk 1.3.3 does not support POSIX bracket expressions.
-      # Approximate them as best we can.
-      space = "[ \f\n\r\t\v]"
-      non_space = "[^ \f\n\r\t\v]"
-      non_print = "[\1-\37\177]"
+      if (at_sign == "@" && cent_sign ~ /^[[:print:]]$/) {
+        non_space = "[^[:space:]]"
+        non_print = "[^[:print:]]"
+      }
      }
    }
author	Paul Eggert <eggert@cs.ucla.edu>
	Sat, 11 Apr 2015 02:12:16 +0000 (19:12 -0700)
committer	Paul Eggert <eggert@cs.ucla.edu>
	Sat, 11 Apr 2015 02:12:16 +0000 (19:12 -0700)