From 8b9587d73b579fb2fdd0eaaa1ed5fd608653e522 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 19 Apr 2011 23:24:51 -0700
Subject: [PATCH] Make the Lisp reader and string-to-float more consistent.

* data.c (atof): Remove decl; no longer used or needed.
(Fstring_to_number): Use new string_to_float function, to be
consistent with how the Lisp reader treats infinities and NaNs.
Do not assume that floating-point numbers represent EMACS_INT
without losing information; this is not true on most 64-bit hosts.
Avoid double-rounding errors, by insisting on integers when
parsing non-base-10 numbers, as the documentation specifies.
Report integer overflow instead of silently converting to
integers.
* lisp.h (string_to_float): New decl, replacing ...
(isfloat_string): Remove.
* lread.c (read1): Do not accept +. and -. as integers; this
appears to have been a coding error.  Similarly, do not accept
strings like +-1e0 as floating point numbers.  Do not report
overflow for some integer overflows and not others; instead,
report them all.  Break out the floating-point parsing into a new
function string_to_float, so that Fstring_to_number parses
floating point numbers consistently with the Lisp reader.
(string_to_float): New function, replacing isfloat_string.
This function checks for valid syntax and produces the resulting
Lisp float number too.
---
 src/ChangeLog |  25 +++++++++
 src/data.c    |  42 ++++----------
 src/lisp.h    |   2 +-
 src/lread.c   | 149 +++++++++++++++++++++++---------------------------
 4 files changed, 106 insertions(+), 112 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index 4a675cc96c6..c232c242fd7 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,28 @@
+2011-04-20  Paul Eggert  <eggert@cs.ucla.edu>
+
+	Make the Lisp reader and string-to-float more consistent.
+	* data.c (atof): Remove decl; no longer used or needed.
+	(Fstring_to_number): Use new string_to_float function, to be
+	consistent with how the Lisp reader treats infinities and NaNs.
+	Do not assume that floating-point numbers represent EMACS_INT
+	without losing information; this is not true on most 64-bit hosts.
+	Avoid double-rounding errors, by insisting on integers when
+	parsing non-base-10 numbers, as the documentation specifies.
+	Report integer overflow instead of silently converting to
+	integers.
+	* lisp.h (string_to_float): New decl, replacing ...
+	(isfloat_string): Remove.
+	* lread.c (read1): Do not accept +. and -. as integers; this
+	appears to have been a coding error.  Similarly, do not accept
+	strings like +-1e0 as floating point numbers.  Do not report
+	overflow for some integer overflows and not others; instead,
+	report them all.  Break out the floating-point parsing into a new
+	function string_to_float, so that Fstring_to_number parses
+	floating point numbers consistently with the Lisp reader.
+	(string_to_float): New function, replacing isfloat_string.
+	This function checks for valid syntax and produces the resulting
+	Lisp float number too.
+
 2011-04-19  Eli Zaretskii  <eliz@gnu.org>
 
 	* syntax.h (SETUP_SYNTAX_TABLE_FOR_OBJECT): Fix setting of
diff --git a/src/data.c b/src/data.c
index c9250a67bf0..c3ee3e39939 100644
--- a/src/data.c
+++ b/src/data.c
@@ -48,10 +48,6 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 
 #include <math.h>
 
-#if !defined (atof)
-extern double atof (const char *);
-#endif /* !atof */
-
 Lisp_Object Qnil, Qt, Qquote, Qlambda, Qunbound;
 static Lisp_Object Qsubr;
 Lisp_Object Qerror_conditions, Qerror_message, Qtop_level;
@@ -2415,8 +2411,7 @@ If the base used is not 10, STRING is always parsed as integer.  */)
 {
   register char *p;
   register int b;
-  int sign = 1;
-  Lisp_Object val;
+  EMACS_INT n;
 
   CHECK_STRING (string);
 
@@ -2430,38 +2425,23 @@ If the base used is not 10, STRING is always parsed as integer.  */)
 	xsignal1 (Qargs_out_of_range, base);
     }
 
-  /* Skip any whitespace at the front of the number.  Some versions of
-     atoi do this anyway, so we might as well make Emacs lisp consistent.  */
+  /* Skip any whitespace at the front of the number.  Typically strtol does
+     this anyway, so we might as well be consistent.  */
   p = SSDATA (string);
   while (*p == ' ' || *p == '\t')
     p++;
 
-  if (*p == '-')
-    {
-      sign = -1;
-      p++;
-    }
-  else if (*p == '+')
-    p++;
-
-  if (isfloat_string (p, 1) && b == 10)
-    val = make_float (sign * atof (p));
-  else
+  if (b == 10)
     {
-      double v = 0;
-
-      while (1)
-	{
-	  int digit = digit_to_number (*p++, b);
-	  if (digit < 0)
-	    break;
-	  v = v * b + digit;
-	}
-
-      val = make_fixnum_or_float (sign * v);
+      Lisp_Object val = string_to_float (p, 1);
+      if (FLOATP (val))
+	return val;
     }
 
-  return val;
+  n = strtol (p, NULL, b);
+  if (FIXNUM_OVERFLOW_P (n))
+    xsignal (Qoverflow_error, list1 (string));
+  return make_number (n);
 }
 
 
diff --git a/src/lisp.h b/src/lisp.h
index 581835dd32b..6080007c780 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2782,7 +2782,7 @@ extern Lisp_Object oblookup (Lisp_Object, const char *, EMACS_INT, EMACS_INT);
   } while (0)
 extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
                   Lisp_Object *, Lisp_Object);
-extern int isfloat_string (const char *, int);
+Lisp_Object string_to_float (char const *, int);
 extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
                          Lisp_Object);
 extern void dir_warning (const char *, Lisp_Object);
diff --git a/src/lread.c b/src/lread.c
index d32f0b6a7e2..776d4ced7f2 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -3006,85 +3006,32 @@ read1 (register Lisp_Object readcharfun, int *pch, int first_in_list)
 	if (!quoted && !uninterned_symbol)
 	  {
 	    register char *p1;
+	    Lisp_Object result;
 	    p1 = read_buffer;
 	    if (*p1 == '+' || *p1 == '-') p1++;
 	    /* Is it an integer? */
-	    if (p1 != p)
+	    if ('0' <= *p1 && *p1 <= '9')
 	      {
-		while (p1 != p && (c = *p1) >= '0' && c <= '9') p1++;
+		do
+		  p1++;
+		while ('0' <= *p1 && *p1 <= '9');
+
 		/* Integers can have trailing decimal points.  */
-		if (p1 > read_buffer && p1 < p && *p1 == '.') p1++;
+		p1 += (*p1 == '.');
 		if (p1 == p)
-		  /* It is an integer. */
 		  {
-		    if (p1[-1] == '.')
-		      p1[-1] = '\0';
-		    {
-		      /* EMACS_INT n = atol (read_buffer); */
-		      char *endptr = NULL;
-		      EMACS_INT n = (errno = 0,
-				     strtol (read_buffer, &endptr, 10));
-		      if (errno == ERANGE && endptr)
-			{
-			  Lisp_Object args
-			    = Fcons (make_string (read_buffer,
-						  endptr - read_buffer),
-				     Qnil);
-			  xsignal (Qoverflow_error, args);
-			}
-		      return make_fixnum_or_float (n);
-		    }
+		    /* It is an integer. */
+		    EMACS_INT n = strtol (read_buffer, NULL, 10);
+		    if (FIXNUM_OVERFLOW_P (n))
+		      xsignal (Qoverflow_error,
+			       list1 (build_string (read_buffer)));
+		    return make_number (n);
 		  }
 	      }
-	    if (isfloat_string (read_buffer, 0))
-	      {
-		/* Compute NaN and infinities using 0.0 in a variable,
-		   to cope with compilers that think they are smarter
-		   than we are.  */
-		double zero = 0.0;
-
-		double value;
-
-		/* Negate the value ourselves.  This treats 0, NaNs,
-		   and infinity properly on IEEE floating point hosts,
-		   and works around a common bug where atof ("-0.0")
-		   drops the sign.  */
-		int negative = read_buffer[0] == '-';
-
-		/* The only way p[-1] can be 'F' or 'N', after isfloat_string
-		   returns 1, is if the input ends in e+INF or e+NaN.  */
-		switch (p[-1])
-		  {
-		  case 'F':
-		    value = 1.0 / zero;
-		    break;
-		  case 'N':
-		    value = zero / zero;
 
-		    /* If that made a "negative" NaN, negate it.  */
-
-		    {
-		      int i;
-		      union { double d; char c[sizeof (double)]; } u_data, u_minus_zero;
-
-		      u_data.d = value;
-		      u_minus_zero.d = - 0.0;
-		      for (i = 0; i < sizeof (double); i++)
-			if (u_data.c[i] & u_minus_zero.c[i])
-			  {
-			    value = - value;
-			    break;
-			  }
-		    }
-		    /* Now VALUE is a positive NaN.  */
-		    break;
-		  default:
-		    value = atof (read_buffer + negative);
-		    break;
-		  }
-
-		return make_float (negative ? - value : value);
-	      }
+	    result = string_to_float (read_buffer, 0);
+	    if (FLOATP (result))
+	      return result;
 	  }
 	{
 	  Lisp_Object name, result;
@@ -3242,20 +3189,40 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg)
 }
 
 
+/* Return the length of the floating-point number that is the prefix of CP, or
+   zero if there is none.  */
+
 #define LEAD_INT 1
 #define DOT_CHAR 2
 #define TRAIL_INT 4
 #define E_CHAR 8
 #define EXP_INT 16
 
-int
-isfloat_string (const char *cp, int ignore_trailing)
+
+/* Convert CP to a floating point number.  Return a non-float value if CP does
+   not have valid floating point syntax.  If IGNORE_TRAILING is nonzero,
+   consider just the longest prefix of CP that has valid floating point
+   syntax.  */
+
+Lisp_Object
+string_to_float (char const *cp, int ignore_trailing)
 {
   int state;
   const char *start = cp;
 
+  /* Compute NaN and infinities using a variable, to cope with compilers that
+     think they are smarter than we are.  */
+  double zero = 0;
+
+  /* Negate the value ourselves.  This treats 0, NaNs, and infinity properly on
+     IEEE floating point hosts, and works around a formerly-common bug where
+     atof ("-0.0") drops the sign.  */
+  int negative = *cp == '-';
+
+  double value = 0;
+
   state = 0;
-  if (*cp == '+' || *cp == '-')
+  if (negative || *cp == '+')
     cp++;
 
   if (*cp >= '0' && *cp <= '9')
@@ -3295,21 +3262,43 @@ isfloat_string (const char *cp, int ignore_trailing)
     {
       state |= EXP_INT;
       cp += 3;
+      value = 1.0 / zero;
     }
   else if (cp[-1] == '+' && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
     {
       state |= EXP_INT;
       cp += 3;
+      value = zero / zero;
+
+      /* If that made a "negative" NaN, negate it.  */
+      {
+	int i;
+	union { double d; char c[sizeof (double)]; } u_data, u_minus_zero;
+
+	u_data.d = value;
+	u_minus_zero.d = - 0.0;
+	for (i = 0; i < sizeof (double); i++)
+	  if (u_data.c[i] & u_minus_zero.c[i])
+	    {
+	      value = - value;
+	      break;
+	    }
+      }
+      /* Now VALUE is a positive NaN.  */
     }
 
-  return ((ignore_trailing
-	   || *cp == 0 || *cp == ' ' || *cp == '\t' || *cp == '\n'
-	   || *cp == '\r' || *cp == '\f')
-	  && (state == (LEAD_INT|DOT_CHAR|TRAIL_INT)
-	      || state == (DOT_CHAR|TRAIL_INT)
-	      || state == (LEAD_INT|E_CHAR|EXP_INT)
-	      || state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)
-	      || state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)));
+  if (! (state == (LEAD_INT|DOT_CHAR|TRAIL_INT)
+	 || state == (DOT_CHAR|TRAIL_INT)
+	 || state == (LEAD_INT|E_CHAR|EXP_INT)
+	 || state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)
+	 || state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)))
+    return make_number (0); /* Any non-float value will do.  */
+
+  if (! value)
+    value = atof (start + negative);
+  if (negative)
+    value = - value;
+  return make_float (value);
 }
 
 
-- 
2.39.5