Treat large integers as floats in the Lisp reader and in string-to-number.

2024-11-27 07:37:33 +00:00 · 2011-04-20 23:03:09 -07:00 · 2011-04-20 23:03:09 -07:00 · 452f415013
commit 452f415013
parent 6703b2e490
4 changed files with 172 additions and 152 deletions
--- a/src/ChangeLog
+++ b/src/ChangeLog
@ -1,3 +1,32 @@
+2011-04-21  Paul Eggert  <eggert@cs.ucla.edu>
+
+	Make the Lisp reader and string-to-float more consistent.
+	* data.c (atof): Remove decl; no longer used or needed.
+	(digit_to_number): Move to lread.c.
+	(Fstring_to_number): Use new string_to_number function, to be
+	consistent with how the Lisp reader treats infinities and NaNs.
+	Do not assume that floating-point numbers represent EMACS_INT
+	without losing information; this is not true on most 64-bit hosts.
+	Avoid double-rounding errors, by insisting on integers when
+	parsing non-base-10 numbers, as the documentation specifies.
+	* lisp.h (string_to_number): New decl, replacing ...
+	(isfloat_string): Remove.
+	* lread.c (read1): Do not accept +. and -. as integers; this
+	appears to have been a coding error.  Similarly, do not accept
+	strings like +-1e0 as floating point numbers.  Do not report
+	overflow for integer overflows unless the base is not 10 which
+	means we have no simple and reliable way to continue.
+	Break out the floating-point parsing into a new
+	function string_to_number, so that Fstring_to_number parses
+	floating point numbers consistently with the Lisp reader.
+	(digit_to_number): Moved here from data.c.  Make it static inline.
+	(E_CHAR, EXP_INT): Remove, replacing with ...
+	(E_EXP): New macro, to solve the "1.0e+" problem mentioned below.
+	(string_to_number): New function, replacing isfloat_string.
+	This function checks for valid syntax and produces the resulting
+	Lisp float number too.  Rework it so that string-to-number
+	no longer mishandles examples like "1.0e+".
+
 2011-04-20  Paul Eggert  <eggert@cs.ucla.edu>

 	* textprop.c (set_text_properties_1): Rewrite for clarity,
@ -15,29 +44,6 @@
 	* alloc.c (overrun_check_malloc, overrun_check_realloc): Now static.
 	(overrun_check_free): Likewise.

-	Make the Lisp reader and string-to-float more consistent.
-	* data.c (atof): Remove decl; no longer used or needed.
-	(Fstring_to_number): Use new string_to_float function, to be
-	consistent with how the Lisp reader treats infinities and NaNs.
-	Do not assume that floating-point numbers represent EMACS_INT
-	without losing information; this is not true on most 64-bit hosts.
-	Avoid double-rounding errors, by insisting on integers when
-	parsing non-base-10 numbers, as the documentation specifies.
-	Report integer overflow instead of silently converting to
-	integers.
-	* lisp.h (string_to_float): New decl, replacing ...
-	(isfloat_string): Remove.
-	* lread.c (read1): Do not accept +. and -. as integers; this
-	appears to have been a coding error.  Similarly, do not accept
-	strings like +-1e0 as floating point numbers.  Do not report
-	overflow for some integer overflows and not others; instead,
-	report them all.  Break out the floating-point parsing into a new
-	function string_to_float, so that Fstring_to_number parses
-	floating point numbers consistently with the Lisp reader.
-	(string_to_float): New function, replacing isfloat_string.
-	This function checks for valid syntax and produces the resulting
-	Lisp float number too.
-
 	* alloc.c (SDATA_SIZE) [!GC_CHECK_STRING_BYTES]: Avoid runtime check
 	in the common case where SDATA_DATA_OFFSET is a multiple of Emacs
 	word size.
--- a/src/data.c
+++ b/src/data.c
@ -2374,26 +2374,6 @@ NUMBER may be an integer or a floating point number.  */)
  return build_string (buffer);
 }

-INLINE static int
-digit_to_number (int character, int base)
-{
-  int digit;
-
-  if (character >= '0' && character <= '9')
-    digit = character - '0';
-  else if (character >= 'a' && character <= 'z')
-    digit = character - 'a' + 10;
-  else if (character >= 'A' && character <= 'Z')
-    digit = character - 'A' + 10;
-  else
-    return -1;
-
-  if (digit >= base)
-    return -1;
-  else
-    return digit;
-}
-
 DEFUN ("string-to-number", Fstring_to_number, Sstring_to_number, 1, 2, 0,
       doc: /* Parse STRING as a decimal number and return the number.
 This parses both integers and floating point numbers.
@ -2406,7 +2386,7 @@ If the base used is not 10, STRING is always parsed as integer.  */)
 {
  register char *p;
  register int b;
-  EMACS_INT n;
+  Lisp_Object val;

  CHECK_STRING (string);

@ -2420,25 +2400,13 @@ If the base used is not 10, STRING is always parsed as integer.  */)
 	xsignal1 (Qargs_out_of_range, base);
    }

-  /* Skip any whitespace at the front of the number.  Typically strtol does
-     this anyway, so we might as well be consistent.  */
  p = SSDATA (string);
  while (*p == ' ' || *p == '\t')
    p++;

-  if (b == 10)
-    {
-      Lisp_Object val = string_to_float (p, 1);
-      if (FLOATP (val))
-	return val;
-    }
-
-  n = strtol (p, NULL, b);
-  if (FIXNUM_OVERFLOW_P (n))
-    xsignal (Qoverflow_error, list1 (string));
-  return make_number (n);
+  val = string_to_number (p, b, 1);
+  return NILP (val) ? make_number (0) : val;
 }
-

 enum arithop
  {
--- a/src/lisp.h
+++ b/src/lisp.h
@ -2782,7 +2782,7 @@ extern Lisp_Object oblookup (Lisp_Object, const char *, EMACS_INT, EMACS_INT);
  } while (0)
 extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
                  Lisp_Object *, Lisp_Object);
-Lisp_Object string_to_float (char const *, int);
+Lisp_Object string_to_number (char const *, int, int);
 extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
                         Lisp_Object);
 extern void dir_warning (const char *, Lisp_Object);
--- a/src/lread.c
+++ b/src/lread.c
@ -3005,32 +3005,8 @@ read1 (register Lisp_Object readcharfun, int *pch, int first_in_list)

 	if (!quoted && !uninterned_symbol)
 	  {
-	    register char *p1;
-	    Lisp_Object result;
-	    p1 = read_buffer;
-	    if (*p1 == '+' || *p1 == '-') p1++;
-	    /* Is it an integer? */
-	    if ('0' <= *p1 && *p1 <= '9')
-	      {
-		do
-		  p1++;
-		while ('0' <= *p1 && *p1 <= '9');
-
-		/* Integers can have trailing decimal points.  */
-		p1 += (*p1 == '.');
-		if (p1 == p)
-		  {
-		    /* It is an integer. */
-		    EMACS_INT n = strtol (read_buffer, NULL, 10);
-		    if (FIXNUM_OVERFLOW_P (n))
-		      xsignal (Qoverflow_error,
-			       list1 (build_string (read_buffer)));
-		    return make_number (n);
-		  }
-	      }
-
-	    result = string_to_float (read_buffer, 0);
-	    if (FLOATP (result))
+	    Lisp_Object result = string_to_number (read_buffer, 10, 0);
+	    if (! NILP (result))
 	      return result;
 	  }
 	{
@ -3189,23 +3165,44 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg)
 }


+static inline int
+digit_to_number (int character, int base)
+{
+  int digit;
+
+  if ('0' <= character && character <= '9')
+    digit = character - '0';
+  else if ('a' <= character && character <= 'z')
+    digit = character - 'a' + 10;
+  else if ('A' <= character && character <= 'Z')
+    digit = character - 'A' + 10;
+  else
+    return -1;
+
+  return digit < base ? digit : -1;
+}
+
 #define LEAD_INT 1
 #define DOT_CHAR 2
 #define TRAIL_INT 4
-#define E_CHAR 8
-#define EXP_INT 16
+#define E_EXP 16


-/* Convert CP to a floating point number.  Return a non-float value if CP does
-   not have valid floating point syntax.  If IGNORE_TRAILING is nonzero,
-   consider just the longest prefix of CP that has valid floating point
-   syntax.  */
+/* Convert STRING to a number, assuming base BASE.  Return a fixnum if CP has
+   integer syntax and fits in a fixnum, else return the nearest float if CP has
+   either floating point or integer syntax and BASE is 10, else return nil.  If
+   IGNORE_TRAILING is nonzero, consider just the longest prefix of CP that has
+   valid floating point syntax.  Signal an overflow if BASE is not 10 and the
+   number has integer syntax but does not fit.  */

 Lisp_Object
-string_to_float (char const *cp, int ignore_trailing)
+string_to_number (char const *string, int base, int ignore_trailing)
 {
  int state;
-  const char *start = cp;
+  char const *cp = string;
+  int leading_digit;
+  int float_syntax = 0;
+  double value = 0;

  /* Compute NaN and infinities using a variable, to cope with compilers that
     think they are smarter than we are.  */
@ -3216,88 +3213,137 @@ string_to_float (char const *cp, int ignore_trailing)
     atof ("-0.0") drops the sign.  */
  int negative = *cp == '-';

-  double value = 0;
+  int signedp = negative || *cp == '+';
+  cp += signedp;

  state = 0;
-  if (negative || *cp == '+')
-    cp++;

-  if (*cp >= '0' && *cp <= '9')
+  leading_digit = digit_to_number (*cp, base);
+  if (0 <= leading_digit)
    {
      state |= LEAD_INT;
-      while (*cp >= '0' && *cp <= '9')
-	cp++;
+      do
+	++cp;
+      while (0 <= digit_to_number (*cp, base));
    }
+
  if (*cp == '.')
    {
      state |= DOT_CHAR;
      cp++;
    }
-  if (*cp >= '0' && *cp <= '9')
-    {
-      state |= TRAIL_INT;
-      while (*cp >= '0' && *cp <= '9')
-	cp++;
-    }
-  if (*cp == 'e' || *cp == 'E')
-    {
-      state |= E_CHAR;
-      cp++;
-      if (*cp == '+' || *cp == '-')
-	cp++;
-    }

-  if (*cp >= '0' && *cp <= '9')
+  if (base == 10)
    {
-      state |= EXP_INT;
-      while (*cp >= '0' && *cp <= '9')
-	cp++;
-    }
-  else if (cp == start)
-    ;
-  else if (cp[-1] == '+' && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
-    {
-      state |= EXP_INT;
-      cp += 3;
-      value = 1.0 / zero;
-    }
-  else if (cp[-1] == '+' && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
-    {
-      state |= EXP_INT;
-      cp += 3;
-      value = zero / zero;
-
-      /* If that made a "negative" NaN, negate it.  */
-      {
-	int i;
-	union { double d; char c[sizeof (double)]; } u_data, u_minus_zero;
-
-	u_data.d = value;
-	u_minus_zero.d = - 0.0;
-	for (i = 0; i < sizeof (double); i++)
-	  if (u_data.c[i] & u_minus_zero.c[i])
+      if ('0' <= *cp && *cp <= '9')
+	{
+	  state |= TRAIL_INT;
+	  do
+	    cp++;
+	  while ('0' <= *cp && *cp <= '9');
+	}
+      if (*cp == 'e' || *cp == 'E')
+	{
+	  char const *ecp = cp;
+	  cp++;
+	  if (*cp == '+' || *cp == '-')
+	    cp++;
+	  if ('0' <= *cp && *cp <= '9')
 	    {
-	      value = - value;
+	      state |= E_EXP;
+	      do
+		cp++;
+	      while ('0' <= *cp && *cp <= '9');
+	    }
+	  else if (cp[-1] == '+'
+		   && cp[0] == 'I' && cp[1] == 'N' && cp[2] == 'F')
+	    {
+	      state |= E_EXP;
+	      cp += 3;
+	      value = 1.0 / zero;
+	    }
+	  else if (cp[-1] == '+'
+		   && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
+	    {
+	      state |= E_EXP;
+	      cp += 3;
+	      value = zero / zero;
+
+	      /* If that made a "negative" NaN, negate it.  */
+	      {
+		int i;
+		union { double d; char c[sizeof (double)]; }
+		  u_data, u_minus_zero;
+		u_data.d = value;
+		u_minus_zero.d = -0.0;
+		for (i = 0; i < sizeof (double); i++)
+		  if (u_data.c[i] & u_minus_zero.c[i])
+		    {
+		      value = -value;
+		      break;
+		    }
+	      }
+	      /* Now VALUE is a positive NaN.  */
+	    }
+	  else
+	    cp = ecp;
+	}
+
+      float_syntax = ((state & (DOT_CHAR|TRAIL_INT)) == (DOT_CHAR|TRAIL_INT)
+		      || state == (LEAD_INT|E_EXP));
+    }
+
+  /* Return nil if the number uses invalid syntax.  If IGNORE_TRAILING, accept
+     any prefix that matches.  Otherwise, the entire string must match.  */
+  if (! (ignore_trailing
+	 ? ((state & LEAD_INT) != 0 || float_syntax)
+	 : (!*cp && ((state & ~DOT_CHAR) == LEAD_INT || float_syntax))))
+    return Qnil;
+
+  /* If the number does not use float syntax, and fits into a fixnum, return
+     the fixnum.  */
+  if (0 <= leading_digit && ! float_syntax)
+    {
+      /* Convert string to EMACS_INT.  Do not use strtol, to avoid assuming
+	 that EMACS_INT is no wider than 'long', and because when BASE is 16
+	 strtol might accept numbers like "0x1" that are not allowed here.  */
+      EMACS_INT n = leading_digit;
+      EMACS_INT abs_bound =
+	(negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM);
+      EMACS_INT abs_bound_over_base = abs_bound / base;
+
+      for (cp = string + signedp + 1; ; cp++)
+	{
+	  int d = digit_to_number (*cp, base);
+	  if (d < 0)
+	    {
+	      if (n <= abs_bound)
+		return make_number (negative ? -n : n);
 	      break;
 	    }
-      }
-      /* Now VALUE is a positive NaN.  */
+	  if (abs_bound_over_base < n)
+	    break;
+	  n = base * n + d;
+	}
+
+      /* Unfortunately there's no simple and reliable way to convert
+	 non-base-10 to floating point.  */
+      if (base != 10)
+	xsignal (Qoverflow_error, list1 (build_string (string)));
    }

-  if (! (state == (LEAD_INT|DOT_CHAR|TRAIL_INT)
-	 || state == (DOT_CHAR|TRAIL_INT)
-	 || state == (LEAD_INT|E_CHAR|EXP_INT)
-	 || state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)
-	 || state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)))
-    return make_number (0); /* Any non-float value will do.  */
-
+  /* Either the number uses float syntax, or it does not fit into a fixnum.
+     Convert it from string to floating point, unless the value is already
+     known because it is an infinity or a NAN.  */
  if (! value)
-    value = atof (start + negative);
+    value = atof (string + signedp);
+
  if (negative)
-    value = - value;
+    value = -value;
  return make_float (value);
 }

+

 static Lisp_Object
 read_vector (Lisp_Object readcharfun, int bytecodeflag)