Use a better method of scaling by 2**k. Instead of adding to the

exponent bits of the reduced result, construct 2**k (hopefully in parallel with the construction of the reduced result) and multiply by it. This tends to be much faster if the construction of 2**k is actually in parallel, and might be faster even with no parallelism since adjustment of the exponent requires a read-modify-wrtite at an unfortunate time for pipelines. In some cases involving exp2* on amd64 (A64), this change saves about 40 cycles or 30%. I think it is inherently only about 12 cycles faster in these cases and the rest of the speedup is from partly-accidentally avoiding compiler pessimizations (the construction of 2**k is now manually scheduled for good results, and -O2 doesn't always mess this up). In most cases on amd64 (A64) and i386 (A64) the speedup is about 20 cycles. The worst case that I found is expf on ia64 where this change is a pessimization of about 10 cycles or 5%. The manual scheduling for plain exp[f] is harder and not as tuned. Details specific to expm1*: - the saving is closer to 12 cycles than to 40 for expm1* on i386 (A64). For some reason it is much larger for negative args. - also convert to __FBSDID().
svn path=/head/; revision=176082
2024-12-02 08:42:48 +00:00 · 2008-02-07 09:42:19 +00:00 · 2008-02-07 09:42:19 +00:00 · a00672cff9 · 2020-12-20 02:59:44 +00:00
commit a00672cff9
parent 626ac252ea
2 changed files with 16 additions and 26 deletions
--- a/lib/msun/src/s_expm1.c
+++ b/lib/msun/src/s_expm1.c
@ -10,9 +10,8 @@
 * ====================================================
 */

-#ifndef lint
-static char rcsid[] = "$FreeBSD$";
-#endif
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");

 /* expm1(x)
 * Returns exp(x)-1, the exponential of x minus 1.
@ -130,7 +129,7 @@ Q5  =  -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
 double
 expm1(double x)
 {
-	double y,hi,lo,c,t,e,hxs,hfx,r1;
+	double y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
 	int32_t k,xsb;
 	u_int32_t hx;

@ -187,6 +186,7 @@ expm1(double x)
 	e  = hxs*((r1-t)/(6.0 - x*t));
 	if(k==0) return x - (x*e-hxs);		/* c is 0 */
 	else {
+	    INSERT_WORDS(twopk,0x3ff00000+(k<<20),0);	/* 2^k */
 	    e  = (x*(e-c)-c);
 	    e -= hxs;
 	    if(k== -1) return 0.5*(x-e)-0.5;
@ -194,26 +194,21 @@ expm1(double x)
 	       	if(x < -0.25) return -2.0*(e-(x+0.5));
 	       	else 	      return  one+2.0*(x-e);
 	    if (k <= -2 || k>56) {   /* suffice to return exp(x)-1 */
-	        u_int32_t high;
 	        y = one-(e-x);
-		GET_HIGH_WORD(high,y);
-		SET_HIGH_WORD(y,high+(k<<20));	/* add k to y's exponent */
+		if (k == 1024) y = y*2.0*0x1p1023;
+		else y = y*twopk;
 	        return y-one;
 	    }
 	    t = one;
 	    if(k<20) {
-	        u_int32_t high;
 	        SET_HIGH_WORD(t,0x3ff00000 - (0x200000>>k));  /* t=1-2^-k */
 	       	y = t-(e-x);
-		GET_HIGH_WORD(high,y);
-		SET_HIGH_WORD(y,high+(k<<20));	/* add k to y's exponent */
+		y = y*twopk;
 	   } else {
-	        u_int32_t high;
 		SET_HIGH_WORD(t,((0x3ff-k)<<20));	/* 2^-k */
 	       	y = x-(e+t);
 	       	y += one;
-		GET_HIGH_WORD(high,y);
-		SET_HIGH_WORD(y,high+(k<<20));	/* add k to y's exponent */
+		y = y*twopk;
 	    }
 	}
 	return y;
--- a/lib/msun/src/s_expm1f.c
+++ b/lib/msun/src/s_expm1f.c
@ -13,9 +13,8 @@
 * ====================================================
 */

-#ifndef lint
-static char rcsid[] = "$FreeBSD$";
-#endif
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");

 #include "math.h"
 #include "math_private.h"
@ -38,7 +37,7 @@ Q5  =  -2.0109921195e-07; /* 0xb457edbb */
 float
 expm1f(float x)
 {
-	float y,hi,lo,c,t,e,hxs,hfx,r1;
+	float y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
 	int32_t k,xsb;
 	u_int32_t hx;

@ -92,6 +91,7 @@ expm1f(float x)
 	e  = hxs*((r1-t)/((float)6.0 - x*t));
 	if(k==0) return x - (x*e-hxs);		/* c is 0 */
 	else {
+	    SET_FLOAT_WORD(twopk,0x3f800000+(k<<23));	/* 2^k */
 	    e  = (x*(e-c)-c);
 	    e -= hxs;
 	    if(k== -1) return (float)0.5*(x-e)-(float)0.5;
@ -99,26 +99,21 @@ expm1f(float x)
 	       	if(x < (float)-0.25) return -(float)2.0*(e-(x+(float)0.5));
 	       	else 	      return  one+(float)2.0*(x-e);
 	    if (k <= -2 || k>56) {   /* suffice to return exp(x)-1 */
-	        int32_t i;
 	        y = one-(e-x);
-		GET_FLOAT_WORD(i,y);
-		SET_FLOAT_WORD(y,i+(k<<23));	/* add k to y's exponent */
+		if (k == 128) y = y*2.0F*0x1p127F;
+		else y = y*twopk;
 	        return y-one;
 	    }
 	    t = one;
 	    if(k<23) {
-	        int32_t i;
 	        SET_FLOAT_WORD(t,0x3f800000 - (0x1000000>>k)); /* t=1-2^-k */
 	       	y = t-(e-x);
-		GET_FLOAT_WORD(i,y);
-		SET_FLOAT_WORD(y,i+(k<<23));	/* add k to y's exponent */
+		y = y*twopk;
 	   } else {
-	        int32_t i;
 		SET_FLOAT_WORD(t,((0x7f-k)<<23));	/* 2^-k */
 	       	y = x-(e+t);
 	       	y += one;
-		GET_FLOAT_WORD(i,y);
-		SET_FLOAT_WORD(y,i+(k<<23));	/* add k to y's exponent */
+		y = y*twopk;
 	    }
 	}
 	return y;