mirror of
https://git.FreeBSD.org/src.git
synced 2024-12-02 08:42:48 +00:00
Use a better method of scaling by 2**k. Instead of adding to the
exponent bits of the reduced result, construct 2**k (hopefully in parallel with the construction of the reduced result) and multiply by it. This tends to be much faster if the construction of 2**k is actually in parallel, and might be faster even with no parallelism since adjustment of the exponent requires a read-modify-wrtite at an unfortunate time for pipelines. In some cases involving exp2* on amd64 (A64), this change saves about 40 cycles or 30%. I think it is inherently only about 12 cycles faster in these cases and the rest of the speedup is from partly-accidentally avoiding compiler pessimizations (the construction of 2**k is now manually scheduled for good results, and -O2 doesn't always mess this up). In most cases on amd64 (A64) and i386 (A64) the speedup is about 20 cycles. The worst case that I found is expf on ia64 where this change is a pessimization of about 10 cycles or 5%. The manual scheduling for plain exp[f] is harder and not as tuned. Details specific to expm1*: - the saving is closer to 12 cycles than to 40 for expm1* on i386 (A64). For some reason it is much larger for negative args. - also convert to __FBSDID().
This commit is contained in:
parent
626ac252ea
commit
a00672cff9
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=176082
@ -10,9 +10,8 @@
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$FreeBSD$";
|
||||
#endif
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
/* expm1(x)
|
||||
* Returns exp(x)-1, the exponential of x minus 1.
|
||||
@ -130,7 +129,7 @@ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
|
||||
double
|
||||
expm1(double x)
|
||||
{
|
||||
double y,hi,lo,c,t,e,hxs,hfx,r1;
|
||||
double y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
|
||||
int32_t k,xsb;
|
||||
u_int32_t hx;
|
||||
|
||||
@ -187,6 +186,7 @@ expm1(double x)
|
||||
e = hxs*((r1-t)/(6.0 - x*t));
|
||||
if(k==0) return x - (x*e-hxs); /* c is 0 */
|
||||
else {
|
||||
INSERT_WORDS(twopk,0x3ff00000+(k<<20),0); /* 2^k */
|
||||
e = (x*(e-c)-c);
|
||||
e -= hxs;
|
||||
if(k== -1) return 0.5*(x-e)-0.5;
|
||||
@ -194,26 +194,21 @@ expm1(double x)
|
||||
if(x < -0.25) return -2.0*(e-(x+0.5));
|
||||
else return one+2.0*(x-e);
|
||||
if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */
|
||||
u_int32_t high;
|
||||
y = one-(e-x);
|
||||
GET_HIGH_WORD(high,y);
|
||||
SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
|
||||
if (k == 1024) y = y*2.0*0x1p1023;
|
||||
else y = y*twopk;
|
||||
return y-one;
|
||||
}
|
||||
t = one;
|
||||
if(k<20) {
|
||||
u_int32_t high;
|
||||
SET_HIGH_WORD(t,0x3ff00000 - (0x200000>>k)); /* t=1-2^-k */
|
||||
y = t-(e-x);
|
||||
GET_HIGH_WORD(high,y);
|
||||
SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
|
||||
y = y*twopk;
|
||||
} else {
|
||||
u_int32_t high;
|
||||
SET_HIGH_WORD(t,((0x3ff-k)<<20)); /* 2^-k */
|
||||
y = x-(e+t);
|
||||
y += one;
|
||||
GET_HIGH_WORD(high,y);
|
||||
SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
|
||||
y = y*twopk;
|
||||
}
|
||||
}
|
||||
return y;
|
||||
|
@ -13,9 +13,8 @@
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$FreeBSD$";
|
||||
#endif
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
@ -38,7 +37,7 @@ Q5 = -2.0109921195e-07; /* 0xb457edbb */
|
||||
float
|
||||
expm1f(float x)
|
||||
{
|
||||
float y,hi,lo,c,t,e,hxs,hfx,r1;
|
||||
float y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
|
||||
int32_t k,xsb;
|
||||
u_int32_t hx;
|
||||
|
||||
@ -92,6 +91,7 @@ expm1f(float x)
|
||||
e = hxs*((r1-t)/((float)6.0 - x*t));
|
||||
if(k==0) return x - (x*e-hxs); /* c is 0 */
|
||||
else {
|
||||
SET_FLOAT_WORD(twopk,0x3f800000+(k<<23)); /* 2^k */
|
||||
e = (x*(e-c)-c);
|
||||
e -= hxs;
|
||||
if(k== -1) return (float)0.5*(x-e)-(float)0.5;
|
||||
@ -99,26 +99,21 @@ expm1f(float x)
|
||||
if(x < (float)-0.25) return -(float)2.0*(e-(x+(float)0.5));
|
||||
else return one+(float)2.0*(x-e);
|
||||
if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */
|
||||
int32_t i;
|
||||
y = one-(e-x);
|
||||
GET_FLOAT_WORD(i,y);
|
||||
SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */
|
||||
if (k == 128) y = y*2.0F*0x1p127F;
|
||||
else y = y*twopk;
|
||||
return y-one;
|
||||
}
|
||||
t = one;
|
||||
if(k<23) {
|
||||
int32_t i;
|
||||
SET_FLOAT_WORD(t,0x3f800000 - (0x1000000>>k)); /* t=1-2^-k */
|
||||
y = t-(e-x);
|
||||
GET_FLOAT_WORD(i,y);
|
||||
SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */
|
||||
y = y*twopk;
|
||||
} else {
|
||||
int32_t i;
|
||||
SET_FLOAT_WORD(t,((0x7f-k)<<23)); /* 2^-k */
|
||||
y = x-(e+t);
|
||||
y += one;
|
||||
GET_FLOAT_WORD(i,y);
|
||||
SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */
|
||||
y = y*twopk;
|
||||
}
|
||||
}
|
||||
return y;
|
||||
|
Loading…
Reference in New Issue
Block a user