From 264c3d8738c3580b596ecf5aaa1ce1b5367fc0da Mon Sep 17 00:00:00 2001 From: Peter Wemm Date: Thu, 19 Aug 1999 14:54:40 +0000 Subject: [PATCH] Undo my previous commit and do it differently. Break the ffs() etc macros into two parts - one to do the bsfl and the other to convert the result (base 0) to ffs()-like (base 1) in inline C. This enables the optimizer to be a lot smarter in certain cases, like where it knows that the argument is non-zero and we want ffs(known non zero arg) - 1. This appears to produce identical code to the old inline when the argument is unknown. --- sys/amd64/include/cpufunc.h | 41 ++++++++++++++++++++++++------------- sys/i386/include/cpufunc.h | 41 ++++++++++++++++++++++++------------- 2 files changed, 54 insertions(+), 28 deletions(-) diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index 60928b844a7..9c72d0db532 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: cpufunc.h,v 1.88 1999/07/23 23:45:19 alc Exp $ + * $Id: cpufunc.h,v 1.89 1999/08/19 00:32:48 peter Exp $ */ /* @@ -82,40 +82,53 @@ enable_intr(void) __asm __volatile("sti"); } -#define HAVE_INLINE_FFS -#if __GNUC__ == 2 && __GNUC_MINOR__ > 8 -#define ffs(mask) __builtin_ffs(mask) -#else +#define HAVE_INLINE__BSFL + static __inline int -ffs(int mask) +__bsfl(int mask) { int result; + /* * bsfl turns out to be not all that slow on 486's. It can beaten * using a binary search to reduce to 4 bits and then a table lookup, * but only if the code is inlined and in the cache, and the code * is quite large so inlining it probably busts the cache. - * + */ + __asm __volatile("bsfl %0,%0" : "=r" (result) : "0" (mask)); + return (result); +} + +#define HAVE_INLINE_FFS + +static __inline int +ffs(int mask) +{ + /* * Note that gcc-2's builtin ffs would be used if we didn't declare * this inline or turn off the builtin.
The builtin is faster but * broken in gcc-2.4.5 and slower but working in gcc-2.5 and 2.6. */ - __asm __volatile("testl %0,%0; je 1f; bsfl %0,%0; incl %0; 1:" - : "=r" (result) : "0" (mask)); + return mask == 0 ? mask : __bsfl(mask) + 1; +} + +#define HAVE_INLINE__BSRL + +static __inline int +__bsrl(int mask) +{ + int result; + __asm __volatile("bsrl %0,%0" : "=r" (result) : "0" (mask)); return (result); } -#endif #define HAVE_INLINE_FLS static __inline int fls(int mask) { - int result; - __asm __volatile("testl %0,%0; je 1f; bsrl %0,%0; incl %0; 1:" - : "=r" (result) : "0" (mask)); - return (result); + return mask == 0 ? mask : __bsrl(mask) + 1; } #if __GNUC__ < 2 diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index 60928b844a7..9c72d0db532 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: cpufunc.h,v 1.88 1999/07/23 23:45:19 alc Exp $ + * $Id: cpufunc.h,v 1.89 1999/08/19 00:32:48 peter Exp $ */ /* @@ -82,40 +82,53 @@ enable_intr(void) __asm __volatile("sti"); } -#define HAVE_INLINE_FFS -#if __GNUC__ == 2 && __GNUC_MINOR__ > 8 -#define ffs(mask) __builtin_ffs(mask) -#else +#define HAVE_INLINE__BSFL + static __inline int -ffs(int mask) +__bsfl(int mask) { int result; + /* * bsfl turns out to be not all that slow on 486's. It can beaten * using a binary search to reduce to 4 bits and then a table lookup, * but only if the code is inlined and in the cache, and the code * is quite large so inlining it probably busts the cache. - * + */ + __asm __volatile("bsfl %0,%0" : "=r" (result) : "0" (mask)); + return (result); +} + +#define HAVE_INLINE_FFS + +static __inline int
ffs(int mask)
+{ + /* * Note that gcc-2's builtin ffs would be used if we didn't declare * this inline or turn off the builtin. The builtin is faster but * broken in gcc-2.4.5 and slower but working in gcc-2.5 and 2.6.
*/ - __asm __volatile("testl %0,%0; je 1f; bsfl %0,%0; incl %0; 1:" - : "=r" (result) : "0" (mask)); + return mask == 0 ? mask : __bsfl(mask) + 1; +} + +#define HAVE_INLINE__BSRL + +static __inline int +__bsrl(int mask) +{ + int result; + __asm __volatile("bsrl %0,%0" : "=r" (result) : "0" (mask)); return (result); } -#endif #define HAVE_INLINE_FLS static __inline int fls(int mask) { - int result; - __asm __volatile("testl %0,%0; je 1f; bsrl %0,%0; incl %0; 1:" - : "=r" (result) : "0" (mask)); - return (result); + return mask == 0 ? mask : __bsrl(mask) + 1; } #if __GNUC__ < 2