From dead7b5e47d14f6560c5be40e673664ba20c670f Mon Sep 17 00:00:00 2001 From: Maxim Sobolev Date: Thu, 29 Nov 2018 19:28:01 +0000 Subject: [PATCH] Replace hand-crafted naive byte-by-byte zero block detection routine with a macro based around memcmp(). The latter is expected to be some 8 times faster on modern 64-bit architectures. In practice, throughput of doing conv=sparse from /dev/zero to /dev/null went up some 5-fold here from 1.9GB/sec to 9.7GB/sec with this change (bs=128k). MFC after: 2 weeks --- bin/dd/dd.c | 10 +++------- bin/dd/dd.h | 4 ++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bin/dd/dd.c b/bin/dd/dd.c index ef186a8186c..46175fa4c8f 100644 --- a/bin/dd/dd.c +++ b/bin/dd/dd.c @@ -511,7 +511,7 @@ void dd_out(int force) { u_char *outp; - size_t cnt, i, n; + size_t cnt, n; ssize_t nw; static int warned; int sparse; @@ -544,12 +544,8 @@ dd_out(int force) do { sparse = 0; if (ddflags & C_SPARSE) { - sparse = 1; /* Is buffer sparse? */ - for (i = 0; i < cnt; i++) - if (outp[i] != 0) { - sparse = 0; - break; - } + /* Is buffer sparse? */ + sparse = BISZERO(outp, cnt); } if (sparse && !force) { pending += cnt; diff --git a/bin/dd/dd.h b/bin/dd/dd.h index 0f7c680a6ee..8090252923f 100644 --- a/bin/dd/dd.h +++ b/bin/dd/dd.h @@ -103,3 +103,7 @@ typedef struct { #define C_PROGRESS 0x40000000 #define C_PARITY (C_PAREVEN | C_PARODD | C_PARNONE | C_PARSET) + +#define BISZERO(p, s) ((s) > 0 && *((const char *)p) == 0 && !memcmp( \ + (const void *)(p), (const void *) \ + ((const char *)p + 1), (s) - 1))