Replace hand-crafted naive byte-by-byte zero block detection routine

with a macro based around memcmp(). The latter is expected to be some
8 times faster on modern 64-bit architectures.

In practice, throughput of doing conv=sparse from /dev/zero to /dev/null
went up some 5-fold here from 1.9GB/sec to 9.7GB/sec with this change
(bs=128k).

MFC after:	2 weeks
This commit is contained in:
Maxim Sobolev 2018-11-29 19:28:01 +00:00
parent 7d2b0bd7d7
commit dead7b5e47
2 changed files with 7 additions and 7 deletions

View File

@ -511,7 +511,7 @@ void
dd_out(int force) dd_out(int force)
{ {
u_char *outp; u_char *outp;
size_t cnt, i, n; size_t cnt, n;
ssize_t nw; ssize_t nw;
static int warned; static int warned;
int sparse; int sparse;
@ -544,12 +544,8 @@ dd_out(int force)
do { do {
sparse = 0; sparse = 0;
if (ddflags & C_SPARSE) { if (ddflags & C_SPARSE) {
sparse = 1; /* Is buffer sparse? */ /* Is buffer sparse? */
for (i = 0; i < cnt; i++) sparse = BISZERO(outp, cnt);
if (outp[i] != 0) {
sparse = 0;
break;
}
} }
if (sparse && !force) { if (sparse && !force) {
pending += cnt; pending += cnt;

View File

@ -103,3 +103,7 @@ typedef struct {
#define C_PROGRESS 0x40000000 #define C_PROGRESS 0x40000000
#define C_PARITY (C_PAREVEN | C_PARODD | C_PARNONE | C_PARSET) #define C_PARITY (C_PAREVEN | C_PARODD | C_PARNONE | C_PARSET)
/*
 * BISZERO(p, s): evaluate to nonzero iff the first s bytes at p are all
 * zero.  Checks the first byte directly, then lets memcmp() compare the
 * buffer against itself shifted by one byte — libc's memcmp() is
 * typically vectorized and far faster than a byte-by-byte loop.
 * Every use of p is parenthesized so that pointer-expression arguments
 * (e.g. buf + i) keep their original arithmetic units; the unguarded
 * form would apply the (const char *) cast to buf alone.
 * NOTE: p may be evaluated more than once — avoid side effects in args.
 */
#define BISZERO(p, s) ((s) > 0 && *((const char *)(p)) == 0 && !memcmp( \
	(const void *)(p), (const void *) \
	((const char *)(p) + 1), (s) - 1))