mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-08 13:28:05 +00:00
5e3934b15a
Apply the following automated changes to try to eliminate no-longer-needed sys/cdefs.h includes as well as now-empty blank lines in a row. Remove /^#if.*\n#endif.*\n#include\s+<sys/cdefs.h>.*\n/ Remove /\n+#include\s+<sys/cdefs.h>.*\n+#if.*\n#endif.*\n+/ Remove /\n+#if.*\n#endif.*\n+/ Remove /^#if.*\n#endif.*\n/ Remove /\n+#include\s+<sys/cdefs.h>\n#include\s+<sys/types.h>/ Remove /\n+#include\s+<sys/cdefs.h>\n#include\s+<sys/param.h>/ Remove /\n+#include\s+<sys/cdefs.h>\n#include\s+<sys/capsicum.h>/ Sponsored by: Netflix
503 lines
12 KiB
C
503 lines
12 KiB
C
/*-
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*
|
|
* Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/endian.h>
|
|
#include <sys/param.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/uio.h>
|
|
#include <netinet/in.h>
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
#include <err.h>
|
|
#include <fcntl.h>
|
|
#include <pthread.h>
|
|
#include <signal.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
|
|
#include "mkuzip.h"
|
|
#include "mkuz_cloop.h"
|
|
#include "mkuz_blockcache.h"
|
|
#include "mkuz_lzma.h"
|
|
#include "mkuz_zlib.h"
|
|
#include "mkuz_zstd.h"
|
|
#include "mkuz_blk.h"
|
|
#include "mkuz_cfg.h"
|
|
#include "mkuz_conveyor.h"
|
|
#include "mkuz_format.h"
|
|
#include "mkuz_fqueue.h"
|
|
#include "mkuz_time.h"
|
|
#include "mkuz_insize.h"
|
|
|
|
/* Default cluster size in bytes; main() uses it unless -s overrides it. */
#define DEFAULT_CLSTSIZE 16384
|
|
|
|
/*
 * Compression algorithms known to mkuzip.  The values double as indices
 * into the uzip_fmts[] table, so they must stay dense and start at zero.
 */
enum UZ_ALGORITHM {
	UZ_ZLIB = 0,
	UZ_LZMA = 1,
	UZ_ZSTD = 2,
	UZ_INVALID = 3	/* one past the last valid algorithm */
};
|
|
|
|
static const struct mkuz_format uzip_fmts[] = {
|
|
[UZ_ZLIB] = {
|
|
.option = "zlib",
|
|
.magic = CLOOP_MAGIC_ZLIB,
|
|
.default_sufx = DEFAULT_SUFX_ZLIB,
|
|
.f_compress_bound = mkuz_zlib_cbound,
|
|
.f_init = mkuz_zlib_init,
|
|
.f_compress = mkuz_zlib_compress,
|
|
},
|
|
[UZ_LZMA] = {
|
|
.option = "lzma",
|
|
.magic = CLOOP_MAGIC_LZMA,
|
|
.default_sufx = DEFAULT_SUFX_LZMA,
|
|
.f_compress_bound = mkuz_lzma_cbound,
|
|
.f_init = mkuz_lzma_init,
|
|
.f_compress = mkuz_lzma_compress,
|
|
},
|
|
[UZ_ZSTD] = {
|
|
.option = "zstd",
|
|
.magic = CLOOP_MAGIC_ZSTD,
|
|
.default_sufx = DEFAULT_SUFX_ZSTD,
|
|
.f_compress_bound = mkuz_zstd_cbound,
|
|
.f_init = mkuz_zstd_init,
|
|
.f_compress = mkuz_zstd_compress,
|
|
},
|
|
};
|
|
|
|
static struct mkuz_blk *readblock(int, u_int32_t);
|
|
static void usage(void) __dead2;
|
|
static void cleanup(void);
|
|
|
|
static char *cleanfile = NULL;
|
|
|
|
static int
|
|
cmp_blkno(const struct mkuz_blk *bp, void *p)
|
|
{
|
|
uint32_t *ap;
|
|
|
|
ap = (uint32_t *)p;
|
|
|
|
return (bp->info.blkno == *ap);
|
|
}
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
struct mkuz_cfg cfs;
|
|
char *oname;
|
|
uint64_t *toc;
|
|
int i, io, opt, tmp;
|
|
struct {
|
|
int en;
|
|
FILE *f;
|
|
} summary;
|
|
struct iovec iov[2];
|
|
uint64_t offset, last_offset;
|
|
struct cloop_header hdr;
|
|
struct mkuz_conveyor *cvp;
|
|
struct mkuz_blk_info *chit;
|
|
size_t ncpusz, ncpu, magiclen;
|
|
double st, et;
|
|
enum UZ_ALGORITHM comp_alg;
|
|
int comp_level;
|
|
|
|
st = getdtime();
|
|
|
|
ncpusz = sizeof(size_t);
|
|
if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
|
|
ncpu = 1;
|
|
} else if (ncpu > MAX_WORKERS_AUTO) {
|
|
ncpu = MAX_WORKERS_AUTO;
|
|
}
|
|
|
|
memset(&hdr, 0, sizeof(hdr));
|
|
cfs.blksz = DEFAULT_CLSTSIZE;
|
|
oname = NULL;
|
|
cfs.verbose = 0;
|
|
cfs.no_zcomp = 0;
|
|
cfs.en_dedup = 0;
|
|
summary.en = 0;
|
|
summary.f = stderr;
|
|
comp_alg = UZ_ZLIB;
|
|
comp_level = USE_DEFAULT_LEVEL;
|
|
cfs.nworkers = ncpu;
|
|
struct mkuz_blk *iblk, *oblk;
|
|
|
|
while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
|
|
switch(opt) {
|
|
case 'A':
|
|
for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
|
|
if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
|
|
break;
|
|
}
|
|
if (tmp == UZ_INVALID)
|
|
errx(1, "invalid algorithm specified: %s",
|
|
optarg);
|
|
/* Not reached */
|
|
comp_alg = tmp;
|
|
break;
|
|
case 'C':
|
|
comp_level = atoi(optarg);
|
|
break;
|
|
case 'o':
|
|
oname = optarg;
|
|
break;
|
|
|
|
case 's':
|
|
tmp = atoi(optarg);
|
|
if (tmp <= 0) {
|
|
errx(1, "invalid cluster size specified: %s",
|
|
optarg);
|
|
/* Not reached */
|
|
}
|
|
cfs.blksz = tmp;
|
|
break;
|
|
|
|
case 'v':
|
|
cfs.verbose = 1;
|
|
break;
|
|
|
|
case 'Z':
|
|
cfs.no_zcomp = 1;
|
|
break;
|
|
|
|
case 'd':
|
|
cfs.en_dedup = 1;
|
|
break;
|
|
|
|
case 'L':
|
|
comp_alg = UZ_LZMA;
|
|
break;
|
|
|
|
case 'S':
|
|
summary.en = 1;
|
|
summary.f = stdout;
|
|
break;
|
|
|
|
case 'j':
|
|
tmp = atoi(optarg);
|
|
if (tmp <= 0) {
|
|
errx(1, "invalid number of compression threads"
|
|
" specified: %s", optarg);
|
|
/* Not reached */
|
|
}
|
|
cfs.nworkers = tmp;
|
|
break;
|
|
|
|
default:
|
|
usage();
|
|
/* Not reached */
|
|
}
|
|
}
|
|
argc -= optind;
|
|
argv += optind;
|
|
|
|
if (argc != 1) {
|
|
usage();
|
|
/* Not reached */
|
|
}
|
|
|
|
cfs.handler = &uzip_fmts[comp_alg];
|
|
|
|
magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
|
|
assert(magiclen < sizeof(hdr.magic));
|
|
|
|
if (cfs.en_dedup != 0) {
|
|
/*
|
|
* Dedupe requires a version 3 format. Don't downgrade newer
|
|
* formats.
|
|
*/
|
|
if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
|
|
hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
|
|
hdr.magic[CLOOP_OFS_COMPR] =
|
|
tolower(hdr.magic[CLOOP_OFS_COMPR]);
|
|
}
|
|
|
|
if (cfs.blksz % DEV_BSIZE != 0)
|
|
errx(1, "cluster size should be multiple of %d", DEV_BSIZE);
|
|
|
|
cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
|
|
if (cfs.cbound_blksz > MAXPHYS)
|
|
errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
|
|
cfs.cbound_blksz, (size_t)MAXPHYS);
|
|
|
|
cfs.handler->f_init(&comp_level);
|
|
cfs.comp_level = comp_level;
|
|
|
|
cfs.iname = argv[0];
|
|
if (oname == NULL) {
|
|
asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx);
|
|
if (oname == NULL) {
|
|
err(1, "can't allocate memory");
|
|
/* Not reached */
|
|
}
|
|
}
|
|
|
|
signal(SIGHUP, exit);
|
|
signal(SIGINT, exit);
|
|
signal(SIGTERM, exit);
|
|
signal(SIGXCPU, exit);
|
|
signal(SIGXFSZ, exit);
|
|
atexit(cleanup);
|
|
|
|
cfs.fdr = open(cfs.iname, O_RDONLY);
|
|
if (cfs.fdr < 0) {
|
|
err(1, "open(%s)", cfs.iname);
|
|
/* Not reached */
|
|
}
|
|
cfs.isize = mkuz_get_insize(&cfs);
|
|
if (cfs.isize < 0) {
|
|
errx(1, "can't determine input image size");
|
|
/* Not reached */
|
|
}
|
|
hdr.nblocks = cfs.isize / cfs.blksz;
|
|
if ((cfs.isize % cfs.blksz) != 0) {
|
|
if (cfs.verbose != 0)
|
|
fprintf(stderr, "file size is not multiple "
|
|
"of %d, padding data\n", cfs.blksz);
|
|
hdr.nblocks++;
|
|
}
|
|
toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
|
|
|
|
/*
|
|
* Initialize last+1 entry with non-heap trash. If final padding is
|
|
* added later, it may or may not be overwritten with an offset
|
|
* representing the length of the final compressed block. If not,
|
|
* initialize to a defined value.
|
|
*/
|
|
toc[hdr.nblocks] = 0;
|
|
|
|
cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
|
|
S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
|
|
if (cfs.fdw < 0) {
|
|
err(1, "open(%s)", oname);
|
|
/* Not reached */
|
|
}
|
|
cleanfile = oname;
|
|
|
|
/* Prepare header that we will write later when we have index ready. */
|
|
iov[0].iov_base = (char *)&hdr;
|
|
iov[0].iov_len = sizeof(hdr);
|
|
iov[1].iov_base = (char *)toc;
|
|
iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
|
|
offset = iov[0].iov_len + iov[1].iov_len;
|
|
|
|
/* Reserve space for header */
|
|
lseek(cfs.fdw, offset, SEEK_SET);
|
|
|
|
if (cfs.verbose != 0) {
|
|
fprintf(stderr, "data size %ju bytes, number of clusters "
|
|
"%u, index length %zu bytes\n", cfs.isize,
|
|
hdr.nblocks, iov[1].iov_len);
|
|
}
|
|
|
|
cvp = mkuz_conveyor_ctor(&cfs);
|
|
|
|
last_offset = 0;
|
|
iblk = oblk = NULL;
|
|
for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
|
|
iblk = readblock(cfs.fdr, cfs.blksz);
|
|
mkuz_fqueue_enq(cvp->wrk_queue, iblk);
|
|
if (iblk != MKUZ_BLK_EOF &&
|
|
(i < (cfs.nworkers * ITEMS_PER_WORKER))) {
|
|
continue;
|
|
}
|
|
drain:
|
|
oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
|
|
assert(oblk->info.blkno == (unsigned)io);
|
|
oblk->info.offset = offset;
|
|
chit = NULL;
|
|
if (cfs.en_dedup != 0 && oblk->info.len > 0) {
|
|
chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
|
|
/*
|
|
* There should be at least one non-empty block
|
|
* between us and the backref'ed offset, otherwise
|
|
* we won't be able to parse that sequence correctly
|
|
* as it would be indistinguishible from another
|
|
* empty block.
|
|
*/
|
|
if (chit != NULL && chit->offset == last_offset) {
|
|
chit = NULL;
|
|
}
|
|
}
|
|
if (chit != NULL) {
|
|
toc[io] = htobe64(chit->offset);
|
|
oblk->info.len = 0;
|
|
} else {
|
|
if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
|
|
oblk->info.len) < 0) {
|
|
err(1, "write(%s)", oname);
|
|
/* Not reached */
|
|
}
|
|
toc[io] = htobe64(offset);
|
|
last_offset = offset;
|
|
offset += oblk->info.len;
|
|
}
|
|
if (cfs.verbose != 0) {
|
|
fprintf(stderr, "cluster #%d, in %u bytes, "
|
|
"out len=%lu offset=%lu", io, cfs.blksz,
|
|
(u_long)oblk->info.len, (u_long)be64toh(toc[io]));
|
|
if (chit != NULL) {
|
|
fprintf(stderr, " (backref'ed to #%d)",
|
|
chit->blkno);
|
|
}
|
|
fprintf(stderr, "\n");
|
|
}
|
|
free(oblk);
|
|
io += 1;
|
|
if (iblk == MKUZ_BLK_EOF) {
|
|
if (io < i)
|
|
goto drain;
|
|
/* Last block, see if we need to add some padding */
|
|
if ((offset % DEV_BSIZE) == 0)
|
|
continue;
|
|
oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
|
|
oblk->info.blkno = io;
|
|
oblk->info.len = oblk->alen;
|
|
if (cfs.verbose != 0) {
|
|
fprintf(stderr, "padding data with %lu bytes "
|
|
"so that file size is multiple of %d\n",
|
|
(u_long)oblk->alen, DEV_BSIZE);
|
|
}
|
|
mkuz_fqueue_enq(cvp->results, oblk);
|
|
goto drain;
|
|
}
|
|
}
|
|
|
|
close(cfs.fdr);
|
|
|
|
if (cfs.verbose != 0 || summary.en != 0) {
|
|
et = getdtime();
|
|
fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
|
|
"bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
|
|
(long long)(cfs.isize - offset),
|
|
100.0 * (long long)(cfs.isize - offset) /
|
|
(float)cfs.isize, (float)cfs.isize / (et - st));
|
|
}
|
|
|
|
/* Convert to big endian */
|
|
hdr.blksz = htonl(cfs.blksz);
|
|
hdr.nblocks = htonl(hdr.nblocks);
|
|
/* Write headers into pre-allocated space */
|
|
lseek(cfs.fdw, 0, SEEK_SET);
|
|
if (writev(cfs.fdw, iov, 2) < 0) {
|
|
err(1, "writev(%s)", oname);
|
|
/* Not reached */
|
|
}
|
|
cleanfile = NULL;
|
|
close(cfs.fdw);
|
|
|
|
exit(0);
|
|
}
|
|
|
|
static struct mkuz_blk *
|
|
readblock(int fd, u_int32_t clstsize)
|
|
{
|
|
int numread;
|
|
struct mkuz_blk *rval;
|
|
static int blockcnt;
|
|
off_t cpos;
|
|
|
|
rval = mkuz_blk_ctor(clstsize);
|
|
|
|
rval->info.blkno = blockcnt;
|
|
blockcnt += 1;
|
|
cpos = lseek(fd, 0, SEEK_CUR);
|
|
if (cpos < 0) {
|
|
err(1, "readblock: lseek() failed");
|
|
/* Not reached */
|
|
}
|
|
rval->info.offset = cpos;
|
|
|
|
numread = read(fd, rval->data, clstsize);
|
|
if (numread < 0) {
|
|
err(1, "readblock: read() failed");
|
|
/* Not reached */
|
|
}
|
|
if (numread == 0) {
|
|
free(rval);
|
|
return MKUZ_BLK_EOF;
|
|
}
|
|
rval->info.len = numread;
|
|
return rval;
|
|
}
|
|
|
|
/*
 * Print the command-line synopsis to stderr and exit with status 1.
 * The synopsis lists every option accepted by the getopt() string in
 * main(), including -A and -C.
 */
static void
usage(void)
{

	fprintf(stderr, "usage: mkuzip [-vZdLS] [-A algorithm] "
	    "[-C compression_level] [-o outfile] [-s cluster_size] "
	    "[-j ncompr] infile\n");
	exit(1);
}
|
|
|
|
/*
 * malloc() wrapper that never returns NULL: an allocation failure
 * terminates the program with exit status 1.
 */
void *
mkuz_safe_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(1, "can't allocate memory");
	return (mem);
}
|
|
|
|
/*
 * Like mkuz_safe_malloc(), but the returned memory is zero-filled.
 */
void *
mkuz_safe_zmalloc(size_t size)
{
	void *mem = mkuz_safe_malloc(size);

	/* memset() hands back its first argument. */
	return (memset(mem, 0, size));
}
|
|
|
|
static void
|
|
cleanup(void)
|
|
{
|
|
|
|
if (cleanfile != NULL)
|
|
unlink(cleanfile);
|
|
}
|
|
|
|
/*
 * Test whether every one of the size bytes starting at memory equals val.
 *
 * Returns 1 when all bytes match and 0 otherwise.  An empty range
 * (size == 0) trivially matches and returns 1 without touching memory.
 */
int
mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
{
	const unsigned char *mm;

	/*
	 * Without this guard an empty range would read *mm and pass
	 * size - 1 == SIZE_MAX to memcmp().
	 */
	if (size == 0)
		return (1);

	mm = (const unsigned char *)memory;
	/*
	 * Once the first byte is known to equal val, comparing the buffer
	 * with itself shifted by one byte checks that each byte equals its
	 * successor, i.e. that all of them equal val.
	 */
	return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0;
}
|