1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-18 10:35:55 +00:00

This commit was generated by cvs2svn to compensate for changes in r92948,

which included commits to RCS files with non-trunk default branches.
This commit is contained in:
Mark Murray 2002-03-22 13:54:24 +00:00
commit 3c3f11563a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=92949
13 changed files with 2778 additions and 0 deletions

7
usr.bin/sort/Makefile Normal file
View File

@ -0,0 +1,7 @@
# $NetBSD: Makefile,v 1.3 2001/01/08 19:16:49 jdolecek Exp $
# from: @(#)Makefile 8.1 (Berkeley) 6/6/93
PROG= sort
SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
.include <bsd.prog.mk>

205
usr.bin/sort/append.c Normal file
View File

@ -0,0 +1,205 @@
/* $NetBSD: append.c,v 1.9 2001/01/18 20:59:43 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
#ifndef lint
__RCSID("$NetBSD: append.c,v 1.9 2001/01/18 20:59:43 jdolecek Exp $");
__SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <stdlib.h>
#include <string.h>
#define OUTPUT { \
if ((n = cpos - ppos) > 1) { \
for (; ppos < cpos; ++ppos) \
*ppos -= odepth; \
ppos -= n; \
if (stable_sort) \
sradixsort(ppos, n, wts1, REC_D); \
else \
radixsort(ppos, n, wts1, REC_D); \
for (; ppos < cpos; ppos++) { \
prec = (const RECHEADER *) (*ppos - sizeof(TRECHEADER));\
put(prec, fp); \
} \
} else put(prec, fp); \
}
/*
* copy sorted lines to output; check for uniqueness
*/
void
append(keylist, nelem, depth, fp, put, ftbl)
const u_char **keylist;
int nelem;
int depth;
FILE *fp;
put_func_t put;
struct field *ftbl;
{
u_char *wts, *wts1;
int n, odepth;
const u_char **cpos, **ppos, **lastkey;
const u_char *cend, *pend, *start;
const struct recheader *crec, *prec;
if (*keylist == '\0' && UNIQUE)
return;
wts1 = wts = ftbl[0].weights;
if ((!UNIQUE) && SINGL_FLD) {
if ((ftbl[0].flags & F) && (ftbl[0].flags & R))
wts1 = Rascii;
else if (ftbl[0].flags & F)
wts1 = ascii;
odepth = depth;
}
lastkey = keylist + nelem;
depth += sizeof(TRECHEADER);
if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
ppos = keylist;
prec = (const RECHEADER *) (*ppos - depth);
if (UNIQUE)
put(prec, fp);
for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
crec = (const RECHEADER *) (*cpos - depth);
if (crec->length == prec->length) {
/*
* Set pend and cend so that trailing NUL and
* record separator is ignored.
*/
pend = (const u_char *) &prec->data + prec->length - 2;
cend = (const u_char *) &crec->data + crec->length - 2;
for (start = *cpos; cend >= start; cend--) {
if (wts[*cend] != wts[*pend])
break;
pend--;
}
if (pend + 1 != *ppos) {
if (!UNIQUE) {
OUTPUT;
} else
put(crec, fp);
ppos = cpos;
prec = crec;
}
} else {
if (!UNIQUE) {
OUTPUT;
} else
put(crec, fp);
ppos = cpos;
prec = crec;
}
}
if (!UNIQUE) { OUTPUT; }
} else if (UNIQUE) {
ppos = keylist;
prec = (const RECHEADER *) (*ppos - depth);
put(prec, fp);
for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
crec = (const RECHEADER *) (*cpos - depth);
if (crec->offset == prec->offset) {
/*
* Set pend and cend so that trailing NUL and
* record separator is ignored.
*/
pend = (const u_char *) &prec->data + prec->offset - 2;
cend = (const u_char *) &crec->data + crec->offset - 2;
for (start = *cpos; cend >= start; cend--) {
if (wts[*cend] != wts[*pend])
break;
pend--;
}
if (pend + 1 != *ppos) {
ppos = cpos;
prec = crec;
put(prec, fp);
}
} else {
ppos = cpos;
prec = crec;
put(prec, fp);
}
}
} else for (cpos = keylist; cpos < lastkey; cpos++) {
crec = (const RECHEADER *) (*cpos - depth);
put(crec, fp);
}
}
/*
* output the already sorted eol bin.
*/
void
rd_append(binno, infl0, nfiles, outfp, buffer, bufend)
u_char *buffer;
int infl0;
int binno, nfiles;
FILE *outfp;
u_char *bufend;
{
RECHEADER *rec;
rec = (RECHEADER *) buffer;
if (!getnext(binno, infl0, NULL, nfiles,
(RECHEADER *) buffer, bufend, 0)) {
putline(rec, outfp);
while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer,
bufend, 0) == 0) {
if (!UNIQUE)
putline(rec, outfp);
}
}
}
/*
* append plain text--used after sorting the biggest bin.
*/
void
concat(a, b)
FILE *a, *b;
{
int nread;
char buffer[4096];
rewind(b);
while ((nread = fread(buffer, 1, 4096, b)) > 0)
EWRITE(buffer, 1, nread, a);
}

69
usr.bin/sort/extern.h Normal file
View File

@ -0,0 +1,69 @@
/* $NetBSD: extern.h,v 1.5 2001/01/12 19:31:25 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)extern.h 8.1 (Berkeley) 6/6/93
*/
void append __P((const u_char **, int, int, FILE *,
void (*)(const RECHEADER *, FILE *), struct field *));
void concat __P((FILE *, FILE *));
length_t enterkey __P((RECHEADER *, DBT *, int, struct field *));
void fixit __P((int *, char **));
void fldreset __P((struct field *));
FILE *ftmp __P((void));
void fmerge __P((int, int, struct filelist *, int,
get_func_t, FILE *, put_func_t, struct field *));
void fsort __P((int, int, int, struct filelist *, int, FILE *,
struct field *));
int geteasy __P((int, int, struct filelist *,
int, RECHEADER *, u_char *, struct field *));
int getnext __P((int, int, struct filelist *,
int, RECHEADER *, u_char *, struct field *));
int makekey __P((int, int, struct filelist *,
int, RECHEADER *, u_char *, struct field *));
int makeline __P((int, int, struct filelist *,
int, RECHEADER *, u_char *, struct field *));
void merge __P((int, int, get_func_t, FILE *, put_func_t, struct field *));
void num_init __P((void));
void onepass __P((const u_char **, int, long, long *, u_char *, FILE *));
int optval __P((int, int));
void order __P((struct filelist *, get_func_t, struct field *));
void putline __P((const RECHEADER *, FILE *));
void putrec __P((const RECHEADER *, FILE *));
void rd_append __P((int, int, int, FILE *, u_char *, u_char *));
int setfield __P((const char *, struct field *, int));
void settables __P((int));

339
usr.bin/sort/fields.c Normal file
View File

@ -0,0 +1,339 @@
/* $NetBSD: fields.c,v 1.9 2001/02/19 19:52:27 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* Subroutines to generate sort keys. */
#include "sort.h"
#ifndef lint
__RCSID("$NetBSD: fields.c,v 1.9 2001/02/19 19:52:27 jdolecek Exp $");
__SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#define blancmange(ptr) { \
if (BLANK & d_mask[*(ptr)]) \
while (BLANK & d_mask[*(++(ptr))]); \
}
#define NEXTCOL(pos) { \
if (!SEP_FLAG) \
while (BLANK & l_d_mask[*(++pos)]); \
while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \
}
static u_char *enterfield __P((u_char *, u_char *, struct field *, int));
static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
extern struct coldesc clist[(ND+1)*2];
extern int ncols;
#define DECIMAL '.'
#define OFFSET 128
u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
u_char fnum[NBINS], rnum[NBINS];
/*
* constructs sort key with leading recheader, followed by the key,
* followed by the original line.
*/
length_t
enterkey(keybuf, line, size, fieldtable)
RECHEADER *keybuf; /* pointer to start of key */
DBT *line;
int size;
struct field fieldtable[];
{
int i;
u_char *l_d_mask;
u_char *lineend, *pos;
u_char *endkey, *keypos;
struct coldesc *clpos;
int col = 1;
struct field *ftpos;
l_d_mask = d_mask;
pos = (u_char *) line->data - 1;
lineend = (u_char *) line->data + line->size-1;
/* don't include rec_delimiter */
for (i = 0; i < ncols; i++) {
clpos = clist + i;
for (; (col < clpos->num) && (pos < lineend); col++) {
NEXTCOL(pos);
}
if (pos >= lineend)
break;
clpos->start = SEP_FLAG ? pos + 1 : pos;
NEXTCOL(pos);
clpos->end = pos;
col++;
if (pos >= lineend) {
clpos->end = lineend;
i++;
break;
}
}
for (; i <= ncols; i++)
clist[i].start = clist[i].end = lineend;
if (clist[0].start < (u_char *) line->data)
clist[0].start++;
keypos = keybuf->data;
endkey = (u_char *) keybuf + size - line->size;
for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
if ((keypos = enterfield(keypos, endkey, ftpos,
fieldtable->flags)) == NULL)
return (1);
keybuf->offset = keypos - keybuf->data;
keybuf->length = keybuf->offset + line->size;
if (keybuf->length + sizeof(TRECHEADER) > size) {
/* line too long for buffer */
return (1);
}
/*
* Make [s]radixsort() only sort by relevant part of key if:
* 1. we want to choose unique items by relevant field[s]
* 2. we want stable sort and so the items should be sorted only by
* the relevant field[s]
*/
if (UNIQUE || (stable_sort && keybuf->offset < line->size))
keypos[-1] = REC_D;
memcpy(keybuf->data + keybuf->offset, line->data, line->size);
return (0);
}
/*
* constructs a field (as defined by -k) within a key
*/
static u_char *
enterfield(tablepos, endkey, cur_fld, gflags)
struct field *cur_fld;
u_char *tablepos, *endkey;
int gflags;
{
u_char *start, *end, *lineend, *mask, *lweight;
struct column icol, tcol;
u_int flags;
u_int Rflag;
icol = cur_fld->icol;
tcol = cur_fld->tcol;
flags = cur_fld->flags;
start = icol.p->start;
lineend = clist[ncols].end;
if (flags & BI)
blancmange(start);
start += icol.indent;
start = min(start, lineend);
if (!tcol.num)
end = lineend;
else {
if (tcol.indent) {
end = tcol.p->start;
if (flags & BT)
blancmange(end);
end += tcol.indent;
end = min(end, lineend);
} else
end = tcol.p->end;
}
if (flags & N) {
Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
return number(tablepos, endkey, start, end, Rflag);
}
mask = cur_fld->mask;
lweight = cur_fld->weights;
for (; start < end; start++)
if (mask[*start]) {
if (*start <= 1) {
if (tablepos+2 >= endkey)
return (NULL);
*tablepos++ = lweight[1];
*tablepos++ = lweight[*start ? 2 : 1];
} else {
if (tablepos+1 >= endkey)
return (NULL);
*tablepos++ = lweight[*start];
}
}
*tablepos++ = lweight[0];
return (tablepos == endkey ? NULL : tablepos);
}
/* Uses the first bin to assign sign, expsign, 0, and the first
* 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
* When sorting in forward order:
* use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
* else use (0-99)->(2-102).
* If the exponent is >=61, use another byte for each additional 253
* in the exponent. Cutoff is at 567.
* To avoid confusing the exponent and the mantissa, use a field delimiter
* if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
* only time a field delimiter can come in that position.
* Reverse order is done analagously.
*/
static u_char *
number(pos, bufend, line, lineend, Rflag)
u_char *line, *pos, *bufend, *lineend;
int Rflag;
{
int or_sign, parity = 0;
int expincr = 1, exponent = -1;
int bite, expsign = 1, sign = 1;
u_char lastvalue, *nonzero, *tline, *C_TENS;
u_char *nweights;
if (Rflag)
nweights = rnum;
else
nweights = fnum;
if (pos > bufend - 8)
return (NULL);
/*
* or_sign sets the sort direction:
* (-r: +/-)(sign: +/-)(expsign: +/-)
*/
or_sign = sign ^ expsign ^ Rflag;
blancmange(line);
if (*line == '-') { /* set the sign */
or_sign ^= 1;
sign = 0;
line++;
}
/* eat initial zeroes */
for (; *line == '0' && line < lineend; line++)
;
/* calculate exponents < 0 */
if (*line == DECIMAL) {
exponent = 1;
while (*++line == '0' && line < lineend)
exponent++;
expincr = 0;
expsign = 0;
}
/* next character better be a digit */
if (*line < '1' || *line > '9' || line >= lineend) {
*pos++ = nweights[127];
return (pos);
}
if (expincr) {
for (tline = line-1; *++tline >= '0' &&
*tline <= '9' && tline < lineend;)
exponent++;
}
if (exponent > 567) {
*pos++ = nweights[sign ? (expsign ? 254 : 128)
: (expsign ? 0 : 126)];
warnx("exponent out of bounds");
return (pos);
}
bite = min(exponent, 61);
*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
: (expsign ? 64-bite : 64+bite)];
if (bite >= 61) {
do {
exponent -= bite;
bite = min(exponent, 254);
*pos++ = nweights[or_sign ? 254-bite : bite];
} while (bite == 254);
}
C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
for (; line < lineend; line++) {
if (*line >= '0' && *line <= '9') {
if (parity) {
*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
: *line);
if (pos == bufend)
return (NULL);
if (*line != '0' || lastvalue != '0')
nonzero = pos;
} else
lastvalue = *line;
parity ^= 1;
} else if(*line == DECIMAL) {
if(!expincr) /* a decimal already occurred once */
break;
expincr = 0;
} else
break;
}
if (parity && lastvalue != '0') {
*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
OFF_TENS[lastvalue] + '0';
} else
pos = nonzero;
if (pos > bufend-1)
return (NULL);
*pos++ = or_sign ? nweights[254] : nweights[0];
return (pos);
}
/* This forces a gap around the record delimiter
* Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
* rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
*/
void
num_init()
{
int i;
TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
OFF_TENS = TENS - '0';
OFF_NTENS = NEGTENS - '0';
for (i = 1; i < 10; i++) {
TENS[i] = TENS[i - 1] + 10;
NEGTENS[i] = NEGTENS[i - 1] - 10;
}
for (i = 0; i < REC_D; i++) {
fnum[i] = i;
rnum[255 - i] = i;
}
for (i = REC_D; i <255; i++) {
fnum[i] = i + 1;
rnum[255 - i] = i - 1;
}
}

372
usr.bin/sort/files.c Normal file
View File

@ -0,0 +1,372 @@
/* $NetBSD: files.c,v 1.16 2001/02/19 20:50:17 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
#include "fsort.h"
#ifndef lint
__RCSID("$NetBSD: files.c,v 1.16 2001/02/19 20:50:17 jdolecek Exp $");
__SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <string.h>
static int seq __P((FILE *, DBT *, DBT *));
/*
* this is the subroutine for file management for fsort().
* It keeps the buffers for all temporary files.
*/
int
getnext(binno, infl0, filelist, nfiles, pos, end, dummy)
int binno, infl0;
struct filelist *filelist;
int nfiles;
RECHEADER *pos;
u_char *end;
struct field *dummy;
{
int i;
u_char *hp;
static size_t nleft = 0;
static int cnt = 0, flag = -1;
static u_char maxb = 0;
static FILE *fp;
if (nleft == 0) {
if (binno < 0) /* reset files. */ {
for (i = 0; i < nfiles; i++) {
rewind(fstack[infl0 + i].fp);
fstack[infl0 + i].max_o = 0;
}
flag = -1;
nleft = cnt = 0;
return (-1);
}
maxb = fstack[infl0].maxb;
for (; nleft == 0; cnt++) {
if (cnt >= nfiles) {
cnt = 0;
return (EOF);
}
fp = fstack[infl0 + cnt].fp;
fread(&nleft, sizeof(nleft), 1, fp);
if (binno < maxb)
fstack[infl0+cnt].max_o
+= sizeof(nleft) + nleft;
else if (binno == maxb) {
if (binno != fstack[infl0].lastb) {
fseek(fp, fstack[infl0+
cnt].max_o, SEEK_SET);
fread(&nleft, sizeof(nleft), 1, fp);
}
if (nleft == 0)
fclose(fp);
} else if (binno == maxb + 1) { /* skip a bin */
fseek(fp, nleft, SEEK_CUR);
fread(&nleft, sizeof(nleft), 1, fp);
flag = cnt;
}
}
}
if ((u_char *) pos > end - sizeof(TRECHEADER))
return (BUFFEND);
fread(pos, sizeof(TRECHEADER), 1, fp);
if (end - pos->data < pos->length) {
hp = ((u_char *)pos) + sizeof(TRECHEADER);
for (i = sizeof(TRECHEADER); i ; i--)
ungetc(*--hp, fp);
return (BUFFEND);
}
fread(pos->data, pos->length, 1, fp);
nleft -= pos->length + sizeof(TRECHEADER);
if (nleft == 0 && binno == fstack[infl0].maxb)
fclose(fp);
return (0);
}
/*
* this is called when there is no special key. It's only called
* in the first fsort pass.
*/
int
makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2)
int flno, top;
struct filelist *filelist;
int nfiles;
RECHEADER *recbuf;
u_char *bufend;
struct field *dummy2;
{
static u_char *obufend;
static size_t osz;
char *pos;
static int filenum = 0, overflow = 0;
static FILE *fp = 0;
int c;
pos = (char *) recbuf->data;
if (overflow) {
/*
* Buffer shortage is solved by either of two ways:
* o flush previous buffered data and start using the
* buffer from start (see fsort())
* o realloc buffer and bump bufend
*
* The former is preferred, realloc is only done when
* there is exactly one item in buffer which does not fit.
*/
if (bufend == obufend)
memmove(pos, bufend - osz, osz);
pos += osz;
overflow = 0;
}
for (;;) {
if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
return (EOF);
else if (fp == NULL) {
if (filenum >= nfiles)
return (EOF);
if (!(fp = fopen(filelist->names[filenum], "r")))
err(2, "%s", filelist->names[filenum]);
filenum++;
}
while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) {
if ((*pos++ = c) == REC_D) {
recbuf->offset = 0;
recbuf->length = pos - (char *) recbuf->data;
return (0);
}
}
if (pos >= (char *)bufend) {
if (recbuf->data < bufend) {
overflow = 1;
obufend = bufend;
osz = (pos - (char *) recbuf->data);
}
return (BUFFEND);
} else if (c == EOF) {
if (recbuf->data != (u_char *) pos) {
*pos++ = REC_D;
recbuf->offset = 0;
recbuf->length = pos - (char *) recbuf->data;
return (0);
}
FCLOSE(fp);
fp = 0;
if (flno >= 0)
fstack[flno].fp = 0;
} else {
warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
/* Consume the rest of line from input */
while((c = getc(fp)) != REC_D && c != EOF)
;
recbuf->offset = 0;
recbuf->length = 0;
return (BUFFEND);
}
}
}
/*
* This generates keys. It's only called in the first fsort pass
*/
int
makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl)
int flno, top;
struct filelist *filelist;
int nfiles;
RECHEADER *recbuf;
u_char *bufend;
struct field *ftbl;
{
static int filenum = 0;
static FILE *dbdesc = 0;
static DBT dbkey[1], line[1];
static int overflow = 0;
int c;
if (overflow) {
overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf,
ftbl);
if (overflow)
return (BUFFEND);
else
return (0);
}
for (;;) {
if (flno >= 0) {
if (!(dbdesc = fstack[flno].fp))
return (EOF);
} else if (!dbdesc) {
if (filenum >= nfiles)
return (EOF);
dbdesc = fopen(filelist->names[filenum], "r");
if (!dbdesc)
err(2, "%s", filelist->names[filenum]);
filenum++;
}
if (!(c = seq(dbdesc, line, dbkey))) {
if ((signed)line->size > bufend - recbuf->data) {
overflow = 1;
} else {
overflow = enterkey(recbuf, line,
bufend - (u_char *) recbuf, ftbl);
}
if (overflow)
return (BUFFEND);
else
return (0);
}
if (c == EOF) {
FCLOSE(dbdesc);
dbdesc = 0;
if (flno >= 0)
fstack[flno].fp = 0;
} else {
((char *) line->data)[60] = '\000';
warnx("makekey: line too long: ignoring %.100s...",
(char *)line->data);
}
}
}
/*
* get a key/line pair from fp
*/
static int
seq(fp, line, key)
FILE *fp;
DBT *key, *line;
{
static char *buf, flag = 1;
char *end, *pos;
int c;
if (flag) {
flag = 0;
buf = (char *) linebuf;
end = buf + linebuf_size;
line->data = buf;
}
pos = buf;
while ((c = getc(fp)) != EOF) {
if ((*pos++ = c) == REC_D) {
line->size = pos - buf;
return (0);
}
if (pos == end) {
linebuf_size *= 2;
linebuf = realloc(linebuf, linebuf_size);
if (!linebuf)
err(2, "realloc of linebuf to %lu bytes failed",
(unsigned long)linebuf_size);
end = linebuf + linebuf_size;
pos = linebuf + (pos - buf);
line->data = buf = (char *)linebuf;
continue;
}
}
if (pos != buf) {
*pos++ = REC_D;
line->size = pos - buf;
return (0);
} else
return (EOF);
}
/*
* write a key/line pair to a temporary file
*/
void
putrec(rec, fp)
const RECHEADER *rec;
FILE *fp;
{
EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp);
}
/*
* write a line to output
*/
void
putline(rec, fp)
const RECHEADER *rec;
FILE *fp;
{
EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp);
}
/*
* get a record from a temporary file. (Used by merge sort.)
*/
int
geteasy(flno, top, filelist, nfiles, rec, end, dummy2)
int flno, top;
struct filelist *filelist;
int nfiles;
RECHEADER *rec;
u_char *end;
struct field *dummy2;
{
int i;
FILE *fp;
fp = fstack[flno].fp;
if ((u_char *) rec > end - sizeof(TRECHEADER))
return (BUFFEND);
if (!fread(rec, 1, sizeof(TRECHEADER), fp)) {
fclose(fp);
fstack[flno].fp = 0;
return (EOF);
}
if (end - rec->data < rec->length) {
for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
ungetc(*((char *) rec + i), fp);
return (BUFFEND);
}
fread(rec->data, rec->length, 1, fp);
return (0);
}

358
usr.bin/sort/fsort.c Normal file
View File

@ -0,0 +1,358 @@
/* $NetBSD: fsort.c,v 1.19 2001/05/15 11:19:45 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Read in the next bin. If it fits in one segment sort it;
* otherwise refine it by segment deeper by one character,
* and try again on smaller bins. Sort the final bin at this level
* of recursion to keep the head of fstack at 0.
* After PANIC passes, abort to merge sort.
*/
#include "sort.h"
#include "fsort.h"
#ifndef lint
__RCSID("$NetBSD: fsort.c,v 1.19 2001/05/15 11:19:45 jdolecek Exp $");
__SCCSID("@(#)fsort.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <stdlib.h>
#include <string.h>
static const u_char **keylist = 0;
u_char *buffer = 0, *linebuf = 0;
size_t bufsize = DEFBUFSIZE;
size_t linebuf_size;
struct tempfile fstack[MAXFCT];
extern char *toutpath;
#define FSORTMAX 4
int PANIC = FSORTMAX;
#define MSTART (MAXFCT - MERGE_FNUM)
#define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
void
fsort(binno, depth, top, filelist, nfiles, outfp, ftbl)
int binno, depth, top;
struct filelist *filelist;
int nfiles;
FILE *outfp;
struct field *ftbl;
{
const u_char **keypos;
u_char *bufend, *tmpbuf;
u_char *weights;
int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
int c, nelem, base;
long sizes [NBINS+1];
get_func_t get;
struct recheader *crec;
struct field tfield[2];
FILE *prevfp, *tailfp[FSORTMAX+1];
memset(tailfp, 0, sizeof(tailfp));
prevfp = outfp;
memset(tfield, 0, sizeof(tfield));
if (ftbl[0].flags & R)
tfield[0].weights = Rascii;
else
tfield[0].weights = ascii;
tfield[0].icol.num = 1;
weights = ftbl[0].weights;
if (!buffer) {
buffer = malloc(bufsize);
keylist = malloc(MAXNUM * sizeof(u_char *));
memset(keylist, 0, MAXNUM * sizeof(u_char *));
if (!SINGL_FLD) {
linebuf_size = DEFLLEN;
if ((linebuf = malloc(linebuf_size)) == NULL)
errx(2, "cannot allocate memory");
}
}
bufend = buffer + bufsize;
if (binno >= 0) {
base = top + nfiles;
get = getnext;
} else {
base = 0;
if (SINGL_FLD)
get = makeline;
else
get = makekey;
}
for (;;) {
memset(sizes, 0, sizeof(sizes));
c = ntfiles = 0;
if (binno == weights[REC_D] &&
!(SINGL_FLD && ftbl[0].flags & F)) { /* pop */
rd_append(weights[REC_D], top,
nfiles, prevfp, buffer, bufend);
break;
} else if (binno == weights[REC_D]) {
depth = 0; /* start over on flat weights */
ftbl = tfield;
weights = ftbl[0].weights;
}
while (c != EOF) {
keypos = keylist;
nelem = 0;
crec = (RECHEADER *) buffer;
do_read:
while((c = get(binno, top, filelist, nfiles, crec,
bufend, ftbl)) == 0) {
*keypos++ = crec->data + depth;
if (++nelem == MAXNUM) {
c = BUFFEND;
break;
}
crec =(RECHEADER *) ((char *) crec +
SALIGN(crec->length) + sizeof(TRECHEADER));
}
if (c == BUFFEND && nelem < MAXNUM
&& bufsize < MAXBUFSIZE) {
const u_char **keyp;
u_char *oldb = buffer;
/* buffer was too small for data, allocate
* bigger buffer */
bufsize *= 2;
buffer = realloc(buffer, bufsize);
if (!buffer) {
err(2, "failed to realloc buffer to %ld bytes",
(unsigned long) bufsize);
}
bufend = buffer + bufsize;
/* patch up keylist[] */
for(keyp = &keypos[-1]; keyp >= keylist; keyp--)
*keyp = buffer + (*keyp - oldb);
crec = (RECHEADER *) (buffer + ((u_char *)crec - oldb));
goto do_read;
}
if (c != BUFFEND && !ntfiles && !mfct) {
/* do not push */
continue;
}
/* push */
if (panic >= PANIC) {
fstack[MSTART + mfct].fp = ftmp();
if ((stable_sort)
? sradixsort(keylist, nelem,
weights, REC_D)
: radixsort(keylist, nelem,
weights, REC_D) )
err(2, NULL);
append(keylist, nelem, depth,
fstack[MSTART + mfct].fp, putrec,
ftbl);
mfct++;
/* reduce number of open files */
if (mfct == MERGE_FNUM ||(c == EOF && ntfiles)) {
/*
* Only copy extra incomplete crec
* data if there are any.
*/
int nodata = (bufend >= (u_char *)crec
&& bufend <= crec->data);
if (!nodata) {
tmpbuf = malloc(bufend -
crec->data);
memmove(tmpbuf, crec->data,
bufend - crec->data);
}
fstack[base + ntfiles].fp = ftmp();
fmerge(0, MSTART, filelist,
mfct, geteasy, fstack[base].fp,
putrec, ftbl);
ntfiles++;
mfct = 0;
if (!nodata) {
memmove(crec->data, tmpbuf,
bufend - crec->data);
free(tmpbuf);
}
}
} else {
fstack[base + ntfiles].fp= ftmp();
onepass(keylist, depth, nelem, sizes,
weights, fstack[base + ntfiles].fp);
ntfiles++;
}
}
if (!ntfiles && !mfct) { /* everything in memory--pop */
if (nelem > 1
&& ((stable_sort)
? sradixsort(keylist, nelem, weights, REC_D)
: radixsort(keylist, nelem, weights, REC_D) ))
err(2, NULL);
if (nelem > 0)
append(keylist, nelem, depth, outfp, putline, ftbl);
break; /* pop */
}
if (panic >= PANIC) {
if (!ntfiles)
fmerge(0, MSTART, filelist, mfct, geteasy,
outfp, putline, ftbl);
else
fmerge(0, base, filelist, ntfiles, geteasy,
outfp, putline, ftbl);
break;
}
total = maxb = lastb = 0; /* find if one bin dominates */
for (i = 0; i < NBINS; i++)
if (sizes[i]) {
if (sizes[i] > sizes[maxb])
maxb = i;
lastb = i;
total += sizes[i];
}
if (sizes[maxb] < max((total / 2) , BUFSIZE))
maxb = lastb; /* otherwise pop after last bin */
fstack[base].lastb = lastb;
fstack[base].maxb = maxb;
/* start refining next level. */
getnext(-1, base, NULL, ntfiles, crec, bufend, 0); /* rewind */
for (i = 0; i < maxb; i++) {
if (!sizes[i]) /* bin empty; step ahead file offset */
getnext(i, base, NULL,ntfiles, crec, bufend, 0);
else
fsort(i, depth+1, base, filelist, ntfiles,
outfp, ftbl);
}
get = getnext;
if (lastb != maxb) {
if (prevfp != outfp)
tailfp[panic] = prevfp;
prevfp = ftmp();
for (i = maxb+1; i <= lastb; i++)
if (!sizes[i])
getnext(i, base, NULL, ntfiles, crec,
bufend,0);
else
fsort(i, depth+1, base, filelist,
ntfiles, prevfp, ftbl);
}
/* sort biggest (or last) bin at this level */
depth++;
panic++;
binno = maxb;
top = base;
nfiles = ntfiles; /* so overwrite them */
}
if (prevfp != outfp) {
concat(outfp, prevfp);
fclose(prevfp);
}
for (i = panic; i >= 0; --i)
if (tailfp[i]) {
concat(outfp, tailfp[i]);
fclose(tailfp[i]);
}
/* If on top level, free our structures */
if (depth == 0) {
free(keylist), keylist = NULL;
free(buffer), buffer = NULL;
}
}
/*
* This is one pass of radix exchange, dumping the bins to disk.
*/
#define swap(a, b, t) t = a, a = b, b = t
void
onepass(a, depth, n, sizes, tr, fp)
const u_char **a;
int depth;
long n, sizes[];
u_char *tr;
FILE *fp;
{
size_t tsizes[NBINS+1];
const u_char **bin[257], ***bp, ***bpmax, **top[256], ***tp;
static int histo[256];
int *hp;
int c;
const u_char **an, *t, **aj;
const u_char **ak, *r;
memset(tsizes, 0, sizeof(tsizes));
depth += sizeof(TRECHEADER);
an = &a[n];
for (ak = a; ak < an; ak++) {
histo[c = tr[**ak]]++;
tsizes[c] += ((const RECHEADER *) (*ak -= depth))->length;
}
bin[0] = a;
bpmax = bin + 256;
tp = top, hp = histo;
for (bp = bin; bp < bpmax; bp++) {
*tp++ = *(bp+1) = *bp + (c = *hp);
*hp++ = 0;
if (c <= 1)
continue;
}
for (aj = a; aj < an; *aj = r, aj = bin[c+1])
for (r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;)
swap(*ak, r, t);
for (ak = a, c = 0; c < 256; c++) {
an = bin[c+1];
n = an - ak;
tsizes[c] += n * sizeof(TRECHEADER);
/* tell getnext how many elements in this bin, this segment. */
EWRITE(&tsizes[c], sizeof(size_t), 1, fp);
sizes[c] += tsizes[c];
for (; ak < an; ++ak)
putrec((const RECHEADER *) *ak, fp);
}
}

77
usr.bin/sort/fsort.h Normal file
View File

@ -0,0 +1,77 @@
/* $NetBSD: fsort.h,v 1.8 2001/02/19 20:50:17 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fsort.h 8.1 (Berkeley) 6/6/93
*/
#define BUFSIZE (1<<20)
#define MAXNUM 131072 /* low guess at average record count */
#define BUFFEND (EOF-2)
#define MAXFCT 1000
#define DEFLLEN 65536
/*
* Default (initial) and maximum size of record buffer for fsort().
* Note that no more than MAXNUM records are stored in the buffer,
* even if the buffer is not full yet.
*/
#define DEFBUFSIZE (1 << 20) /* 1MB */
#define MAXBUFSIZE (8 << 20) /* 10 MB */
/*
* Number of files merge() can merge in one pass.
* This should be power of two so that it's possible to use this value
* for rouding.
*/
#define MERGE_FNUM 16
extern u_char *buffer, *linebuf;
extern size_t bufsize, linebuf_size;
/* temp files in the stack have a file descriptor, a largest bin (maxb)
* which becomes the last non-empty bin (lastb) when the actual largest
* bin is smaller than max(half the total file, BUFSIZE)
* Max_o is the offset of maxb so it can be sought after the other bins
* are sorted.
*/
struct tempfile {
FILE *fp;
u_char maxb;
u_char lastb;
int max_o;
};
extern struct tempfile fstack[MAXFCT];

355
usr.bin/sort/init.c Normal file
View File

@ -0,0 +1,355 @@
/* $NetBSD: init.c,v 1.5 2001/02/19 20:50:17 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
#ifndef lint
__RCSID("$NetBSD: init.c,v 1.5 2001/02/19 20:50:17 jdolecek Exp $");
__SCCSID("@(#)init.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <ctype.h>
#include <string.h>
static void insertcol __P((struct field *));
static const char *setcolumn __P((const char *, struct field *, int));
int setfield __P((const char *, struct field *, int));
extern struct coldesc clist[(ND+1)*2];
extern int ncols;
u_char gweights[NBINS];
/*
* masks of ignored characters. Alltable is 256 ones.
*/
static u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
/*
* clist (list of columns which correspond to one or more icol or tcol)
* is in increasing order of columns.
* Fields are kept in increasing order of fields.
*/
/*
* keep clist in order--inserts a column in a sorted array
*/
static void
insertcol(field)
struct field *field;
{
int i;
for (i = 0; i < ncols; i++)
if (field->icol.num <= clist[i].num)
break;
if (field->icol.num != clist[i].num) {
memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
clist[i].num = field->icol.num;
ncols++;
}
if (field->tcol.num && field->tcol.num != field->icol.num) {
for (i = 0; i < ncols; i++)
if (field->tcol.num <= clist[i].num)
break;
if (field->tcol.num != clist[i].num) {
memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
clist[i].num = field->tcol.num;
ncols++;
}
}
}
/*
* matches fields with the appropriate columns--n^2 but who cares?
*/
void
fldreset(fldtab)
struct field *fldtab;
{
int i;
fldtab[0].tcol.p = clist+ncols-1;
for (++fldtab; fldtab->icol.num; ++fldtab) {
for (i = 0; fldtab->icol.num != clist[i].num; i++)
;
fldtab->icol.p = clist + i;
if (!fldtab->tcol.num)
continue;
for (i = 0; fldtab->tcol.num != clist[i].num; i++)
;
fldtab->tcol.p = clist + i;
}
}
/*
* interprets a column in a -k field
*/
static const char *
setcolumn(pos, cur_fld, gflag)
const char *pos;
struct field *cur_fld;
int gflag;
{
struct column *col;
int tmp;
col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
pos += sscanf(pos, "%d", &(col->num));
while (isdigit(*pos))
pos++;
if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
errx(2, "field numbers must be positive");
if (*pos == '.') {
if (!col->num)
errx(2, "cannot indent end of line");
++pos;
pos += sscanf(pos, "%d", &(col->indent));
while (isdigit(*pos))
pos++;
if (&cur_fld->icol == col)
col->indent--;
if (col->indent < 0)
errx(2, "illegal offset");
}
if (optval(*pos, cur_fld->tcol.num))
while ((tmp = optval(*pos, cur_fld->tcol.num))) {
cur_fld->flags |= tmp;
pos++;
}
if (cur_fld->icol.num == 0)
cur_fld->icol.num = 1;
return (pos);
}
int
setfield(pos, cur_fld, gflag)
const char *pos;
struct field *cur_fld;
int gflag;
{
static int nfields = 0;
int tmp;
if (++nfields == ND)
errx(2, "too many sort keys. (Limit is %d)", ND-1);
cur_fld->weights = ascii;
cur_fld->mask = alltable;
pos = setcolumn(pos, cur_fld, gflag);
if (*pos == '\0') /* key extends to EOL. */
cur_fld->tcol.num = 0;
else {
if (*pos != ',')
errx(2, "illegal field descriptor");
setcolumn((++pos), cur_fld, gflag);
}
if (!cur_fld->flags)
cur_fld->flags = gflag;
tmp = cur_fld->flags;
/*
* Assign appropriate mask table and weight table.
* If the global weights are reversed, the local field
* must be "re-reversed".
*/
if (((tmp & R) ^ (gflag & R)) && (tmp & F))
cur_fld->weights = RFtable;
else if (tmp & F)
cur_fld->weights = Ftable;
else if ((tmp & R) ^ (gflag & R))
cur_fld->weights = Rascii;
if (tmp & I)
cur_fld->mask = itable;
else if (tmp & D)
cur_fld->mask = dtable;
cur_fld->flags |= (gflag & (BI | BT));
if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
cur_fld->flags &= ~BT;
if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
&& cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
&& cur_fld->tcol.indent < cur_fld->icol.indent))
errx(2, "fields out of order");
insertcol(cur_fld);
return (cur_fld->tcol.num);
}
int
optval(desc, tcolflag)
int desc, tcolflag;
{
switch(desc) {
case 'b':
if (!tcolflag)
return (BI);
else
return (BT);
case 'd': return (D);
case 'f': return (F);
case 'i': return (I);
case 'n': return (N);
case 'r': return (R);
default: return (0);
}
}
void
fixit(argc, argv)
int *argc;
char **argv;
{
int i, j, v, w, x;
static char vbuf[ND*20], *vpos, *tpos;
vpos = vbuf;
for (i = 1; i < *argc; i++) {
if (argv[i][0] == '+') {
tpos = argv[i]+1;
argv[i] = vpos;
vpos += sprintf(vpos, "-k");
tpos += sscanf(tpos, "%d", &v);
while (isdigit(*tpos))
tpos++;
vpos += sprintf(vpos, "%d", v+1);
if (*tpos == '.') {
++tpos;
tpos += sscanf(tpos, "%d", &x);
vpos += sprintf(vpos, ".%d", x+1);
}
while (*tpos)
*vpos++ = *tpos++;
vpos += sprintf(vpos, ",");
if (argv[i+1] &&
argv[i+1][0] == '-' && isdigit(argv[i+1][1])) {
tpos = argv[i+1] + 1;
tpos += sscanf(tpos, "%d", &w);
while (isdigit(*tpos))
tpos++;
x = 0;
if (*tpos == '.') {
++tpos;
tpos += sscanf(tpos, "%d", &x);
while (isdigit(*tpos))
tpos++;
}
if (x) {
vpos += sprintf(vpos, "%d", w+1);
vpos += sprintf(vpos, ".%d", x);
} else
vpos += sprintf(vpos, "%d", w);
while (*tpos)
*vpos++ = *tpos++;
for (j= i+1; j < *argc; j++)
argv[j] = argv[j+1];
*argc -= 1;
}
}
}
}
/*
* ascii, Rascii, Ftable, and RFtable map
* REC_D -> REC_D; {not REC_D} -> {not REC_D}.
* gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
* Note: when sorting in forward order, to encode character zero in a key,
* use \001\001; character 1 becomes \001\002. In this case, character 0
* is reserved for the field delimiter. Analagously for -r (fld_d = 255).
* Note: this is only good for ASCII sorting. For different LC 's,
* all bets are off. See also num_init in number.c
*/
void
settables(gflags)
int gflags;
{
u_char *wts;
int i, incr;
for (i=0; i < 256; i++) {
ascii[i] = i;
if (i > REC_D && i < 255 - REC_D+1)
Rascii[i] = 255 - i + 1;
else
Rascii[i] = 255 - i;
if (islower(i)) {
Ftable[i] = Ftable[toupper(i)];
RFtable[i] = RFtable[toupper(i)];
} else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
Ftable[i] = i + 1;
RFtable[i] = Rascii[i] - 1;
} else {
Ftable[i] = i;
RFtable[i] = Rascii[i];
}
alltable[i] = 1;
if (i == '\n' || isprint(i))
itable[i] = 1;
else
itable[i] = 0;
if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
dtable[i] = 1;
else
dtable[i] = 0;
}
Rascii[REC_D] = RFtable[REC_D] = REC_D;
if (isupper(REC_D))
Ftable[tolower(REC_D)]++;
if ((gflags & R) && !((gflags & F) && SINGL_FLD))
wts = Rascii;
else if (!((gflags & F) && SINGL_FLD))
wts = ascii;
else if (gflags & R)
wts = RFtable;
else
wts = Ftable;
memmove(gweights, wts, sizeof(gweights));
incr = (gflags & R) ? -1 : 1;
for (i = 0; i < REC_D; i++)
gweights[i] += incr;
gweights[REC_D] = ((gflags & R) ? 255 : 0);
if (SINGL_FLD && (gflags & F)) {
for (i = 0; i < REC_D; i++) {
ascii[i] += incr;
Rascii[i] += incr;
}
ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
}
}

390
usr.bin/sort/msort.c Normal file
View File

@ -0,0 +1,390 @@
/* $NetBSD: msort.c,v 1.9 2001/01/19 10:50:31 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "sort.h"
#include "fsort.h"
#ifndef lint
__RCSID("$NetBSD: msort.c,v 1.9 2001/01/19 10:50:31 jdolecek Exp $");
__SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Subroutines using comparisons: merge sort and check order */
#define DELETE (1)
typedef struct mfile {
u_char *end;
short flno;
struct recheader rec[1];
} MFILE;
static u_char *wts, *wts1 = NULL;
static int cmp __P((RECHEADER *, RECHEADER *));
static int insert __P((struct mfile **, struct mfile **, int, int));
void
fmerge(binno, top, filelist, nfiles, get, outfp, fput, ftbl)
int binno, top;
struct filelist *filelist;
int nfiles;
get_func_t get;
FILE *outfp;
put_func_t fput;
struct field *ftbl;
{
FILE *tout;
int i, j, last;
put_func_t put;
struct tempfile *l_fstack;
wts = ftbl->weights;
if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
wts1 = (ftbl->flags & R) ? Rascii : ascii;
if (!buffer) {
buffer = malloc(bufsize);
if (!buffer)
err(2, "fmerge(): realloc");
if (!linebuf && !SINGL_FLD) {
linebuf_size = DEFLLEN;
linebuf = malloc(linebuf_size);
}
}
if (binno >= 0)
l_fstack = fstack + top;
else
l_fstack = fstack;
while (nfiles) {
put = putrec;
for (j = 0; j < nfiles; j += MERGE_FNUM) {
if (nfiles <= MERGE_FNUM) {
tout = outfp;
put = fput;
}
else
tout = ftmp();
last = min(MERGE_FNUM, nfiles - j);
if (binno < 0) {
for (i = 0; i < last; i++)
if (!(l_fstack[i+MAXFCT-1-MERGE_FNUM].fp =
fopen(filelist->names[j+i], "r")))
err(2, "%s",
filelist->names[j+i]);
merge(MAXFCT-1-MERGE_FNUM, last, get, tout, put, ftbl);
} else {
for (i = 0; i< last; i++)
rewind(l_fstack[i+j].fp);
merge(top+j, last, get, tout, put, ftbl);
}
if (nfiles > MERGE_FNUM)
l_fstack[j/MERGE_FNUM].fp = tout;
}
nfiles = (nfiles + (MERGE_FNUM - 1)) / MERGE_FNUM;
if (nfiles == 1)
nfiles = 0;
if (binno < 0) {
binno = 0;
get = geteasy;
top = 0;
}
}
}
void
merge(infl0, nfiles, get, outfp, put, ftbl)
int infl0, nfiles;
get_func_t get;
put_func_t put;
FILE *outfp;
struct field *ftbl;
{
int c, i, j, nf = nfiles;
struct mfile *flist[MERGE_FNUM], *cfile;
size_t availsz = bufsize;
static void *bufs[MERGE_FNUM+1];
static size_t bufs_sz[MERGE_FNUM+1];
/*
* We need nfiles + 1 buffers. One is 'buffer', the
* rest needs to be allocated.
*/
bufs[0] = buffer;
bufs_sz[0] = bufsize;
for(i=1; i < nfiles+1; i++) {
if (bufs[i])
continue;
bufs[i] = malloc(DEFLLEN);
if (!bufs[i])
err(2, "merge(): realloc");
bufs_sz[i] = DEFLLEN;
}
for (i = j = 0; i < nfiles; i++) {
cfile = (struct mfile *) bufs[j];
cfile->flno = infl0 + j;
cfile->end = (u_char *) bufs[j] + bufs_sz[j];
for (c = 1; c == 1;) {
if (EOF == (c = get(cfile->flno, 0, NULL, nfiles,
cfile->rec, cfile->end, ftbl))) {
--i;
--nfiles;
break;
}
if (c == BUFFEND) {
cfile = realloc(bufs[j], bufs_sz[j] *= 2);
bufs[j] = (void *) cfile;
if (!cfile)
err(2, "merge(): realloc");
cfile->end = (u_char *)cfile + bufs_sz[j];
c = 1;
continue;
}
if (i)
c = insert(flist, &cfile, i, !DELETE);
else
flist[0] = cfile;
}
j++;
}
cfile = (struct mfile *) bufs[nf];
cfile->flno = flist[0]->flno;
cfile->end = (u_char *) cfile + bufs_sz[nf];
while (nfiles) {
for (c = 1; c == 1;) {
if (EOF == (c = get(cfile->flno, 0, NULL, nfiles,
cfile->rec, cfile->end, ftbl))) {
put(flist[0]->rec, outfp);
memmove(flist, flist + 1,
sizeof(MFILE *) * (--nfiles));
cfile->flno = flist[0]->flno;
break;
}
if (c == BUFFEND) {
char *oldbuf = (char *) cfile;
availsz = (char *) cfile->end - oldbuf;
availsz *= 2;
cfile = realloc(oldbuf, availsz);
for(i=0; i < nf+1; i++) {
if (bufs[i] == oldbuf) {
bufs[i] = (char *)cfile;
bufs_sz[i] = availsz;
break;
}
}
if (!cfile)
err(2, "merge: realloc");
cfile->end = (u_char *)cfile + availsz;
c = 1;
continue;
}
if (!(c = insert(flist, &cfile, nfiles, DELETE)))
put(cfile->rec, outfp);
}
}
if (bufs_sz[0] > bufsize) {
buffer = bufs[0];
bufsize = bufs_sz[0];
}
}
/*
* if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
* otherwise just inserts *rec in flist.
*/
static int
insert(flist, rec, ttop, delete)
struct mfile **flist, **rec;
int delete, ttop; /* delete = 0 or 1 */
{
struct mfile *tmprec = *rec;
int mid, top = ttop, bot = 0, cmpv = 1;
for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
cmpv = cmp(tmprec->rec, flist[mid]->rec);
if (cmpv < 0)
top = mid;
else if (cmpv > 0)
bot = mid;
else {
if (UNIQUE)
break;
if (stable_sort) {
/*
* Apply sort by fileno, to give priority
* to earlier specified files, hence providing
* more stable sort.
* If fileno is same, the new record should
* be put _after_ the previous entry.
*/
cmpv = tmprec->flno - flist[mid]->flno;
if (cmpv >= 0)
bot = mid;
else /* cmpv == 0 */
bot = mid - 1;
} else {
/* non-stable sort */
bot = mid - 1;
}
break;
}
}
if (delete) {
if (UNIQUE) {
if (!bot && cmpv)
cmpv = cmp(tmprec->rec, flist[0]->rec);
if (!cmpv)
return (1);
}
tmprec = flist[0];
if (bot)
memmove(flist, flist+1, bot * sizeof(MFILE **));
flist[bot] = *rec;
*rec = tmprec;
(*rec)->flno = flist[0]->flno;
return (0);
} else {
if (!bot && !(UNIQUE && !cmpv)) {
cmpv = cmp(tmprec->rec, flist[0]->rec);
if (cmpv < 0)
bot = -1;
}
if (UNIQUE && !cmpv)
return (1);
bot++;
memmove(flist + bot+1, flist + bot,
(ttop - bot) * sizeof(MFILE **));
flist[bot] = *rec;
return (0);
}
}
/*
* check order on one file
*/
void
order(filelist, get, ftbl)
struct filelist *filelist;
get_func_t get;
struct field *ftbl;
{
u_char *crec_end, *prec_end, *trec_end;
int c;
RECHEADER *crec, *prec, *trec;
if (!SINGL_FLD)
linebuf = malloc(DEFLLEN);
buffer = malloc(2 * (DEFLLEN + sizeof(TRECHEADER)));
crec = (RECHEADER *) buffer;
crec_end = buffer + DEFLLEN + sizeof(TRECHEADER);
prec = (RECHEADER *) (buffer + DEFLLEN + sizeof(TRECHEADER));
prec_end = buffer + 2*(DEFLLEN + sizeof(TRECHEADER));
wts = ftbl->weights;
if (SINGL_FLD && (ftbl->flags & F))
wts1 = (ftbl->flags & R) ? Rascii : ascii;
else
wts1 = NULL;
if (0 == get(-1, 0, filelist, 1, prec, prec_end, ftbl))
while (0 == get(-1, 0, filelist, 1, crec, crec_end, ftbl)) {
if (0 < (c = cmp(prec, crec))) {
crec->data[crec->length-1] = 0;
errx(1, "found disorder: %s", crec->data+crec->offset);
}
if (UNIQUE && !c) {
crec->data[crec->length-1] = 0;
errx(1, "found non-uniqueness: %s",
crec->data+crec->offset);
}
/*
* Swap pointers so that this record is on place pointed
* to by prec and new record is read to place pointed to by
* crec.
*/
trec = prec;
prec = crec;
crec = trec;
trec_end = prec_end;
prec_end = crec_end;
crec_end = trec_end;
}
exit(0);
}
static int
cmp(rec1, rec2)
RECHEADER *rec1, *rec2;
{
int r;
u_char *pos1, *pos2, *end;
u_char *cwts;
for (cwts = wts; cwts; cwts = (cwts == wts1 ? NULL : wts1)) {
pos1 = rec1->data;
pos2 = rec2->data;
if (!SINGL_FLD && (UNIQUE || stable_sort))
end = pos1 + min(rec1->offset, rec2->offset);
else
end = pos1 + min(rec1->length, rec2->length);
for (; pos1 < end; ) {
if ((r = cwts[*pos1++] - cwts[*pos2++]))
return (r);
}
}
return (0);
}

41
usr.bin/sort/pathnames.h Normal file
View File

@ -0,0 +1,41 @@
/* $NetBSD: pathnames.h,v 1.2 2000/10/07 18:37:10 bjh21 Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)pathnames.h 8.1 (Berkeley) 6/6/93
*/
#define _PATH_STDIN "/dev/stdin"

332
usr.bin/sort/sort.c Normal file
View File

@ -0,0 +1,332 @@
/* $NetBSD: sort.c,v 1.26 2001/04/30 00:25:09 ross Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* Sort sorts a file using an optional user-defined key.
* Sort uses radix sort for internal sorting, and allows
* a choice of merge sort and radix sort for external sorting.
*/
#include "sort.h"
#include "fsort.h"
#include "pathnames.h"
#ifndef lint
__COPYRIGHT("@(#) Copyright (c) 1993\n\
The Regents of the University of California. All rights reserved.\n");
#endif /* not lint */
#ifndef lint
__RCSID("$NetBSD: sort.c,v 1.26 2001/04/30 00:25:09 ross Exp $");
__SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <paths.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <locale.h>
int REC_D = '\n';
u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
/*
* weight tables. Gweights is one of ascii, Rascii..
* modified to weight rec_d = 0 (or 255)
*/
u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
struct coldesc clist[(ND+1)*2];
int ncols = 0;
extern struct coldesc clist[(ND+1)*2];
extern int ncols;
/*
* Default to stable sort.
*/
int stable_sort = 1;
char toutpath[MAXPATHLEN];
const char *tmpdir; /* where temporary files should be put */
static void cleanup __P((void));
static void onsignal __P((int));
static void usage __P((const char *));
static void many_files __P((void));
int main __P((int argc, char **argv));
int
main(argc, argv)
int argc;
char *argv[];
{
get_func_t get;
int ch, i, stdinflag = 0, tmp = 0;
char cflag = 0, mflag = 0;
char *outfile, *outpath = 0;
struct field fldtab[ND+2], *ftpos;
struct filelist filelist;
FILE *outfp = NULL;
setlocale(LC_ALL, "");
memset(fldtab, 0, (ND+2)*sizeof(struct field));
memset(d_mask, 0, NBINS);
d_mask[REC_D = '\n'] = REC_D_F;
SINGL_FLD = SEP_FLAG = 0;
d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
ftpos = fldtab;
many_files();
fixit(&argc, argv);
if (!(tmpdir = getenv("TMPDIR")))
tmpdir = _PATH_TMP;
while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) {
switch (ch) {
case 'b':
fldtab->flags |= BI | BT;
break;
case 'c':
cflag = 1;
break;
case 'd': case 'f': case 'i': case 'n': case 'r':
tmp |= optval(ch, 0);
if ((tmp & R) && (tmp & F))
fldtab->weights = RFtable;
else if (tmp & F)
fldtab->weights = Ftable;
else if (tmp & R)
fldtab->weights = Rascii;
fldtab->flags |= tmp;
break;
case 'H':
PANIC = 0;
break;
case 'k':
setfield(optarg, ++ftpos, fldtab->flags);
break;
case 'm':
mflag = 1;
break;
case 'o':
outpath = optarg;
break;
case 's':
/* for GNU sort compatibility (this is our default) */
stable_sort = 1;
break;
case 'S':
stable_sort = 0;
break;
case 't':
if (SEP_FLAG)
usage("multiple field delimiters");
SEP_FLAG = 1;
d_mask[' '] &= ~FLD_D;
d_mask['\t'] &= ~FLD_D;
d_mask[(u_char)*optarg] |= FLD_D;
if (d_mask[(u_char)*optarg] & REC_D_F)
errx(2, "record/field delimiter clash");
break;
case 'R':
if (REC_D != '\n')
usage("multiple record delimiters");
if ('\n' == (REC_D = *optarg))
break;
d_mask['\n'] = d_mask[' '];
d_mask[REC_D] = REC_D_F;
break;
case 'T':
/* -T tmpdir */
tmpdir = optarg;
break;
case 'u':
UNIQUE = 1;
break;
case '?':
default:
usage(NULL);
}
}
if (cflag && argc > optind+1)
errx(2, "too many input files for -c option");
if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
outpath = argv[argc-1];
argc -= 2;
}
if (mflag && argc - optind > (MAXFCT - (16+1))*16)
errx(2, "too many input files for -m option");
for (i = optind; i < argc; i++) {
/* allow one occurrence of /dev/stdin */
if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) {
if (stdinflag)
warnx("ignoring extra \"%s\" in file list",
argv[i]);
else
stdinflag = 1;
/* change to /dev/stdin if '-' */
if (argv[i][0] == '-')
argv[i] = _PATH_STDIN;
} else if ((ch = access(argv[i], R_OK)))
err(2, "%s", argv[i]);
}
if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) {
SINGL_FLD = 1;
fldtab[0].icol.num = 1;
} else {
if (!fldtab[1].icol.num) {
fldtab[0].flags &= ~(BI|BT);
setfield("1", ++ftpos, fldtab->flags);
}
fldreset(fldtab);
fldtab[0].flags &= ~F;
}
settables(fldtab[0].flags);
num_init();
fldtab->weights = gweights;
if (optind == argc) {
static const char * const names[] = { _PATH_STDIN, NULL };
filelist.names = names;
optind--;
} else
filelist.names = (const char * const *) &argv[optind];
if (SINGL_FLD)
get = makeline;
else
get = makekey;
if (cflag) {
order(&filelist, get, fldtab);
/* NOT REACHED */
}
if (!outpath) {
(void)snprintf(toutpath,
sizeof(toutpath), "%sstdout", _PATH_DEV);
outfile = outpath = toutpath;
outfp = stdout;
} else if (!(ch = access(outpath, 0)) &&
strncmp(_PATH_DEV, outpath, 5)) {
static const struct sigaction act =
{ onsignal, {{0}}, SA_RESTART | SA_RESETHAND };
static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE,
SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0};
int outfd;
errno = 0;
if (access(outpath, W_OK))
err(2, "%s", outpath);
(void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX",
outpath);
if ((outfd = mkstemp(toutpath)) == -1)
err(2, "Cannot create temporary file `%s'", toutpath);
if ((outfp = fdopen(outfd, "w")) == NULL)
err(2, "Cannot open temporary file `%s'", toutpath);
outfile = toutpath;
(void)atexit(cleanup);
for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
sigaction(sigtable[i], &act, 0);
} else
outfile = outpath;
if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL)
err(2, "output file %s", outfile);
if (mflag) {
fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline,
fldtab);
} else
fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab);
if (outfile != outpath) {
if (access(outfile, 0))
err(2, "%s", outfile);
(void)unlink(outpath);
if (link(outfile, outpath))
err(2, "cannot link %s: output left in %s",
outpath, outfile);
(void)unlink(outfile);
}
exit(0);
}
static void
onsignal(sig)
int sig;
{
cleanup();
}
static void
cleanup()
{
if (toutpath[0])
(void)unlink(toutpath);
}
static void
usage(msg)
const char *msg;
{
if (msg != NULL)
(void)fprintf(stderr, "sort: %s\n", msg);
(void)fprintf(stderr, "usage: [-o output] [-cmubdfinrsS] [-t char] ");
(void)fprintf(stderr, "[-R char] [-k keydef] ... [files]\n");
exit(2);
}
static void
many_files()
{
#if 0
struct rlimit rlp_many_files[1];
if (getrlimit(RLIMIT_NOFILE, rlp_many_files) == 0) {
rlp_many_files->rlim_cur = rlp_many_files->rlim_max;
setrlimit(RLIMIT_NOFILE, rlp_many_files);
}
#endif
}

149
usr.bin/sort/sort.h Normal file
View File

@ -0,0 +1,149 @@
/* $NetBSD: sort.h,v 1.11 2001/01/19 10:14:31 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)sort.h 8.1 (Berkeley) 6/6/93
*/
#include <sys/param.h>
#include <db.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NBINS 256
#define MAXMERGE 16
/* values for masks, weights, and other flags. */
#define I 1 /* mask out non-printable characters */
#define D 2 /* sort alphanumeric characters only */
#define N 4 /* Field is a number */
#define F 8 /* weight lower and upper case the same */
#define R 16 /* Field is reversed with respect to the global weight */
#define BI 32 /* ignore blanks in icol */
#define BT 64 /* ignore blanks in tcol */
/* masks for delimiters: blanks, fields, and termination. */
#define BLANK 1 /* ' ', '\t'; '\n' if -T is invoked */
#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
#define REC_D_F 4 /* '\n' default; from -T otherwise */
#define ND 10 /* limit on number of -k options. */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define FCLOSE(file) { \
if (EOF == fclose(file)) \
err(2, "%p", file); \
}
#define EWRITE(ptr, size, n, f) { \
if (!fwrite(ptr, size, n, f)) \
err(2, NULL); \
}
/* length of record is currently limited to maximum string length (size_t) */
typedef size_t length_t;
/* a record is a key/line pair starting at rec.data. It has a total length
* and an offset to the start of the line half of the pair.
*/
typedef struct recheader {
length_t length;
length_t offset;
u_char data[1];
} RECHEADER;
typedef struct trecheader {
length_t length;
length_t offset;
} TRECHEADER;
/* This is the column as seen by struct field. It is used by enterfield.
* They are matched with corresponding coldescs during initialization.
*/
struct column {
struct coldesc *p;
int num;
int indent;
};
/* a coldesc has a number and pointers to the beginning and end of the
* corresponding column in the current line. This is determined in enterkey.
*/
typedef struct coldesc {
u_char *start;
u_char *end;
int num;
} COLDESC;
/* A field has an initial and final column; an omitted final column
* implies the end of the line. Flags regulate omission of blanks and
* numerical sorts; mask determines which characters are ignored (from -i, -d);
* weights determines the sort weights of a character (from -f, -r).
*/
struct field {
struct column icol;
struct column tcol;
u_int flags;
u_char *mask;
u_char *weights;
};
struct filelist {
const char * const * names;
};
typedef int (*get_func_t) __P((int, int, struct filelist *, int,
RECHEADER *, u_char *, struct field *));
typedef void (*put_func_t) __P((const struct recheader *, FILE *));
extern int PANIC; /* maximum depth of fsort before fmerge is called */
extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
extern u_char d_mask[NBINS];
extern int SINGL_FLD, SEP_FLAG, UNIQUE;
extern int REC_D;
extern const char *tmpdir;
extern int stable_sort;
extern u_char gweights[NBINS];
#include "extern.h"

84
usr.bin/sort/tmp.c Normal file
View File

@ -0,0 +1,84 @@
/* $NetBSD: tmp.c,v 1.7 2001/02/19 15:45:45 jdolecek Exp $ */
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Peter McIlroy.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <ctype.h>
#ifndef lint
__RCSID("$NetBSD: tmp.c,v 1.7 2001/02/19 15:45:45 jdolecek Exp $");
__SCCSID("@(#)tmp.c 8.1 (Berkeley) 6/6/93");
#endif /* not lint */
#include <sys/param.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "sort.h"
#include "pathnames.h"
#define _NAME_TMP "sort.XXXXXXXX"
FILE *
ftmp()
{
sigset_t set, oset;
FILE *fp;
int fd;
char pathb[MAXPATHLEN], *path;
path = pathb;
(void)snprintf(path, sizeof(pathb), "%s%s%s", tmpdir,
(tmpdir[strlen(tmpdir)-1] != '/') ? "/" : "", _NAME_TMP);
sigfillset(&set);
(void)sigprocmask(SIG_BLOCK, &set, &oset);
if ((fd = mkstemp(path)) < 0)
err(2, "ftmp: mkstemp(\"%s\")", path);
if (!(fp = fdopen(fd, "w+")))
err(2, "ftmp: fdopen(\"%s\")", path);
(void)unlink(path);
(void)sigprocmask(SIG_SETMASK, &oset, NULL);
return (fp);
}