1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-14 10:09:48 +00:00

Add support for multibyte characters in input files and delimiter
strings (arguments to the -d option). This involves backing out paste.c
rev. 1.13 until we have a version of fgetln() that operates on wide
character streams.
This commit is contained in:
Tim J. Robbins 2004-06-25 01:48:43 +00:00
parent f664b76fe4
commit 0f10c7bbea
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=131076
2 changed files with 48 additions and 31 deletions

View File

@ -36,7 +36,7 @@
.\" @(#)paste.1 8.1 (Berkeley) 6/6/93
.\" $FreeBSD$
.\"
.Dd September 20, 2001
.Dd June 25, 2004
.Dt PASTE 1
.Os
.Sh NAME
@ -148,7 +148,3 @@ A
.Nm
command appeared in
.At 32v .
.Sh BUGS
Multibyte character delimiters cannot be specified with the
.Fl d
option.

View File

@ -54,31 +54,49 @@ __FBSDID("$FreeBSD$");
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
char *delim;
wchar_t *delim;
int delimcnt;
int parallel(char **);
int sequential(char **);
int tr(char *);
int tr(wchar_t *);
static void usage(void);
char tab[] = "\t";
wchar_t tab[] = L"\t";
int
main(int argc, char *argv[])
{
int ch, rval, seq;
wchar_t *warg;
const char *arg;
size_t len;
setlocale(LC_CTYPE, "");
seq = 0;
while ((ch = getopt(argc, argv, "d:s")) != -1)
switch(ch) {
case 'd':
delimcnt = tr(delim = optarg);
arg = optarg;
len = mbsrtowcs(NULL, &arg, 0, NULL);
if (len == (size_t)-1)
err(1, "delimiters");
warg = malloc((len + 1) * sizeof(*warg));
if (warg == NULL)
err(1, NULL);
arg = optarg;
len = mbsrtowcs(warg, &arg, len + 1, NULL);
if (len == (size_t)-1)
err(1, "delimiters");
delimcnt = tr(delim = warg);
break;
case 's':
seq = 1;
@ -116,10 +134,11 @@ parallel(char **argv)
{
LIST *lp;
int cnt;
char ch, *buf, *p;
wint_t ich;
wchar_t ch;
char *p;
LIST *head, *tmp;
int opencnt, output;
size_t len;
for (cnt = 0, head = NULL; (p = *argv); ++argv, ++cnt) {
if ((lp = malloc(sizeof(LIST))) == NULL)
@ -144,16 +163,16 @@ parallel(char **argv)
if (!lp->fp) {
if (output && lp->cnt &&
(ch = delim[(lp->cnt - 1) % delimcnt]))
putchar(ch);
putwchar(ch);
continue;
}
if ((buf = fgetln(lp->fp, &len)) == NULL) {
if ((ich = getwc(lp->fp)) == WEOF) {
if (!--opencnt)
break;
lp->fp = NULL;
if (output && lp->cnt &&
(ch = delim[(lp->cnt - 1) % delimcnt]))
putchar(ch);
putwchar(ch);
continue;
}
/*
@ -164,15 +183,17 @@ parallel(char **argv)
output = 1;
for (cnt = 0; cnt < lp->cnt; ++cnt)
if ((ch = delim[cnt % delimcnt]))
putchar(ch);
putwchar(ch);
} else if ((ch = delim[(lp->cnt - 1) % delimcnt]))
putchar(ch);
if (buf[len - 1] == '\n')
len--;
fwrite(buf, 1, len, stdout);
putwchar(ch);
if (ich == '\n')
continue;
do {
putwchar(ich);
} while ((ich = getwc(lp->fp)) != WEOF && ich != '\n');
}
if (output)
putchar('\n');
putwchar('\n');
}
return (0);
@ -183,8 +204,8 @@ sequential(char **argv)
{
FILE *fp;
int cnt, failed, needdelim;
char *buf, *p;
size_t len;
wint_t ch;
char *p;
failed = 0;
for (; (p = *argv); ++argv) {
@ -196,21 +217,21 @@ sequential(char **argv)
continue;
}
cnt = needdelim = 0;
while ((buf = fgetln(fp, &len)) != NULL) {
while ((ch = getwc(fp)) != WEOF) {
if (needdelim) {
needdelim = 0;
if (delim[cnt] != '\0')
putchar(delim[cnt]);
putwchar(delim[cnt]);
if (++cnt == delimcnt)
cnt = 0;
}
if (buf[len - 1] == '\n')
len--;
fwrite(buf, 1, len, stdout);
needdelim = 1;
if (ch != '\n')
putwchar(ch);
else
needdelim = 1;
}
if (needdelim)
putchar('\n');
putwchar('\n');
if (fp != stdin)
(void)fclose(fp);
}
@ -219,10 +240,10 @@ sequential(char **argv)
}
int
tr(char *arg)
tr(wchar_t *arg)
{
int cnt;
char ch, *p;
wchar_t ch, *p;
for (p = arg, cnt = 0; (ch = *p++); ++arg, ++cnt)
if (ch == '\\')