mirror of
https://git.FreeBSD.org/src.git
synced 2024-12-31 12:13:10 +00:00
696 lines
14 KiB
C
696 lines
14 KiB
C
/* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $ */
/*
 * tc.str.c: Short string package
 *           This has been a lesson of how to write buggy code!
 */
/*-
 * Copyright (c) 1980, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
|
|
#include "sh.h"
|
|
|
|
#include <assert.h>
|
|
#include <limits.h>
|
|
|
|
RCSID("$tcsh: tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $")
|
|
|
|
/*
 * Sizing constants for the static conversion buffers in this file:
 * MALLOC_INCR is both the initial size and the amount added on each
 * growth; MALLOC_SURPLUS is extra room allocated past that size.
 */
#define MALLOC_INCR 128
#ifndef WIDE_STRINGS
#define MALLOC_SURPLUS 0
#else
#define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */
#endif
|
|
|
|
#ifdef WIDE_STRINGS
|
|
size_t
|
|
one_mbtowc(Char *pwc, const char *s, size_t n)
|
|
{
|
|
int len;
|
|
|
|
len = rt_mbtowc(pwc, s, n);
|
|
if (len == -1) {
|
|
reset_mbtowc();
|
|
*pwc = (unsigned char)*s | INVALID_BYTE;
|
|
}
|
|
if (len <= 0)
|
|
len = 1;
|
|
return len;
|
|
}
|
|
|
|
size_t
|
|
one_wctomb(char *s, Char wchar)
|
|
{
|
|
int len;
|
|
|
|
if (wchar & INVALID_BYTE) {
|
|
s[0] = wchar & 0xFF;
|
|
len = 1;
|
|
} else {
|
|
#ifdef UTF16_STRINGS
|
|
if (wchar >= 0x10000) {
|
|
/* UTF-16 systems can't handle these values directly in calls to
|
|
wctomb. Convert value to UTF-16 surrogate and call wcstombs to
|
|
convert the "string" to the correct multibyte representation,
|
|
if any. */
|
|
wchar_t ws[3];
|
|
wchar -= 0x10000;
|
|
ws[0] = 0xd800 | (wchar >> 10);
|
|
ws[1] = 0xdc00 | (wchar & 0x3ff);
|
|
ws[2] = 0;
|
|
/* The return value of wcstombs excludes the trailing 0, so len is
|
|
the correct number of multibytes for the Unicode char. */
|
|
len = wcstombs (s, ws, MB_CUR_MAX + 1);
|
|
} else
|
|
#endif
|
|
len = wctomb(s, (wchar_t) wchar);
|
|
if (len == -1)
|
|
s[0] = wchar;
|
|
if (len <= 0)
|
|
len = 1;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
int
|
|
rt_mbtowc(Char *pwc, const char *s, size_t n)
|
|
{
|
|
int ret;
|
|
char back[MB_LEN_MAX];
|
|
wchar_t tmp;
|
|
#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
|
|
# if defined(AUTOSET_KANJI)
|
|
static mbstate_t mb_zero, mb;
|
|
/*
|
|
* Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
|
|
*/
|
|
if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
|
|
!memcmp(&mb, &mb_zero, sizeof(mb)))
|
|
{
|
|
*pwc = *s;
|
|
return 1;
|
|
}
|
|
# else
|
|
mbstate_t mb;
|
|
# endif
|
|
|
|
memset (&mb, 0, sizeof mb);
|
|
ret = mbrtowc(&tmp, s, n, &mb);
|
|
#else
|
|
ret = mbtowc(&tmp, s, n);
|
|
#endif
|
|
if (ret > 0) {
|
|
*pwc = tmp;
|
|
#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
|
|
if (tmp >= 0xd800 && tmp <= 0xdbff) {
|
|
/* UTF-16 surrogate pair. Fetch second half and compute
|
|
UTF-32 value. Dispense with the inverse test in this case. */
|
|
size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
|
|
if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
|
|
ret = -1;
|
|
else {
|
|
*pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
|
|
ret += n2;
|
|
}
|
|
} else
|
|
#endif
|
|
if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
|
|
ret = -1;
|
|
|
|
} else if (ret == -2)
|
|
ret = -1;
|
|
else if (ret == 0)
|
|
*pwc = '\0';
|
|
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
#ifdef SHORT_STRINGS
|
|
Char **
|
|
blk2short(char **src)
|
|
{
|
|
size_t n;
|
|
Char **sdst, **dst;
|
|
|
|
/*
|
|
* Count
|
|
*/
|
|
for (n = 0; src[n] != NULL; n++)
|
|
continue;
|
|
sdst = dst = xmalloc((n + 1) * sizeof(Char *));
|
|
|
|
for (; *src != NULL; src++)
|
|
*dst++ = SAVE(*src);
|
|
*dst = NULL;
|
|
return (sdst);
|
|
}
|
|
|
|
char **
|
|
short2blk(Char **src)
|
|
{
|
|
size_t n;
|
|
char **sdst, **dst;
|
|
|
|
/*
|
|
* Count
|
|
*/
|
|
for (n = 0; src[n] != NULL; n++)
|
|
continue;
|
|
sdst = dst = xmalloc((n + 1) * sizeof(char *));
|
|
|
|
for (; *src != NULL; src++)
|
|
*dst++ = strsave(short2str(*src));
|
|
*dst = NULL;
|
|
return (sdst);
|
|
}
|
|
|
|
Char *
|
|
str2short(const char *src)
|
|
{
|
|
static struct Strbuf buf; /* = Strbuf_INIT; */
|
|
|
|
if (src == NULL)
|
|
return (NULL);
|
|
|
|
buf.len = 0;
|
|
while (*src) {
|
|
Char wc;
|
|
|
|
src += one_mbtowc(&wc, src, MB_LEN_MAX);
|
|
Strbuf_append1(&buf, wc);
|
|
}
|
|
Strbuf_terminate(&buf);
|
|
return buf.s;
|
|
}
|
|
|
|
char *
|
|
short2str(const Char *src)
|
|
{
|
|
static char *sdst = NULL;
|
|
static size_t dstsize = 0;
|
|
char *dst, *edst;
|
|
|
|
if (src == NULL)
|
|
return (NULL);
|
|
|
|
if (sdst == NULL) {
|
|
dstsize = MALLOC_INCR;
|
|
sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
|
|
}
|
|
dst = sdst;
|
|
edst = &dst[dstsize];
|
|
while (*src) {
|
|
dst += one_wctomb(dst, *src & CHAR);
|
|
src++;
|
|
if (dst >= edst) {
|
|
char *wdst = dst;
|
|
char *wedst = edst;
|
|
|
|
dstsize += MALLOC_INCR;
|
|
sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
|
|
edst = &sdst[dstsize];
|
|
dst = &edst[-MALLOC_INCR];
|
|
while (wdst > wedst) {
|
|
dst++;
|
|
wdst--;
|
|
}
|
|
}
|
|
}
|
|
*dst = 0;
|
|
return (sdst);
|
|
}
|
|
|
|
#if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
|
|
Char *
|
|
s_strcpy(Char *dst, const Char *src)
|
|
{
|
|
Char *sdst;
|
|
|
|
sdst = dst;
|
|
while ((*dst++ = *src++) != '\0')
|
|
continue;
|
|
return (sdst);
|
|
}
|
|
|
|
Char *
|
|
s_strncpy(Char *dst, const Char *src, size_t n)
|
|
{
|
|
Char *sdst;
|
|
|
|
if (n == 0)
|
|
return(dst);
|
|
|
|
sdst = dst;
|
|
do
|
|
if ((*dst++ = *src++) == '\0') {
|
|
while (--n != 0)
|
|
*dst++ = '\0';
|
|
return(sdst);
|
|
}
|
|
while (--n != 0);
|
|
return (sdst);
|
|
}
|
|
|
|
Char *
|
|
s_strcat(Char *dst, const Char *src)
|
|
{
|
|
Strcpy(Strend(dst), src);
|
|
return dst;
|
|
}
|
|
|
|
#ifdef NOTUSED
|
|
Char *
|
|
s_strncat(Char *dst, const Char *src, size_t n)
|
|
{
|
|
Char *sdst;
|
|
|
|
if (n == 0)
|
|
return (dst);
|
|
|
|
sdst = dst;
|
|
|
|
while (*dst)
|
|
dst++;
|
|
|
|
do
|
|
if ((*dst++ = *src++) == '\0')
|
|
return(sdst);
|
|
while (--n != 0)
|
|
continue;
|
|
|
|
*dst = '\0';
|
|
return (sdst);
|
|
}
|
|
|
|
#endif
|
|
|
|
Char *
|
|
s_strchr(const Char *str, int ch)
|
|
{
|
|
do
|
|
if (*str == ch)
|
|
return ((Char *)(intptr_t)str);
|
|
while (*str++);
|
|
return (NULL);
|
|
}
|
|
|
|
Char *
|
|
s_strrchr(const Char *str, int ch)
|
|
{
|
|
const Char *rstr;
|
|
|
|
rstr = NULL;
|
|
do
|
|
if (*str == ch)
|
|
rstr = str;
|
|
while (*str++);
|
|
return ((Char *)(intptr_t)rstr);
|
|
}
|
|
|
|
size_t
|
|
s_strlen(const Char *str)
|
|
{
|
|
size_t n;
|
|
|
|
for (n = 0; *str++; n++)
|
|
continue;
|
|
return (n);
|
|
}
|
|
|
|
int
|
|
s_strcmp(const Char *str1, const Char *str2)
|
|
{
|
|
for (; *str1 && *str1 == *str2; str1++, str2++)
|
|
continue;
|
|
/*
|
|
* The following case analysis is necessary so that characters which look
|
|
* negative collate low against normal characters but high against the
|
|
* end-of-string NUL.
|
|
*/
|
|
if (*str1 == '\0' && *str2 == '\0')
|
|
return (0);
|
|
else if (*str1 == '\0')
|
|
return (-1);
|
|
else if (*str2 == '\0')
|
|
return (1);
|
|
else
|
|
return (*str1 - *str2);
|
|
}
|
|
|
|
int
|
|
s_strncmp(const Char *str1, const Char *str2, size_t n)
|
|
{
|
|
if (n == 0)
|
|
return (0);
|
|
do {
|
|
if (*str1 != *str2) {
|
|
/*
|
|
* The following case analysis is necessary so that characters
|
|
* which look negative collate low against normal characters
|
|
* but high against the end-of-string NUL.
|
|
*/
|
|
if (*str1 == '\0')
|
|
return (-1);
|
|
else if (*str2 == '\0')
|
|
return (1);
|
|
else
|
|
return (*str1 - *str2);
|
|
}
|
|
if (*str1 == '\0')
|
|
return(0);
|
|
str1++, str2++;
|
|
} while (--n != 0);
|
|
return(0);
|
|
}
|
|
#endif /* not WIDE_STRINGS */
|
|
|
|
int
|
|
s_strcasecmp(const Char *str1, const Char *str2)
|
|
{
|
|
#ifdef WIDE_STRINGS
|
|
wint_t l1 = 0, l2 = 0;
|
|
for (; *str1; str1++, str2++)
|
|
if (*str1 == *str2)
|
|
l1 = l2 = 0;
|
|
else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
|
|
break;
|
|
#else
|
|
unsigned char l1 = 0, l2 = 0;
|
|
for (; *str1; str1++, str2++)
|
|
if (*str1 == *str2)
|
|
l1 = l2 = 0;
|
|
else if ((l1 = tolower((unsigned char)*str1)) !=
|
|
(l2 = tolower((unsigned char)*str2)))
|
|
break;
|
|
#endif
|
|
/*
|
|
* The following case analysis is necessary so that characters which look
|
|
* negative collate low against normal characters but high against the
|
|
* end-of-string NUL.
|
|
*/
|
|
if (*str1 == '\0' && *str2 == '\0')
|
|
return (0);
|
|
else if (*str1 == '\0')
|
|
return (-1);
|
|
else if (*str2 == '\0')
|
|
return (1);
|
|
else if (l1 == l2) /* They are zero when they are equal */
|
|
return (*str1 - *str2);
|
|
else
|
|
return (l1 - l2);
|
|
}
|
|
|
|
Char *
|
|
s_strnsave(const Char *s, size_t len)
|
|
{
|
|
Char *n;
|
|
|
|
n = xmalloc((len + 1) * sizeof (*n));
|
|
memcpy(n, s, len * sizeof (*n));
|
|
n[len] = '\0';
|
|
return n;
|
|
}
|
|
|
|
Char *
|
|
s_strsave(const Char *s)
|
|
{
|
|
Char *n;
|
|
size_t size;
|
|
|
|
if (s == NULL)
|
|
s = STRNULL;
|
|
size = (Strlen(s) + 1) * sizeof(*n);
|
|
n = xmalloc(size);
|
|
memcpy(n, s, size);
|
|
return (n);
|
|
}
|
|
|
|
Char *
|
|
s_strspl(const Char *cp, const Char *dp)
|
|
{
|
|
Char *res, *ep;
|
|
const Char *p, *q;
|
|
|
|
if (!cp)
|
|
cp = STRNULL;
|
|
if (!dp)
|
|
dp = STRNULL;
|
|
for (p = cp; *p++;)
|
|
continue;
|
|
for (q = dp; *q++;)
|
|
continue;
|
|
res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
|
|
for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
|
|
continue;
|
|
for (ep--, q = dp; (*ep++ = *q++) != '\0';)
|
|
continue;
|
|
return (res);
|
|
}
|
|
|
|
Char *
|
|
s_strend(const Char *cp)
|
|
{
|
|
if (!cp)
|
|
return ((Char *)(intptr_t) cp);
|
|
while (*cp)
|
|
cp++;
|
|
return ((Char *)(intptr_t) cp);
|
|
}
|
|
|
|
Char *
|
|
s_strstr(const Char *s, const Char *t)
|
|
{
|
|
do {
|
|
const Char *ss = s;
|
|
const Char *tt = t;
|
|
|
|
do
|
|
if (*tt == '\0')
|
|
return ((Char *)(intptr_t) s);
|
|
while (*ss++ == *tt++);
|
|
} while (*s++ != '\0');
|
|
return (NULL);
|
|
}
|
|
|
|
#else /* !SHORT_STRINGS */
|
|
/*
 * Copy s into a function-static buffer, run strip() on the copy and
 * return it.  The buffer is reused (and enlarged when needed) by later
 * calls, so the result is only good until the next call.  Returns NULL
 * when s is NULL.
 */
char *
caching_strip(const char *s)
{
    static char *buf = NULL;
    static size_t buf_size = 0;
    size_t need;

    if (s == NULL)
        return NULL;

    need = strlen(s) + 1;	/* string plus its terminator */
    if (need > buf_size) {
        buf = xrealloc(buf, need);
        buf_size = need;
    }
    memcpy(buf, s, need);
    strip(buf);
    return buf;
}
|
|
#endif
|
|
|
|
char *
|
|
short2qstr(const Char *src)
|
|
{
|
|
static char *sdst = NULL;
|
|
static size_t dstsize = 0;
|
|
char *dst, *edst;
|
|
|
|
if (src == NULL)
|
|
return (NULL);
|
|
|
|
if (sdst == NULL) {
|
|
dstsize = MALLOC_INCR;
|
|
sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
|
|
}
|
|
dst = sdst;
|
|
edst = &dst[dstsize];
|
|
while (*src) {
|
|
if (*src & QUOTE) {
|
|
*dst++ = '\\';
|
|
if (dst == edst) {
|
|
dstsize += MALLOC_INCR;
|
|
sdst = xrealloc(sdst,
|
|
(dstsize + MALLOC_SURPLUS) * sizeof(char));
|
|
edst = &sdst[dstsize];
|
|
dst = &edst[-MALLOC_INCR];
|
|
}
|
|
}
|
|
dst += one_wctomb(dst, *src & CHAR);
|
|
src++;
|
|
if (dst >= edst) {
|
|
ptrdiff_t i = dst - edst;
|
|
dstsize += MALLOC_INCR;
|
|
sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
|
|
edst = &sdst[dstsize];
|
|
dst = &edst[-MALLOC_INCR + i];
|
|
}
|
|
}
|
|
*dst = 0;
|
|
return (sdst);
|
|
}
|
|
|
|
struct blk_buf *
|
|
bb_alloc()
|
|
{
|
|
return xcalloc(1, sizeof(struct blk_buf));
|
|
}
|
|
|
|
static void
|
|
bb_store(struct blk_buf *bb, Char *str)
|
|
{
|
|
if (bb->len == bb->size) { /* Keep space for terminating NULL */
|
|
if (bb->size == 0)
|
|
bb->size = 16; /* Arbitrary */
|
|
else
|
|
bb->size *= 2;
|
|
bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
|
|
}
|
|
bb->vec[bb->len] = str;
|
|
}
|
|
|
|
void
|
|
bb_append(struct blk_buf *bb, Char *str)
|
|
{
|
|
bb_store(bb, str);
|
|
bb->len++;
|
|
}
|
|
|
|
void
|
|
bb_cleanup(void *xbb)
|
|
{
|
|
struct blk_buf *bb;
|
|
size_t i;
|
|
|
|
bb = xbb;
|
|
for (i = 0; i < bb->len; i++)
|
|
xfree(bb->vec[i]);
|
|
xfree(bb->vec);
|
|
}
|
|
|
|
/*
 * Release everything held by the blk_buf (via bb_cleanup) and then
 * xfree() the structure itself.
 */
void
bb_free(void *bb)
{
    void *self = bb;

    bb_cleanup(self);
    xfree(self);
}
|
|
|
|
Char **
|
|
bb_finish(struct blk_buf *bb)
|
|
{
|
|
bb_store(bb, NULL);
|
|
return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
|
|
}
|
|
|
|
#define DO_STRBUF(STRBUF, CHAR, STRLEN) \
|
|
\
|
|
struct STRBUF * \
|
|
STRBUF##_alloc(void) \
|
|
{ \
|
|
return xcalloc(1, sizeof(struct STRBUF)); \
|
|
} \
|
|
\
|
|
static void \
|
|
STRBUF##_store1(struct STRBUF *buf, CHAR c) \
|
|
{ \
|
|
if (buf->size == buf->len) { \
|
|
if (buf->size == 0) \
|
|
buf->size = 64; /* Arbitrary */ \
|
|
else \
|
|
buf->size *= 2; \
|
|
buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
|
|
} \
|
|
assert(buf->s); \
|
|
buf->s[buf->len] = c; \
|
|
} \
|
|
\
|
|
/* Like strbuf_append1(buf, '\0'), but don't advance len */ \
|
|
void \
|
|
STRBUF##_terminate(struct STRBUF *buf) \
|
|
{ \
|
|
STRBUF##_store1(buf, '\0'); \
|
|
} \
|
|
\
|
|
void \
|
|
STRBUF##_append1(struct STRBUF *buf, CHAR c) \
|
|
{ \
|
|
STRBUF##_store1(buf, c); \
|
|
buf->len++; \
|
|
} \
|
|
\
|
|
void \
|
|
STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \
|
|
{ \
|
|
if (buf->size < buf->len + len) { \
|
|
if (buf->size == 0) \
|
|
buf->size = 64; /* Arbitrary */ \
|
|
while (buf->size < buf->len + len) \
|
|
buf->size *= 2; \
|
|
buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \
|
|
} \
|
|
memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \
|
|
buf->len += len; \
|
|
} \
|
|
\
|
|
void \
|
|
STRBUF##_append(struct STRBUF *buf, const CHAR *s) \
|
|
{ \
|
|
STRBUF##_appendn(buf, s, STRLEN(s)); \
|
|
} \
|
|
\
|
|
CHAR * \
|
|
STRBUF##_finish(struct STRBUF *buf) \
|
|
{ \
|
|
STRBUF##_append1(buf, 0); \
|
|
return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \
|
|
} \
|
|
\
|
|
void \
|
|
STRBUF##_cleanup(void *xbuf) \
|
|
{ \
|
|
struct STRBUF *buf; \
|
|
\
|
|
buf = xbuf; \
|
|
xfree(buf->s); \
|
|
} \
|
|
\
|
|
void \
|
|
STRBUF##_free(void *xbuf) \
|
|
{ \
|
|
STRBUF##_cleanup(xbuf); \
|
|
xfree(xbuf); \
|
|
} \
|
|
\
|
|
const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
|
|
|
|
DO_STRBUF(strbuf, char, strlen);
|
|
DO_STRBUF(Strbuf, Char, Strlen);
|