mirror of
https://git.FreeBSD.org/src.git
synced 2025-02-05 18:05:16 +00:00
Add <uchar.h>.
The <uchar.h> header, part of C11, adds a small number of utility functions for 16/32-bit "universal" characters, which may or may not be UTF-16/32. As our wchar_t is already ISO 10646, simply add light-weight wrappers around wcrtomb() and mbrtowc(). While there, also add (not-yet-standard) _l functions, similar to the ones we already have for the other locale-dependent functions. Reviewed by: theraven
This commit is contained in:
parent
89c5c3aab8
commit
50c77c6e8b
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=250883
@ -23,7 +23,7 @@ INCS= a.out.h ar.h assert.h bitstring.h complex.h cpio.h _ctype.h ctype.h \
|
||||
stdnoreturn.h stdio.h stdlib.h string.h stringlist.h \
|
||||
strings.h sysexits.h tar.h termios.h tgmath.h \
|
||||
time.h timeconv.h timers.h ttyent.h \
|
||||
ulimit.h unistd.h utime.h utmpx.h uuid.h varargs.h \
|
||||
uchar.h ulimit.h unistd.h utime.h utmpx.h uuid.h varargs.h \
|
||||
wchar.h wctype.h wordexp.h xlocale.h
|
||||
|
||||
.PATH: ${.CURDIR}/../contrib/libc-vis
|
||||
|
@ -145,10 +145,8 @@ typedef _Atomic(long) atomic_long;
|
||||
typedef _Atomic(unsigned long) atomic_ulong;
|
||||
typedef _Atomic(long long) atomic_llong;
|
||||
typedef _Atomic(unsigned long long) atomic_ullong;
|
||||
#if 0
|
||||
typedef _Atomic(__char16_t) atomic_char16_t;
|
||||
typedef _Atomic(__char32_t) atomic_char32_t;
|
||||
#endif
|
||||
typedef _Atomic(__wchar_t) atomic_wchar_t;
|
||||
typedef _Atomic(__int_least8_t) atomic_int_least8_t;
|
||||
typedef _Atomic(__uint_least8_t) atomic_uint_least8_t;
|
||||
|
60
include/uchar.h
Normal file
60
include/uchar.h
Normal file
@ -0,0 +1,60 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _UCHAR_H_
#define	_UCHAR_H_

#include <sys/cdefs.h>
#include <sys/_types.h>

#ifndef _MBSTATE_T_DECLARED
typedef	__mbstate_t	mbstate_t;
#define	_MBSTATE_T_DECLARED
#endif

#ifndef _SIZE_T_DECLARED
typedef	__size_t	size_t;
#define	_SIZE_T_DECLARED
#endif

/*
 * Starting with C++11, char16_t and char32_t are keywords naming distinct
 * built-in types; attempting to typedef them there is an error.  Only
 * provide the typedefs for C and for older C++ dialects.
 */
#if !defined(__cplusplus) || __cplusplus < 201103L
typedef	__char16_t	char16_t;
typedef	__char32_t	char32_t;
#endif

__BEGIN_DECLS
/*
 * Restartable conversions between multibyte characters and 16/32-bit
 * characters.  The mbstate_t argument carries the conversion state
 * between calls.
 */
size_t	c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict);
size_t	c32rtomb(char * __restrict, char32_t, mbstate_t * __restrict);
size_t	mbrtoc16(char16_t * __restrict, const char * __restrict, size_t,
    mbstate_t * __restrict);
size_t	mbrtoc32(char32_t * __restrict, const char * __restrict, size_t,
    mbstate_t * __restrict);
/* The _l variants taking an explicit locale_t are declared here. */
#if __BSD_VISIBLE || defined(_XLOCALE_H_)
#include <xlocale/_uchar.h>
#endif
__END_DECLS

#endif /* !_UCHAR_H_ */
|
@ -2,7 +2,7 @@
|
||||
|
||||
NO_OBJ=
|
||||
INCS= _ctype.h _inttypes.h _langinfo.h _locale.h _monetary.h _stdio.h\
|
||||
_stdlib.h _string.h _time.h _wchar.h
|
||||
_stdlib.h _string.h _time.h _uchar.h _wchar.h
|
||||
INCSDIR=${INCLUDEDIR}/xlocale
|
||||
|
||||
.include <bsd.prog.mk>
|
||||
|
46
include/xlocale/_uchar.h
Normal file
46
include/xlocale/_uchar.h
Normal file
@ -0,0 +1,46 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/* Make locale_t available even if <xlocale.h> has not been included. */
#ifndef _LOCALE_T_DEFINED
#define	_LOCALE_T_DEFINED
typedef struct _xlocale	*locale_t;
#endif

#ifndef _XLOCALE_UCHAR_H_
#define	_XLOCALE_UCHAR_H_

/*
 * Variants of the <uchar.h> conversion functions that take the locale to
 * use as an explicit trailing argument.
 *
 * This header does not declare size_t, mbstate_t, char16_t or char32_t
 * itself; it uses them as already declared by whatever includes it.
 */
size_t	c16rtomb_l(char * __restrict, char16_t, mbstate_t * __restrict,
    locale_t);
size_t	c32rtomb_l(char * __restrict, char32_t, mbstate_t * __restrict,
    locale_t);
size_t	mbrtoc16_l(char16_t * __restrict, const char * __restrict, size_t,
    mbstate_t * __restrict, locale_t);
size_t	mbrtoc32_l(char32_t * __restrict, const char * __restrict, size_t,
    mbstate_t * __restrict, locale_t);

#endif /* _XLOCALE_UCHAR_H_ */
|
@ -4,11 +4,11 @@
|
||||
# locale sources
|
||||
.PATH: ${.CURDIR}/${LIBC_ARCH}/locale ${.CURDIR}/locale
|
||||
|
||||
SRCS+= ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \
|
||||
gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \
|
||||
SRCS+= ascii.c big5.c btowc.c c16rtomb.c c32rtomb.c collate.c collcmp.c euc.c \
|
||||
fix_grouping.c gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \
|
||||
ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \
|
||||
mbrlen.c \
|
||||
mbrtowc.c mbsinit.c mbsnrtowcs.c \
|
||||
mbrtoc16.c mbrtoc32.c mbrtowc.c mbsinit.c mbsnrtowcs.c \
|
||||
mbsrtowcs.c mbtowc.c mbstowcs.c \
|
||||
mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rpmatch.c \
|
||||
rune.c \
|
||||
@ -72,7 +72,9 @@ MLINKS+=iswalnum_l.3 iswalpha_l.3 iswalnum_l.3 iswcntrl_l.3 \
|
||||
iswalnum_l.3 iswspecial_l.3 iswalnum_l.3 nextwctype_l.3 \
|
||||
iswalnum_l.3 towctrans_l.3 iswalnum_l.3 wctrans_l.3
|
||||
MLINKS+=isxdigit.3 ishexnumber.3
|
||||
MLINKS+=mbrtowc.3 mbrtoc16.3 mbrtowc.3 mbrtoc32.3
|
||||
MLINKS+=mbsrtowcs.3 mbsnrtowcs.3
|
||||
MLINKS+=wcrtomb.3 c16rtomb.3 wcrtomb.3 c32rtomb.3
|
||||
MLINKS+=wcsrtombs.3 wcsnrtombs.3
|
||||
MLINKS+=wcstod.3 wcstof.3 wcstod.3 wcstold.3
|
||||
MLINKS+=wcstol.3 wcstoul.3 wcstol.3 wcstoll.3 wcstol.3 wcstoull.3 \
|
||||
|
@ -199,6 +199,14 @@ FBSD_1.3 {
|
||||
__istype_l;
|
||||
__runes_for_locale;
|
||||
_ThreadRuneLocale;
|
||||
c16rtomb;
|
||||
c16rtomb_l;
|
||||
c32rtomb;
|
||||
c32rtomb_l;
|
||||
mbrtoc16;
|
||||
mbrtoc16_l;
|
||||
mbrtoc32;
|
||||
mbrtoc32_l;
|
||||
};
|
||||
|
||||
FBSDprivate_1.0 {
|
||||
|
81
lib/libc/locale/c16rtomb.c
Normal file
81
lib/libc/locale/c16rtomb.c
Normal file
@ -0,0 +1,81 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <errno.h>
|
||||
#include <uchar.h>
|
||||
#include "xlocale_private.h"
|
||||
|
||||
typedef struct {
|
||||
char16_t lead_surrogate;
|
||||
mbstate_t c32_mbstate;
|
||||
} _Char16State;
|
||||
|
||||
size_t
|
||||
c16rtomb_l(char * __restrict s, char16_t c16, mbstate_t * __restrict ps,
|
||||
locale_t locale)
|
||||
{
|
||||
_Char16State *cs;
|
||||
char32_t c32;
|
||||
|
||||
FIX_LOCALE(locale);
|
||||
if (ps == NULL)
|
||||
ps = &locale->c16rtomb;
|
||||
cs = (_Char16State *)ps;
|
||||
|
||||
/* If s is a null pointer, the value of parameter c16 is ignored. */
|
||||
if (s == NULL) {
|
||||
c32 = 0;
|
||||
} else if (cs->lead_surrogate >= 0xd800 &&
|
||||
cs->lead_surrogate <= 0xdbff) {
|
||||
/* We should see a trail surrogate now. */
|
||||
if (c16 < 0xdc00 || c16 > 0xdfff) {
|
||||
errno = EILSEQ;
|
||||
return ((size_t)-1);
|
||||
}
|
||||
c32 = 0x10000 + ((cs->lead_surrogate & 0x3ff) << 10 |
|
||||
(c16 & 0x3ff));
|
||||
} else if (c16 >= 0xd800 && c16 <= 0xdbff) {
|
||||
/* Store lead surrogate for next invocation. */
|
||||
cs->lead_surrogate = c16;
|
||||
return (0);
|
||||
} else {
|
||||
/* Regular character. */
|
||||
c32 = c16;
|
||||
}
|
||||
cs->lead_surrogate = 0;
|
||||
|
||||
return (c32rtomb_l(s, c32, &cs->c32_mbstate, locale));
|
||||
}
|
||||
|
||||
size_t
|
||||
c16rtomb(char * __restrict s, char16_t c16, mbstate_t * __restrict ps)
|
||||
{
|
||||
|
||||
return (c16rtomb_l(s, c16, ps, __get_locale()));
|
||||
}
|
59
lib/libc/locale/c32rtomb.c
Normal file
59
lib/libc/locale/c32rtomb.c
Normal file
@ -0,0 +1,59 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <errno.h>
|
||||
#include <uchar.h>
|
||||
#include <wchar.h>
|
||||
#include "xlocale_private.h"
|
||||
|
||||
size_t
|
||||
c32rtomb_l(char * __restrict s, char32_t c32, mbstate_t * __restrict ps,
|
||||
locale_t locale)
|
||||
{
|
||||
|
||||
/* Unicode Standard 5.0, D90: ill-formed characters. */
|
||||
if ((c32 >= 0xd800 && c32 <= 0xdfff) || c32 > 0x10ffff) {
|
||||
errno = EILSEQ;
|
||||
return ((size_t)-1);
|
||||
}
|
||||
|
||||
FIX_LOCALE(locale);
|
||||
if (ps == NULL)
|
||||
ps = &locale->c32rtomb;
|
||||
|
||||
/* Assume wchar_t uses UTF-32. */
|
||||
return (wcrtomb_l(s, c32, ps, locale));
|
||||
}
|
||||
|
||||
size_t
|
||||
c32rtomb(char * __restrict s, char32_t c32, mbstate_t * __restrict ps)
|
||||
{
|
||||
|
||||
return (c32rtomb_l(s, c32, ps, __get_locale()));
|
||||
}
|
89
lib/libc/locale/mbrtoc16.c
Normal file
89
lib/libc/locale/mbrtoc16.c
Normal file
@ -0,0 +1,89 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <uchar.h>
|
||||
#include "xlocale_private.h"
|
||||
|
||||
typedef struct {
|
||||
char16_t trail_surrogate;
|
||||
mbstate_t c32_mbstate;
|
||||
} _Char16State;
|
||||
|
||||
size_t
|
||||
mbrtoc16_l(char16_t * __restrict pc16, const char * __restrict s, size_t n,
|
||||
mbstate_t * __restrict ps, locale_t locale)
|
||||
{
|
||||
_Char16State *cs;
|
||||
char32_t c32;
|
||||
ssize_t len;
|
||||
|
||||
FIX_LOCALE(locale);
|
||||
if (ps == NULL)
|
||||
ps = &locale->mbrtoc16;
|
||||
cs = (_Char16State *)ps;
|
||||
|
||||
/*
|
||||
* Call straight into mbrtoc32_l() if we don't need to return a
|
||||
* character value. According to the spec, if s is a null
|
||||
* pointer, the value of parameter pc16 is also ignored.
|
||||
*/
|
||||
if (pc16 == NULL || s == NULL) {
|
||||
cs->trail_surrogate = 0;
|
||||
return (mbrtoc32_l(NULL, s, n, &cs->c32_mbstate, locale));
|
||||
}
|
||||
|
||||
/* Return the trail surrogate from the previous invocation. */
|
||||
if (cs->trail_surrogate >= 0xdc00 && cs->trail_surrogate <= 0xdfff) {
|
||||
*pc16 = cs->trail_surrogate;
|
||||
cs->trail_surrogate = 0;
|
||||
return ((size_t)-3);
|
||||
}
|
||||
|
||||
len = mbrtoc32_l(&c32, s, n, &cs->c32_mbstate, locale);
|
||||
if (len >= 0) {
|
||||
if (c32 < 0x10000) {
|
||||
/* Fits in one UTF-16 character. */
|
||||
*pc16 = c32;
|
||||
} else {
|
||||
/* Split up in a surrogate pair. */
|
||||
c32 -= 0x10000;
|
||||
*pc16 = 0xd800 | (c32 >> 10);
|
||||
cs->trail_surrogate = 0xdc00 | (c32 & 0x3ff);
|
||||
}
|
||||
}
|
||||
return (len);
|
||||
}
|
||||
|
||||
size_t
|
||||
mbrtoc16(char16_t * __restrict pc16, const char * __restrict s, size_t n,
|
||||
mbstate_t * __restrict ps)
|
||||
{
|
||||
|
||||
return (mbrtoc16_l(pc16, s, n, ps, __get_locale()));
|
||||
}
|
53
lib/libc/locale/mbrtoc32.c
Normal file
53
lib/libc/locale/mbrtoc32.c
Normal file
@ -0,0 +1,53 @@
|
||||
/*-
|
||||
* Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <uchar.h>
|
||||
#include <wchar.h>
|
||||
#include "xlocale_private.h"
|
||||
|
||||
size_t
|
||||
mbrtoc32_l(char32_t * __restrict pc32, const char * __restrict s, size_t n,
|
||||
mbstate_t * __restrict ps, locale_t locale)
|
||||
{
|
||||
|
||||
FIX_LOCALE(locale);
|
||||
if (ps == NULL)
|
||||
ps = &locale->mbrtoc32;
|
||||
|
||||
/* Assume wchar_t uses UTF-32. */
|
||||
return (mbrtowc_l(pc32, s, n, ps, locale));
|
||||
}
|
||||
|
||||
size_t
|
||||
mbrtoc32(char32_t * __restrict pc32, const char * __restrict s, size_t n,
|
||||
mbstate_t * __restrict ps)
|
||||
{
|
||||
|
||||
return (mbrtoc32_l(pc32, s, n, ps, __get_locale()));
|
||||
}
|
@ -24,11 +24,13 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd April 8, 2004
|
||||
.Dd May 21, 2013
|
||||
.Dt MBRTOWC 3
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm mbrtowc
|
||||
.Nm mbrtowc ,
|
||||
.Nm mbrtoc16 ,
|
||||
.Nm mbrtoc32
|
||||
.Nd "convert a character to a wide-character code (restartable)"
|
||||
.Sh LIBRARY
|
||||
.Lb libc
|
||||
@ -36,35 +38,51 @@
|
||||
.In wchar.h
|
||||
.Ft size_t
|
||||
.Fo mbrtowc
|
||||
.Fa "wchar_t * restrict pwc" "const char * restrict s" "size_t n"
|
||||
.Fa "wchar_t * restrict pc" "const char * restrict s" "size_t n"
|
||||
.Fa "mbstate_t * restrict ps"
|
||||
.Fc
|
||||
.In uchar.h
|
||||
.Ft size_t
|
||||
.Fo mbrtoc16
|
||||
.Fa "char16_t * restrict pc" "const char * restrict s" "size_t n"
|
||||
.Fa "mbstate_t * restrict ps"
|
||||
.Fc
|
||||
.Ft size_t
|
||||
.Fo mbrtoc32
|
||||
.Fa "char32_t * restrict pc" "const char * restrict s" "size_t n"
|
||||
.Fa "mbstate_t * restrict ps"
|
||||
.Fc
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Fn mbrtowc
|
||||
function inspects at most
|
||||
.Fn mbrtowc ,
|
||||
.Fn mbrtoc16
|
||||
and
|
||||
.Fn mbrtoc32
|
||||
functions inspect at most
|
||||
.Fa n
|
||||
bytes pointed to by
|
||||
.Fa s
|
||||
to determine the number of bytes needed to complete the next multibyte
|
||||
character.
|
||||
If a character can be completed, and
|
||||
.Fa pwc
|
||||
.Fa pc
|
||||
is not
|
||||
.Dv NULL ,
|
||||
the wide character which is represented by
|
||||
.Fa s
|
||||
is stored in the
|
||||
.Vt wchar_t
|
||||
.Vt wchar_t ,
|
||||
.Vt char16_t
|
||||
or
|
||||
.Vt char32_t
|
||||
it points to.
|
||||
.Pp
|
||||
If
|
||||
.Fa s
|
||||
is
|
||||
.Dv NULL ,
|
||||
.Fn mbrtowc
|
||||
behaves as if
|
||||
.Fa pwc
|
||||
these functions behave as if
|
||||
.Fa pc
|
||||
was
|
||||
.Dv NULL ,
|
||||
.Fa s
|
||||
@ -81,15 +99,24 @@ argument,
|
||||
is used to keep track of the shift state.
|
||||
If it is
|
||||
.Dv NULL ,
|
||||
.Fn mbrtowc
|
||||
uses an internal, static
|
||||
these functions use an internal, static
|
||||
.Vt mbstate_t
|
||||
object, which is initialized to the initial conversion state
|
||||
at program startup.
|
||||
.Pp
|
||||
As a single
|
||||
.Vt char16_t
|
||||
is not large enough to represent certain multibyte characters, the
|
||||
.Fn mbrtoc16
|
||||
function may need to be invoked multiple times to convert a single
|
||||
multibyte character sequence.
|
||||
.Sh RETURN VALUES
|
||||
The
|
||||
.Fn mbrtowc
|
||||
functions returns:
|
||||
.Fn mbrtowc ,
|
||||
.Fn mbrtoc16
|
||||
and
|
||||
.Fn mbrtoc32
|
||||
functions return:
|
||||
.Bl -tag -width indent
|
||||
.It 0
|
||||
The next
|
||||
@ -100,10 +127,13 @@ represent the null wide character
|
||||
.It >0
|
||||
The next
|
||||
.Fa n
|
||||
or fewer bytes
|
||||
represent a valid character,
|
||||
.Fn mbrtowc
|
||||
returns the number of bytes used to complete the multibyte character.
|
||||
or fewer bytes represent a valid character, these functions
|
||||
return the number of bytes used to complete the multibyte character.
|
||||
.It Po Vt size_t Pc Ns \-1
|
||||
An encoding error has occurred.
|
||||
The next
|
||||
.Fa n
|
||||
or fewer bytes do not contribute to a valid multibyte character.
|
||||
.It Po Vt size_t Pc Ns \-2
|
||||
The next
|
||||
.Fa n
|
||||
@ -111,16 +141,23 @@ contribute to, but do not complete, a valid multibyte character sequence,
|
||||
and all
|
||||
.Fa n
|
||||
bytes have been processed.
|
||||
.It Po Vt size_t Pc Ns \-1
|
||||
An encoding error has occurred.
|
||||
The next
|
||||
.Fa n
|
||||
or fewer bytes do not contribute to a valid multibyte character.
|
||||
.El
|
||||
.Pp
|
||||
The
|
||||
.Fn mbrtoc16
|
||||
function also returns:
|
||||
.Bl -tag -width indent
|
||||
.It Po Vt size_t Pc Ns \-3
|
||||
The next character resulting from a previous call has been stored.
|
||||
No bytes from the input have been consumed.
|
||||
.El
|
||||
.Sh ERRORS
|
||||
The
|
||||
.Fn mbrtowc
|
||||
function will fail if:
|
||||
.Fn mbrtowc ,
|
||||
.Fn mbrtoc16
|
||||
and
|
||||
.Fn mbrtoc32
|
||||
functions will fail if:
|
||||
.Bl -tag -width Er
|
||||
.It Bq Er EILSEQ
|
||||
An invalid multibyte sequence was detected.
|
||||
@ -134,6 +171,9 @@ The conversion state is invalid.
|
||||
.Xr wcrtomb 3
|
||||
.Sh STANDARDS
|
||||
The
|
||||
.Fn mbrtowc
|
||||
function conforms to
|
||||
.St -isoC-99 .
|
||||
.Fn mbrtowc ,
|
||||
.Fn mbrtoc16
|
||||
and
|
||||
.Fn mbrtoc32
|
||||
functions conform to
|
||||
.St -isoC-2011 .
|
||||
|
@ -24,24 +24,34 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd April 8, 2004
|
||||
.Dd May 21, 2013
|
||||
.Dt WCRTOMB 3
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm wcrtomb
|
||||
.Nm wcrtomb ,
|
||||
.Nm c16rtomb ,
|
||||
.Nm c32rtomb
|
||||
.Nd "convert a wide-character code to a character (restartable)"
|
||||
.Sh LIBRARY
|
||||
.Lb libc
|
||||
.Sh SYNOPSIS
|
||||
.In wchar.h
|
||||
.Ft size_t
|
||||
.Fn wcrtomb "char * restrict s" "wchar_t wc" "mbstate_t * restrict ps"
|
||||
.Fn wcrtomb "char * restrict s" "wchar_t c" "mbstate_t * restrict ps"
|
||||
.In uchar.h
|
||||
.Ft size_t
|
||||
.Fn c16rtomb "char * restrict s" "char16_t c" "mbstate_t * restrict ps"
|
||||
.Ft size_t
|
||||
.Fn c32rtomb "char * restrict s" "char32_t c" "mbstate_t * restrict ps"
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Fn wcrtomb
|
||||
function stores a multibyte sequence representing the
|
||||
.Fn wcrtomb ,
|
||||
.Fn c16rtomb
|
||||
and
|
||||
.Fn c32rtomb
|
||||
functions store a multibyte sequence representing the
|
||||
wide character
|
||||
.Fa wc ,
|
||||
.Fa c ,
|
||||
including any necessary shift sequences, to the
|
||||
character array
|
||||
.Fa s ,
|
||||
@ -53,11 +63,10 @@ If
|
||||
.Fa s
|
||||
is
|
||||
.Dv NULL ,
|
||||
.Fn wcrtomb
|
||||
behaves as if
|
||||
these functions behave as if
|
||||
.Fa s
|
||||
pointed to an internal buffer and
|
||||
.Fa wc
|
||||
.Fa c
|
||||
was a null wide character (L'\e0').
|
||||
.Pp
|
||||
The
|
||||
@ -67,26 +76,32 @@ argument,
|
||||
is used to keep track of the shift state.
|
||||
If it is
|
||||
.Dv NULL ,
|
||||
.Fn wcrtomb
|
||||
uses an internal, static
|
||||
these functions use an internal, static
|
||||
.Vt mbstate_t
|
||||
object, which is initialized to the initial conversion state
|
||||
at program startup.
|
||||
.Pp
|
||||
As certain multibyte characters may only be represented by a series of
|
||||
16-bit characters, the
|
||||
.Fn c16rtomb
|
||||
function may need to be invoked multiple times before a multibyte sequence is
|
||||
returned.
|
||||
.Sh RETURN VALUES
|
||||
The
|
||||
.Fn wcrtomb
|
||||
functions returns the length (in bytes) of the multibyte sequence
|
||||
These functions return the length (in bytes) of the multibyte sequence
|
||||
needed to represent
|
||||
.Fa wc ,
|
||||
.Fa c ,
|
||||
or
|
||||
.Po Vt size_t Pc Ns \-1
|
||||
if
|
||||
.Fa wc
|
||||
.Fa c
|
||||
is not a valid wide character code.
|
||||
.Sh ERRORS
|
||||
The
|
||||
.Fn wcrtomb
|
||||
function will fail if:
|
||||
.Fn wcrtomb ,
|
||||
.Fn c16rtomb
|
||||
and
|
||||
.Fn c32rtomb
|
||||
functions will fail if:
|
||||
.Bl -tag -width Er
|
||||
.It Bq Er EILSEQ
|
||||
An invalid wide character code was specified.
|
||||
@ -100,6 +115,9 @@ The conversion state is invalid.
|
||||
.Xr wctomb 3
|
||||
.Sh STANDARDS
|
||||
The
|
||||
.Fn wcrtomb
|
||||
function conforms to
|
||||
.St -isoC-99 .
|
||||
.Fn wcrtomb ,
|
||||
.Fn c16rtomb
|
||||
and
|
||||
.Fn c32rtomb
|
||||
functions conform to
|
||||
.St -isoC-2011 .
|
||||
|
@ -109,6 +109,10 @@ struct _xlocale {
|
||||
__mbstate_t mblen;
|
||||
/** Persistent state used by mbrlen() calls. */
|
||||
__mbstate_t mbrlen;
|
||||
/** Persistent state used by mbrtoc16() calls. */
|
||||
__mbstate_t mbrtoc16;
|
||||
/** Persistent state used by mbrtoc32() calls. */
|
||||
__mbstate_t mbrtoc32;
|
||||
/** Persistent state used by mbrtowc() calls. */
|
||||
__mbstate_t mbrtowc;
|
||||
/** Persistent state used by mbsnrtowcs() calls. */
|
||||
@ -117,6 +121,10 @@ struct _xlocale {
|
||||
__mbstate_t mbsrtowcs;
|
||||
/** Persistent state used by mbtowc() calls. */
|
||||
__mbstate_t mbtowc;
|
||||
/** Persistent state used by c16rtomb() calls. */
|
||||
__mbstate_t c16rtomb;
|
||||
/** Persistent state used by c32rtomb() calls. */
|
||||
__mbstate_t c32rtomb;
|
||||
/** Persistent state used by wcrtomb() calls. */
|
||||
__mbstate_t wcrtomb;
|
||||
/** Persistent state used by wcsnrtombs() calls. */
|
||||
|
@ -89,6 +89,12 @@ typedef int __ct_rune_t; /* arg type for ctype funcs */
|
||||
typedef __ct_rune_t __rune_t; /* rune_t (see above) */
|
||||
typedef __ct_rune_t __wint_t; /* wint_t (see above) */
|
||||
|
||||
/* Clang already provides these types as built-ins, but only in C++ mode. */
|
||||
#if !defined(__clang__) || !defined(__cplusplus)
|
||||
typedef __uint_least16_t __char16_t;
|
||||
typedef __uint_least32_t __char32_t;
|
||||
#endif
|
||||
|
||||
typedef __uint32_t __dev_t; /* device number */
|
||||
|
||||
typedef __uint32_t __fixpt_t; /* fixed point number */
|
||||
|
@ -14,7 +14,9 @@ TESTS= test-mbrtowc \
|
||||
test-wcstombs \
|
||||
test-mblen \
|
||||
test-iswctype \
|
||||
test-towctrans
|
||||
test-towctrans \
|
||||
test-c16rtomb \
|
||||
test-mbrtoc16
|
||||
|
||||
.PHONY: tests
|
||||
tests: ${TESTS}
|
||||
|
115
tools/regression/lib/libc/locale/test-c16rtomb.c
Normal file
115
tools/regression/lib/libc/locale/test-c16rtomb.c
Normal file
@ -0,0 +1,115 @@
|
||||
/*-
|
||||
* Copyright (c) 2002 Tim J. Robbins
|
||||
* All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
* Test program for c16rtomb() as specified by ISO/IEC 9899:2011.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <uchar.h>
|
||||
|
||||
/*
 * Exercise c16rtomb() in the initial C/POSIX locale and then in
 * en_US.UTF-8.
 *
 * Prints "1..1" before the checks and "ok 1 - c16rtomb()" once every
 * assertion has held; a failing assertion aborts the program.
 * Returns 0 when all checks pass.
 */
int
main(int argc, char *argv[])
{
	mbstate_t s;
	char buf[MB_LEN_MAX + 1];
	const char *locale;

	/*
	 * C/POSIX locale.
	 */

	printf("1..1\n");

	/*
	 * If the buffer argument is NULL, c16 is implicitly 0,
	 * c16rtomb() resets its internal state.
	 */
	assert(c16rtomb(NULL, L'\0', NULL) == 1);
	assert(c16rtomb(NULL, 0xdc00, NULL) == 1);

	/* Null wide character. */
	memset(&s, 0, sizeof(s));
	memset(buf, 0xcc, sizeof(buf));
	assert(c16rtomb(buf, 0, &s) == 1);
	/* The 0xcc fill must survive past the single byte written. */
	assert((unsigned char)buf[0] == 0 && (unsigned char)buf[1] == 0xcc);

	/* Latin letter A, internal state. */
	assert(c16rtomb(NULL, L'\0', NULL) == 1);
	assert(c16rtomb(NULL, L'A', NULL) == 1);

	/* Latin letter A. */
	memset(&s, 0, sizeof(s));
	memset(buf, 0xcc, sizeof(buf));
	assert(c16rtomb(buf, L'A', &s) == 1);
	assert((unsigned char)buf[0] == 'A' && (unsigned char)buf[1] == 0xcc);

	/* Unicode character 'Pile of poo'. */
	memset(&s, 0, sizeof(s));
	memset(buf, 0xcc, sizeof(buf));
	assert(c16rtomb(buf, 0xd83d, &s) == 0);
	/* Clear errno so the EILSEQ check cannot pass on a stale value. */
	errno = 0;
	assert(c16rtomb(buf, 0xdca9, &s) == (size_t)-1);
	assert(errno == EILSEQ);

	/*
	 * UTF-8.
	 */

	/*
	 * setlocale() returns NULL when the locale is unavailable; check
	 * for that before handing the result to strcmp().
	 */
	locale = setlocale(LC_CTYPE, "en_US.UTF-8");
	assert(locale != NULL);
	assert(strcmp(locale, "en_US.UTF-8") == 0);

	/* Unicode character 'Pile of poo'. */
	memset(&s, 0, sizeof(s));
	memset(buf, 0xcc, sizeof(buf));
	assert(c16rtomb(buf, 0xd83d, &s) == 0);
	assert(c16rtomb(buf, 0xdca9, &s) == 4);
	assert((unsigned char)buf[0] == 0xf0 && (unsigned char)buf[1] == 0x9f &&
	    (unsigned char)buf[2] == 0x92 && (unsigned char)buf[3] == 0xa9 &&
	    (unsigned char)buf[4] == 0xcc);

	/* Invalid code; 'Pile of poo' without the trail surrogate. */
	memset(&s, 0, sizeof(s));
	memset(buf, 0xcc, sizeof(buf));
	assert(c16rtomb(buf, 0xd83d, &s) == 0);
	errno = 0;
	assert(c16rtomb(buf, L'A', &s) == (size_t)-1);
	assert(errno == EILSEQ);

	/* Invalid code; 'Pile of poo' without the lead surrogate. */
	memset(&s, 0, sizeof(s));
	memset(buf, 0xcc, sizeof(buf));
	errno = 0;
	assert(c16rtomb(buf, 0xdca9, &s) == (size_t)-1);
	assert(errno == EILSEQ);

	printf("ok 1 - c16rtomb()\n");

	return (0);
}
|
150
tools/regression/lib/libc/locale/test-mbrtoc16.c
Normal file
150
tools/regression/lib/libc/locale/test-mbrtoc16.c
Normal file
@ -0,0 +1,150 @@
|
||||
/*-
|
||||
* Copyright (c) 2002 Tim J. Robbins
|
||||
* All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
* Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <uchar.h>
|
||||
|
||||
/*
 * Exercise mbrtoc16() in the initial C/POSIX locale and then in
 * en_US.UTF-8.
 *
 * Prints "1..1" before the checks and "ok 1 - mbrtoc16()" once every
 * assertion has held; a failing assertion aborts the program.
 * Returns 0 when all checks pass.
 */
int
main(int argc, char *argv[])
{
	mbstate_t s;
	char16_t c16;
	const char *locale;

	/*
	 * C/POSIX locale.
	 */

	printf("1..1\n");

	/* Null wide character, internal state. */
	assert(mbrtoc16(&c16, "", 1, NULL) == 0);
	assert(c16 == 0);

	/* Null wide character. */
	memset(&s, 0, sizeof(s));
	assert(mbrtoc16(&c16, "", 1, &s) == 0);
	assert(c16 == 0);

	/* Latin letter A, internal state. */
	assert(mbrtoc16(NULL, 0, 0, NULL) == 0);
	assert(mbrtoc16(&c16, "A", 1, NULL) == 1);
	assert(c16 == L'A');

	/* Latin letter A. */
	memset(&s, 0, sizeof(s));
	assert(mbrtoc16(&c16, "A", 1, &s) == 1);
	assert(c16 == L'A');

	/* Incomplete character sequence. */
	c16 = L'z';
	memset(&s, 0, sizeof(s));
	assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
	/* The output must be left untouched when nothing was converted. */
	assert(c16 == L'z');

	/* Check that mbrtoc16() doesn't access the buffer when n == 0. */
	c16 = L'z';
	memset(&s, 0, sizeof(s));
	assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
	assert(c16 == L'z');

	/*
	 * UTF-8.
	 */

	/*
	 * setlocale() returns NULL when the locale is unavailable; check
	 * for that before handing the result to strcmp().
	 */
	locale = setlocale(LC_CTYPE, "en_US.UTF-8");
	assert(locale != NULL);
	assert(strcmp(locale, "en_US.UTF-8") == 0);

	/* Null wide character, internal state. */
	assert(mbrtoc16(NULL, 0, 0, NULL) == 0);
	assert(mbrtoc16(&c16, "", 1, NULL) == 0);
	assert(c16 == 0);

	/* Null wide character. */
	memset(&s, 0, sizeof(s));
	assert(mbrtoc16(&c16, "", 1, &s) == 0);
	assert(c16 == 0);

	/* Latin letter A, internal state. */
	assert(mbrtoc16(NULL, 0, 0, NULL) == 0);
	assert(mbrtoc16(&c16, "A", 1, NULL) == 1);
	assert(c16 == L'A');

	/* Latin letter A. */
	memset(&s, 0, sizeof(s));
	assert(mbrtoc16(&c16, "A", 1, &s) == 1);
	assert(c16 == L'A');

	/* Incomplete character sequence (zero length). */
	c16 = L'z';
	memset(&s, 0, sizeof(s));
	assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
	assert(c16 == L'z');

	/* Incomplete character sequence (truncated double-byte). */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	assert(mbrtoc16(&c16, "\xc3", 1, &s) == (size_t)-2);

	/* Same as above, but complete. */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	assert(mbrtoc16(&c16, "\xc3\x84", 2, &s) == 2);
	assert(c16 == 0xc4);

	/* Test restarting behaviour. */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	assert(mbrtoc16(&c16, "\xc3", 1, &s) == (size_t)-2);
	assert(c16 == 0);
	/* Feed the continuation byte using the state saved in s. */
	assert(mbrtoc16(&c16, "\xb7", 1, &s) == 1);
	assert(c16 == 0xf7);

	/* Surrogate pair. */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	assert(mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s) == 4);
	assert(c16 == 0xd83d);
	/* The trail surrogate is delivered by a second call with n == 0. */
	assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-3);
	assert(c16 == 0xdca9);

	printf("ok 1 - mbrtoc16()\n");

	return (0);
}
|
Loading…
x
Reference in New Issue
Block a user