1
0
mirror of https://git.FreeBSD.org/ports.git synced 2024-11-27 00:57:50 +00:00

Add unidesc 2.12, Unicode Description Utilities.

Unidesc consists of four programs for finding out what is in a Unicode file.

They are useful when working with Unicode files when one doesn't know the
writing system, doesn't have the necessary font, needs to inspect invisible
characters, needs to find out whether characters have been combined or in what
order they occur, or needs statistics on which characters occur.
This commit is contained in:
Thierry Thomas 2005-05-15 15:54:36 +00:00
parent a62d1203b7
commit 71fa9a6779
Notes: svn2git 2021-03-31 03:12:20 +00:00
svn path=/head/; revision=135293
11 changed files with 219 additions and 0 deletions

View File

@ -478,6 +478,7 @@
SUBDIR += ucspi-unix
SUBDIR += udesc_dump
SUBDIR += udfclient
SUBDIR += unidesc
SUBDIR += unquote
SUBDIR += upsd
SUBDIR += upsmon

45
sysutils/unidesc/Makefile Normal file
View File

@ -0,0 +1,45 @@
# New ports collection makefile for:	unidesc
# Date created:		Sun 15 May 2005
# Whom:			thierry@pompo.net
#
# $FreeBSD$
#

PORTNAME=	unidesc
PORTVERSION=	2.12
CATEGORIES=	sysutils textproc
MASTER_SITES=	http://www.cis.upenn.edu/~wjposer/.downloads/
# The distfile name (unidesc.tgz) carries no version number, so it is stored
# under a versioned DIST_SUBDIR.
DISTNAME=	${PORTNAME}
EXTRACT_SUFX=	.tgz
DIST_SUBDIR=	${PORTNAME}-${PORTVERSION}

MAINTAINER=	thierry@FreeBSD.org
COMMENT=	Unicode Description Utilities

USE_GETTEXT=	yes
USE_REINPLACE=	yes

# Sample data copied to EXAMPLESDIR by post-install; paths are relative
# to WRKSRC.
TESTFILES=	TestData/Test1.ann TestData/Test1.u TestData/Test2.ann	\
		TestData/Test2.u TestData/Test3.ann TestData/Test3.u	\
		TestData/Test4.ann TestData/Test4.u
DEMOS=		demo.jpg demo.u demo32.u

PORTDOCS=	README
MAN1=		unidesc.1 uniname.1 ExplicateUTF8.1 unihist.1

# NOTE(review): :U is presumably the upper-case modifier of the make(1) this
# port was written for (i.e. the sources unpack into UNIDESC) -- confirm.
WRKSRC=		${WRKDIR}/${PORTNAME:U}

# Fix up paths in the extracted sources: replace /usr/local with PREFIX and
# fill in the %%LOCALBASE%% placeholder in the distribution Makefile, and make
# README refer to the examples directory instead of TestData.
pre-configure:
	@${REINPLACE_CMD} -e 's|/usr/local|${PREFIX}|;s|%%LOCALBASE%%|${LOCALBASE}|' \
		${WRKSRC}/Makefile
	@${REINPLACE_CMD} -e 's|TestData|${EXAMPLESDIR}|' ${WRKSRC}/README

# Unless NOPORTDOCS is defined, install README into DOCSDIR and the test and
# demo files into EXAMPLESDIR.
post-install:
.if !defined(NOPORTDOCS)
	${MKDIR} ${DOCSDIR} ${EXAMPLESDIR}
	${INSTALL_DATA} ${PORTDOCS:S|^|${WRKSRC}/|} ${DOCSDIR}
	${INSTALL_DATA} ${TESTFILES:S|^|${WRKSRC}/|} ${EXAMPLESDIR}
	${INSTALL_DATA} ${DEMOS:S|^|${WRKSRC}/|} ${EXAMPLESDIR}
.endif

.include <bsd.port.mk>

View File

@ -0,0 +1,2 @@
MD5 (unidesc-2.12/unidesc.tgz) = 7ec1c6584842403992837e238ee1462d
SIZE (unidesc-2.12/unidesc.tgz) = 204808

View File

@ -0,0 +1,23 @@
--- Makefile.orig Sun Jan 9 20:55:19 2005
+++ Makefile Sun May 15 16:11:22 2005
@@ -5,8 +5,9 @@
BINOBJS= unidesc.o uniname.o unames.o ExplicateUTF8.o unihist.o
MANS= unidesc.1 uniname.1 ExplicateUTF8.1 unihist.1
-CFLAGS= -pedantic -O
-CC=gcc
+CFLAGS+= -pedantic -I%%LOCALBASE%%/include
+LDFLAGS+= -L%%LOCALBASE%%/lib -lintl
+# CC=gcc
all: ${BINS}
@@ -42,7 +43,7 @@
${CC} -c ${CFLAGS} unihist.c
unihist: unihist.o Get_UTF32_From_UTF8.o putu8.o
- ${CC} -o unihist unihist.o Get_UTF32_From_UTF8.o putu8.o
+ ${CC} -o unihist unihist.o Get_UTF32_From_UTF8.o putu8.o ${LDFLAGS}
ExplicateUTF8.o: ExplicateUTF8.c

View File

@ -0,0 +1,23 @@
Unidesc consists of four programs for finding out what is in a Unicode file.
They are useful when working with Unicode files when one doesn't know the
writing system, doesn't have the necessary font, needs to inspect invisible
characters, needs to find out whether characters have been combined or in what
order they occur, or needs statistics on which characters occur.
uniname defaults to printing the character offset of each character, its byte
offset, its hex code value, its encoding, the glyph itself, and its name.
unidesc reports the character ranges to which different portions of the text
belong. It can also be used to identify Unicode encodings (e.g. UTF-16be)
flagged by magic numbers.
unihist generates a histogram of the characters in its input, which must be
encoded in UTF-8 Unicode. By default, for each character it prints the
frequency of the character as a percentage of the total, the absolute number of
tokens in the input, the UTF-32 code in hexadecimal, and, if the character is
displayable, the glyph itself as UTF-8 Unicode.
ExplicateUTF8 is intended for debugging or for learning about Unicode. It
determines and explains the validity of a sequence of bytes as a UTF-8 encoding.
WWW: http://www.cis.upenn.edu/~wjposer/unidesc.html

View File

@ -0,0 +1,16 @@
bin/ExplicateUTF8
bin/unidesc
bin/unihist
bin/uniname
%%PORTDOCS%%%%EXAMPLESDIR%%/Test1.ann
%%PORTDOCS%%%%EXAMPLESDIR%%/Test1.u
%%PORTDOCS%%%%EXAMPLESDIR%%/Test2.ann
%%PORTDOCS%%%%EXAMPLESDIR%%/Test2.u
%%PORTDOCS%%%%EXAMPLESDIR%%/Test3.ann
%%PORTDOCS%%%%EXAMPLESDIR%%/Test3.u
%%PORTDOCS%%%%EXAMPLESDIR%%/Test4.ann
%%PORTDOCS%%%%EXAMPLESDIR%%/Test4.u
%%PORTDOCS%%%%EXAMPLESDIR%%/demo.jpg
%%PORTDOCS%%%%EXAMPLESDIR%%/demo.u
%%PORTDOCS%%%%EXAMPLESDIR%%/demo32.u
%%PORTDOCS%%@dirrm %%EXAMPLESDIR%%

View File

@ -0,0 +1,45 @@
# New ports collection makefile for:	unidesc
# Date created:		Sun 15 May 2005
# Whom:			thierry@pompo.net
#
# $FreeBSD$
#

PORTNAME=	unidesc
PORTVERSION=	2.12
CATEGORIES=	sysutils textproc
MASTER_SITES=	http://www.cis.upenn.edu/~wjposer/.downloads/
# The distfile name (unidesc.tgz) carries no version number, so it is stored
# under a versioned DIST_SUBDIR.
DISTNAME=	${PORTNAME}
EXTRACT_SUFX=	.tgz
DIST_SUBDIR=	${PORTNAME}-${PORTVERSION}

MAINTAINER=	thierry@FreeBSD.org
COMMENT=	Unicode Description Utilities

USE_GETTEXT=	yes
USE_REINPLACE=	yes

# Sample data copied to EXAMPLESDIR by post-install; paths are relative
# to WRKSRC.
TESTFILES=	TestData/Test1.ann TestData/Test1.u TestData/Test2.ann	\
		TestData/Test2.u TestData/Test3.ann TestData/Test3.u	\
		TestData/Test4.ann TestData/Test4.u
DEMOS=		demo.jpg demo.u demo32.u

PORTDOCS=	README
MAN1=		unidesc.1 uniname.1 ExplicateUTF8.1 unihist.1

# NOTE(review): :U is presumably the upper-case modifier of the make(1) this
# port was written for (i.e. the sources unpack into UNIDESC) -- confirm.
WRKSRC=		${WRKDIR}/${PORTNAME:U}

# Fix up paths in the extracted sources: replace /usr/local with PREFIX and
# fill in the %%LOCALBASE%% placeholder in the distribution Makefile, and make
# README refer to the examples directory instead of TestData.
pre-configure:
	@${REINPLACE_CMD} -e 's|/usr/local|${PREFIX}|;s|%%LOCALBASE%%|${LOCALBASE}|' \
		${WRKSRC}/Makefile
	@${REINPLACE_CMD} -e 's|TestData|${EXAMPLESDIR}|' ${WRKSRC}/README

# Unless NOPORTDOCS is defined, install README into DOCSDIR and the test and
# demo files into EXAMPLESDIR.
post-install:
.if !defined(NOPORTDOCS)
	${MKDIR} ${DOCSDIR} ${EXAMPLESDIR}
	${INSTALL_DATA} ${PORTDOCS:S|^|${WRKSRC}/|} ${DOCSDIR}
	${INSTALL_DATA} ${TESTFILES:S|^|${WRKSRC}/|} ${EXAMPLESDIR}
	${INSTALL_DATA} ${DEMOS:S|^|${WRKSRC}/|} ${EXAMPLESDIR}
.endif

.include <bsd.port.mk>

View File

@ -0,0 +1,2 @@
MD5 (unidesc-2.12/unidesc.tgz) = 7ec1c6584842403992837e238ee1462d
SIZE (unidesc-2.12/unidesc.tgz) = 204808

View File

@ -0,0 +1,23 @@
--- Makefile.orig Sun Jan 9 20:55:19 2005
+++ Makefile Sun May 15 16:11:22 2005
@@ -5,8 +5,9 @@
BINOBJS= unidesc.o uniname.o unames.o ExplicateUTF8.o unihist.o
MANS= unidesc.1 uniname.1 ExplicateUTF8.1 unihist.1
-CFLAGS= -pedantic -O
-CC=gcc
+CFLAGS+= -pedantic -I%%LOCALBASE%%/include
+LDFLAGS+= -L%%LOCALBASE%%/lib -lintl
+# CC=gcc
all: ${BINS}
@@ -42,7 +43,7 @@
${CC} -c ${CFLAGS} unihist.c
unihist: unihist.o Get_UTF32_From_UTF8.o putu8.o
- ${CC} -o unihist unihist.o Get_UTF32_From_UTF8.o putu8.o
+ ${CC} -o unihist unihist.o Get_UTF32_From_UTF8.o putu8.o ${LDFLAGS}
ExplicateUTF8.o: ExplicateUTF8.c

View File

@ -0,0 +1,23 @@
Unidesc consists of four programs for finding out what is in a Unicode file.
They are useful when working with Unicode files when one doesn't know the
writing system, doesn't have the necessary font, needs to inspect invisible
characters, needs to find out whether characters have been combined or in what
order they occur, or needs statistics on which characters occur.
uniname defaults to printing the character offset of each character, its byte
offset, its hex code value, its encoding, the glyph itself, and its name.
unidesc reports the character ranges to which different portions of the text
belong. It can also be used to identify Unicode encodings (e.g. UTF-16be)
flagged by magic numbers.
unihist generates a histogram of the characters in its input, which must be
encoded in UTF-8 Unicode. By default, for each character it prints the
frequency of the character as a percentage of the total, the absolute number of
tokens in the input, the UTF-32 code in hexadecimal, and, if the character is
displayable, the glyph itself as UTF-8 Unicode.
ExplicateUTF8 is intended for debugging or for learning about Unicode. It
determines and explains the validity of a sequence of bytes as a UTF-8 encoding.
WWW: http://www.cis.upenn.edu/~wjposer/unidesc.html

View File

@ -0,0 +1,16 @@
bin/ExplicateUTF8
bin/unidesc
bin/unihist
bin/uniname
%%PORTDOCS%%%%EXAMPLESDIR%%/Test1.ann
%%PORTDOCS%%%%EXAMPLESDIR%%/Test1.u
%%PORTDOCS%%%%EXAMPLESDIR%%/Test2.ann
%%PORTDOCS%%%%EXAMPLESDIR%%/Test2.u
%%PORTDOCS%%%%EXAMPLESDIR%%/Test3.ann
%%PORTDOCS%%%%EXAMPLESDIR%%/Test3.u
%%PORTDOCS%%%%EXAMPLESDIR%%/Test4.ann
%%PORTDOCS%%%%EXAMPLESDIR%%/Test4.u
%%PORTDOCS%%%%EXAMPLESDIR%%/demo.jpg
%%PORTDOCS%%%%EXAMPLESDIR%%/demo.u
%%PORTDOCS%%%%EXAMPLESDIR%%/demo32.u
%%PORTDOCS%%@dirrm %%EXAMPLESDIR%%