mirror of
https://git.FreeBSD.org/ports.git
synced 2024-10-19 19:59:43 +00:00
A fast implementation of the HTML 5 parsing spec for Python. Parsing
is done in C using a variant of the gumbo parser. The gumbo parse tree is then transformed into an lxml tree, also in C, yielding parse times that can be a thirtieth of the html5lib parse times. That is a speedup of 30x. This differs, for instance, from the gumbo python bindings, where the initial parsing is done in C but the transformation into the final tree is done in python. WWW: https://html5-parser.readthedocs.io/
This commit is contained in:
parent
870a9aef27
commit
134ab3ff52
Notes:
svn2git
2021-03-31 03:12:20 +00:00
svn path=/head/; revision=446984
@ -1668,6 +1668,7 @@
|
||||
SUBDIR += py-horizon
|
||||
SUBDIR += py-hpack
|
||||
SUBDIR += py-html
|
||||
SUBDIR += py-html5-parser
|
||||
SUBDIR += py-html5lib
|
||||
SUBDIR += py-http-parser
|
||||
SUBDIR += py-httpie
|
||||
|
19
www/py-html5-parser/Makefile
Normal file
19
www/py-html5-parser/Makefile
Normal file
@ -0,0 +1,19 @@
|
||||
# $FreeBSD$
|
||||
|
||||
PORTNAME= html5-parser
|
||||
PORTVERSION= 0.4.3
|
||||
CATEGORIES= www python
|
||||
MASTER_SITES= CHEESESHOP
|
||||
PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
|
||||
|
||||
MAINTAINER= madpilot@FreeBSD.org
|
||||
COMMENT= Fast implementation of the HTML 5 parsing spec for Python
|
||||
|
||||
LICENSE= APACHE20
|
||||
|
||||
# lxml is needed to build the C extension and, because the parser hands
# back lxml trees, it must also be installed for the module to work at
# runtime -- so declare it as both a build and a run dependency.
# NOTE(review): check upstream setup.py for any further runtime
# requirements (e.g. an encoding-detection module) -- confirm.
BUILD_DEPENDS= ${PYTHON_PKGNAMEPREFIX}lxml>=3.8.0:devel/py-lxml
RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}lxml>=3.8.0:devel/py-lxml
|
||||
|
||||
USES= pkgconfig python
|
||||
USE_PYTHON= autoplist distutils
|
||||
|
||||
.include <bsd.port.mk>
|
3
www/py-html5-parser/distinfo
Normal file
3
www/py-html5-parser/distinfo
Normal file
@ -0,0 +1,3 @@
|
||||
TIMESTAMP = 1501237401
|
||||
SHA256 (html5-parser-0.4.3.tar.gz) = dd5e3647c5919439c41600172ef96b5fdbf278028bd4000476f87412c4fb7b9c
|
||||
SIZE (html5-parser-0.4.3.tar.gz) = 261906
|
9
www/py-html5-parser/pkg-descr
Normal file
9
www/py-html5-parser/pkg-descr
Normal file
@ -0,0 +1,9 @@
|
||||
A fast implementation of the HTML 5 parsing spec for Python. Parsing
|
||||
is done in C using a variant of the gumbo parser. The gumbo parse
|
||||
tree is then transformed into an lxml tree, also in C, yielding
|
||||
parse times that can be a thirtieth of the html5lib parse times.
|
||||
That is a speedup of 30x. This differs, for instance, from the gumbo
|
||||
Python bindings, where the initial parsing is done in C but the
|
||||
transformation into the final tree is done in Python.
|
||||
|
||||
WWW: https://html5-parser.readthedocs.io/
|
Loading…
Reference in New Issue
Block a user