mirror of
https://git.FreeBSD.org/ports.git
synced 2025-01-03 06:04:53 +00:00
Add p5-HTML-ExtractContent 0.05, a Perl extension for extracting HTML
content using scoring heuristics.
This commit is contained in:
parent
93c8f25a76
commit
1283458c3a
Notes:
svn2git
2021-03-31 03:12:20 +00:00
svn path=/head/; revision=229534
@ -831,6 +831,7 @@
|
||||
SUBDIR += p5-HTML-Element-Library
|
||||
SUBDIR += p5-HTML-Embperl
|
||||
SUBDIR += p5-HTML-Encoding
|
||||
SUBDIR += p5-HTML-ExtractContent
|
||||
SUBDIR += p5-HTML-FillInForm
|
||||
SUBDIR += p5-HTML-FillInForm-ForceUTF8
|
||||
SUBDIR += p5-HTML-FormFu
|
||||
|
27
www/p5-HTML-ExtractContent/Makefile
Normal file
27
www/p5-HTML-ExtractContent/Makefile
Normal file
@ -0,0 +1,27 @@
|
||||
# New ports collection makefile for: HTML::ExtractContent
|
||||
# Date created: 05 Mar 2009
|
||||
# Whom: Jun Kuriyama <kuriyama@FreeBSD.org>
|
||||
#
|
||||
# $FreeBSD$
|
||||
#
|
||||
|
||||
PORTNAME= HTML-ExtractContent
|
||||
PORTVERSION= 0.05
|
||||
CATEGORIES= www perl5
|
||||
MASTER_SITES= CPAN
|
||||
PKGNAMEPREFIX= p5-
|
||||
|
||||
MAINTAINER= kuriyama@FreeBSD.org
|
||||
COMMENT= Perl extension for HTML content extractor with scoring heuristics
|
||||
|
||||
RUN_DEPENDS= \
|
||||
p5-Class-Accessor-Lvalue>0:${PORTSDIR}/devel/p5-Class-Accessor-Lvalue \
|
||||
p5-Exporter-Lite>0:${PORTSDIR}/devel/p5-Exporter-Lite \
|
||||
p5-HTML-Parser>0:${PORTSDIR}/www/p5-HTML-Parser
|
||||
BUILD_DEPENDS= ${RUN_DEPENDS}
|
||||
|
||||
PERL_CONFIGURE= yes
|
||||
|
||||
MAN3= HTML::ExtractContent.3
|
||||
|
||||
.include <bsd.port.mk>
|
3
www/p5-HTML-ExtractContent/distinfo
Normal file
3
www/p5-HTML-ExtractContent/distinfo
Normal file
@ -0,0 +1,3 @@
|
||||
MD5 (HTML-ExtractContent-0.05.tar.gz) = 95c0f8be7624a4e71de6b7b3a0fe362b
|
||||
SHA256 (HTML-ExtractContent-0.05.tar.gz) = 973950b6445b9644d71caa79787cb4753ed75ec296d31ee5d6df9494491ac85f
|
||||
SIZE (HTML-ExtractContent-0.05.tar.gz) = 25899
|
11
www/p5-HTML-ExtractContent/pkg-descr
Normal file
11
www/p5-HTML-ExtractContent/pkg-descr
Normal file
@ -0,0 +1,11 @@
|
||||
HTML::ExtractContent is a module for extracting content from HTML with
|
||||
scoring heuristics.
|
||||
|
||||
It guesses which block of HTML looks like content according to scores
|
||||
depending on the number of punctuation marks and the lengths of non-tag
|
||||
texts.
|
||||
|
||||
It also guesses whether content ends in the block or continues to the next
|
||||
block.
|
||||
|
||||
WWW: http://search.cpan.org/dist/HTML-ExtractContent/
|
5
www/p5-HTML-ExtractContent/pkg-plist
Normal file
5
www/p5-HTML-ExtractContent/pkg-plist
Normal file
@ -0,0 +1,5 @@
|
||||
%%SITE_PERL%%/%%PERL_ARCH%%/auto/HTML/ExtractContent/.packlist
|
||||
%%SITE_PERL%%/HTML/ExtractContent.pm
|
||||
%%SITE_PERL%%/HTML/ExtractContent/Util.pm
|
||||
@dirrm %%SITE_PERL%%/HTML/ExtractContent
|
||||
@dirrm %%SITE_PERL%%/%%PERL_ARCH%%/auto/HTML/ExtractContent
|
Loading…
Reference in New Issue
Block a user