mirror of
https://git.FreeBSD.org/ports.git
synced 2025-01-25 09:34:11 +00:00
Add port textproc/py-html2text:
html2text is a Python script that converts a page of HTML into clean, easy-to-read plain ASCII text. Better yet, that ASCII also happens to be valid Markdown (a text-to-HTML format). WWW: http://www.aaronsw.com/2002/html2text/ Author: Aaron Swartz <me@aaronsw.com> Inspired by: pkgsrc package
This commit is contained in:
parent
d1fb8e2940
commit
6ffd6b1352
Notes:
svn2git
2021-03-31 03:12:20 +00:00
svn path=/head/; revision=190724
@ -754,6 +754,7 @@
|
||||
SUBDIR += py-expat
|
||||
SUBDIR += py-feedparser
|
||||
SUBDIR += py-genshi
|
||||
SUBDIR += py-html2text
|
||||
SUBDIR += py-hyperestraier
|
||||
SUBDIR += py-hyperestraier-python
|
||||
SUBDIR += py-jaxml
|
||||
|
24
textproc/py-html2text/Makefile
Normal file
24
textproc/py-html2text/Makefile
Normal file
@ -0,0 +1,24 @@
|
||||
# New ports collection makefile for: html2text
|
||||
# Date created: 23 April 2007
|
||||
# Whom: Andrew Pantyukhin <infofarmer@FreeBSD.org>
|
||||
#
|
||||
# $FreeBSD$
|
||||
#
|
||||
|
||||
PORTNAME= html2text
|
||||
PORTVERSION= 2.2.8
|
||||
CATEGORIES= textproc python
|
||||
MASTER_SITES= CENKES
|
||||
PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
|
||||
|
||||
MAINTAINER= infofarmer@FreeBSD.org
|
||||
COMMENT= Convert HTML into clean plain ASCII text
|
||||
|
||||
USE_PYTHON= yes
|
||||
USE_PYDISTUTILS=yes
|
||||
PLIST_FILES= bin/${PORTNAME}.py
|
||||
|
||||
post-patch:
|
||||
@${REINPLACE_CMD} -e '1s|.*|#!${PYTHON_CMD}|' ${WRKSRC}/${PORTNAME}.py
|
||||
|
||||
.include <bsd.port.mk>
|
3
textproc/py-html2text/distinfo
Normal file
3
textproc/py-html2text/distinfo
Normal file
@ -0,0 +1,3 @@
|
||||
MD5 (html2text-2.2.8.tar.gz) = 8f84bd9456972ee1cccd2489f8b2535a
|
||||
SHA256 (html2text-2.2.8.tar.gz) = 81a2304eeb7006f351343e8be59214eac8352ece6ff020fd11028b5a6e10d890
|
||||
SIZE (html2text-2.2.8.tar.gz) = 3998
|
28
textproc/py-html2text/files/patch-html2text.py
Normal file
28
textproc/py-html2text/files/patch-html2text.py
Normal file
@ -0,0 +1,28 @@
|
||||
--- html2text.py.orig 2007-01-18 19:06:49.000000000 -0500
|
||||
+++ html2text.py
|
||||
@@ -150,7 +150,7 @@ class _html2text(sgmllib.SGMLParser):
|
||||
self.lastWasNL = 0
|
||||
|
||||
def outtextf(self, s):
|
||||
- if type(s) is type(''): s = codecs.utf_8_decode(s)[0]
|
||||
+ if type(s) is type(''): s = codecs.utf_8_decode(s, "replace")[0]
|
||||
self.outtext += s
|
||||
|
||||
def close(self):
|
||||
@@ -259,6 +259,7 @@ class _html2text(sgmllib.SGMLParser):
|
||||
if attrs.has_key('src'):
|
||||
attrs['href'] = attrs['src']
|
||||
alt = attrs.get('alt', '')
|
||||
+ alt = re.sub('\n', ' ', alt)
|
||||
i = self.previousIndex(attrs)
|
||||
if i is not None:
|
||||
attrs = self.a[i]
|
||||
@@ -279,7 +280,7 @@ class _html2text(sgmllib.SGMLParser):
|
||||
if tag in ["ol", "ul"]:
|
||||
if start:
|
||||
self.list.append({'name':tag, 'num':0})
|
||||
- else:
|
||||
+ elif self.list:
|
||||
if self.list: self.list.pop()
|
||||
|
||||
self.p()
|
6
textproc/py-html2text/pkg-descr
Normal file
6
textproc/py-html2text/pkg-descr
Normal file
@ -0,0 +1,6 @@
|
||||
html2text is a Python script that converts a page of HTML into clean,
|
||||
easy-to-read plain ASCII text. Better yet, that ASCII also happens to
|
||||
be valid Markdown (a text-to-HTML format).
|
||||
|
||||
WWW: http://www.aaronsw.com/2002/html2text/
|
||||
Author: Aaron Swartz <me@aaronsw.com>
|
Loading…
Reference in New Issue
Block a user