From 4733ac6446d46b4ef43935a3565cbc7d45241fef Mon Sep 17 00:00:00 2001 From: Aaron Dalton <aaron@FreeBSD.org> Date: Mon, 20 Feb 2006 17:47:38 +0000 Subject: [PATCH] Adding port textproc/p5-AI-Categorizer, a tool to aid in the automatic categorization of text documents. Approved by: tobez (implicit) --- textproc/Makefile | 1 + textproc/p5-AI-Categorizer/Makefile | 59 +++++++++++++++++++ textproc/p5-AI-Categorizer/distinfo | 3 + .../p5-AI-Categorizer/files/patch-Build.PL | 31 ++++++++++ textproc/p5-AI-Categorizer/pkg-descr | 21 +++++++ textproc/p5-AI-Categorizer/pkg-plist | 35 +++++++++++ 6 files changed, 150 insertions(+) create mode 100644 textproc/p5-AI-Categorizer/Makefile create mode 100644 textproc/p5-AI-Categorizer/distinfo create mode 100644 textproc/p5-AI-Categorizer/files/patch-Build.PL create mode 100644 textproc/p5-AI-Categorizer/pkg-descr create mode 100644 textproc/p5-AI-Categorizer/pkg-plist diff --git a/textproc/Makefile b/textproc/Makefile index db75c587bc1c..faecc9063448 100644 --- a/textproc/Makefile +++ b/textproc/Makefile @@ -288,6 +288,7 @@ SUBDIR += openvanilla-framework SUBDIR += openvanilla-modules SUBDIR += or-aspell + SUBDIR += p5-AI-Categorizer SUBDIR += p5-Apache-ParseLog SUBDIR += p5-Apache-XBEL SUBDIR += p5-Bloom-Filter diff --git a/textproc/p5-AI-Categorizer/Makefile b/textproc/p5-AI-Categorizer/Makefile new file mode 100644 index 000000000000..5b3693288778 --- /dev/null +++ b/textproc/p5-AI-Categorizer/Makefile @@ -0,0 +1,59 @@ +# New ports collection makefile for: AI-Categorizer +# Date created: 19 Feb 2006 +# Whom: Aaron Dalton <aaron@FreeBSD.org> +# +# $FreeBSD$ +# + +PORTNAME= AI-Categorizer +PORTVERSION= 0.07 +CATEGORIES= textproc perl5 +MASTER_SITES= ${MASTER_SITE_PERL_CPAN} +MASTER_SITE_SUBDIR= AI +PKGNAMEPREFIX= p5- + +MAINTAINER= aaron@FreeBSD.org +COMMENT= Automatic Text Categorization + +BUILD_DEPENDS= ${SITE_PERL}/Class/Container.pm:${PORTSDIR}/devel/p5-Class-Container \ + ${SITE_PERL}/${PERL_ARCH}/Storable.pm:${PORTSDIR}/devel/p5-Storable \ + 
${SITE_PERL}/${PERL_ARCH}/Params/Validate.pm:${PORTSDIR}/devel/p5-Params-Validate \ + ${SITE_PERL}/Statistics/Contingency.pm:${PORTSDIR}/math/p5-Statistics-Contingency \ + ${SITE_PERL}/Lingua/Stem.pm:${PORTSDIR}/textproc/p5-Lingua-Stem \ + ${SITE_PERL}/${PERL_ARCH}/Scalar/Util.pm:${PORTSDIR}/lang/p5-Scalar-List-Utils \ + ${SITE_PERL}/Time/Progress.pm:${PORTSDIR}/devel/p5-Time-Progress \ + ${SITE_PERL}/${PERL_ARCH}/Algorithm/SVM.pm:${PORTSDIR}/devel/p5-Algorithm-SVM \ + ${SITE_PERL}/${PERL_ARCH}/AI/DecisionTree.pm:${PORTSDIR}/math/p5-AI-DecisionTree \ + ${SITE_PERL}/Algorithm/NaiveBayes.pm:${PORTSDIR}/devel/p5-Algorithm-NaiveBayes +RUN_DEPENDS= ${BUILD_DEPENDS} + +MAN3= AI::Categorizer.3 \ + AI::Categorizer::Category.3 \ + AI::Categorizer::Collection.3 \ + AI::Categorizer::Collection::Files.3 \ + AI::Categorizer::Document.3 \ + AI::Categorizer::Experiment.3 \ + AI::Categorizer::FeatureSelector.3 \ + AI::Categorizer::FeatureSelector::DocFrequency.3 \ + AI::Categorizer::FeatureVector.3 \ + AI::Categorizer::Hypothesis.3 \ + AI::Categorizer::KnowledgeSet.3 \ + AI::Categorizer::Learner.3 \ + AI::Categorizer::Learner::Boolean.3 \ + AI::Categorizer::Learner::DecisionTree.3 \ + AI::Categorizer::Learner::Guesser.3 \ + AI::Categorizer::Learner::KNN.3 \ + AI::Categorizer::Learner::NaiveBayes.3 \ + AI::Categorizer::Learner::SVM.3 \ + AI::Categorizer::Learner::Weka.3 \ + AI::Categorizer::Storable.3 + +PERL_MODBUILD= yes + +.include <bsd.port.pre.mk> + +.if ${PERL_LEVEL} < 500600 +IGNORE= requires at least Perl5.6 due to dependencies +.endif + +.include <bsd.port.post.mk> diff --git a/textproc/p5-AI-Categorizer/distinfo b/textproc/p5-AI-Categorizer/distinfo new file mode 100644 index 000000000000..b8509a7bf0b4 --- /dev/null +++ b/textproc/p5-AI-Categorizer/distinfo @@ -0,0 +1,3 @@ +MD5 (AI-Categorizer-0.07.tar.gz) = ae1c1320c802337c7b1c2500476ffa9f +SHA256 (AI-Categorizer-0.07.tar.gz) = 27547f91b018c398df2f721dc50435b94a819c50a2589aa99cae1c9bb40ff047 +SIZE (AI-Categorizer-0.07.tar.gz) = 255814 diff --git 
a/textproc/p5-AI-Categorizer/files/patch-Build.PL b/textproc/p5-AI-Categorizer/files/patch-Build.PL new file mode 100644 index 000000000000..ebf9509a93ad --- /dev/null +++ b/textproc/p5-AI-Categorizer/files/patch-Build.PL @@ -0,0 +1,31 @@ +--- Build.PL Mon Feb 20 10:02:49 2006 ++++ Build.PL Mon Feb 20 10:03:57 2006 +@@ -27,28 +27,4 @@ + }, + ); + +-my $categorizer = File::Spec->catfile('eg', 'categorizer'); +-if ($build->y_n("Do you want to install the $categorizer script to $Config{installscript}?", 'n')) { +- $build->scripts($categorizer); +-} +- +- +-{ +- my $path = $build->prompt +- ( +- "\nIf you have the Weka system installed, please specify the path\n". +- "to the 'weka.jar' file, or '-' to search CLASSPATH, or '!' to skip:", +- '!' +- ); +- +- if ($path eq '!') { +- unlink "classpath" if -e "classpath"; +- } else { +- local *FH; +- open FH, "> classpath" or die "Can't create classpath: $!"; +- print FH $path; +- close FH; +- } +-} +- + $build->create_build_script; diff --git a/textproc/p5-AI-Categorizer/pkg-descr b/textproc/p5-AI-Categorizer/pkg-descr new file mode 100644 index 000000000000..2635b7316ad8 --- /dev/null +++ b/textproc/p5-AI-Categorizer/pkg-descr @@ -0,0 +1,21 @@ +AI::Categorizer is a framework for automatic text categorization. It +consists of a collection of Perl modules that implement common +categorization tasks, and a set of defined relationships among those +modules. The various details are flexible - for example, you can choose +what categorization algorithm to use, what features (words or otherwise) +of the documents should be used (or how to automatically choose these +features), what format the documents are in, and so on. + +The basic process of using this module will typically involve obtaining a +collection of pre-categorized documents, creating a "knowledge set" +representation of those documents, training a categorizer on that +knowledge set, and saving the trained categorizer for later use. 
There are +several ways to carry out this process. The top-level AI::Categorizer +module provides an umbrella class for high-level operations, or you may +use the interfaces of the individual classes in the framework. + +A simple sample script that reads a training corpus, trains a categorizer, +and tests the categorizer on a test corpus, is distributed as eg/demo.pl . + +WWW: http://search.cpan.org/dist/AI-Categorizer +Author: Ken Williams diff --git a/textproc/p5-AI-Categorizer/pkg-plist b/textproc/p5-AI-Categorizer/pkg-plist new file mode 100644 index 000000000000..9253549fce03 --- /dev/null +++ b/textproc/p5-AI-Categorizer/pkg-plist @@ -0,0 +1,35 @@ +%%SITE_PERL%%/AI/Categorizer.pm +%%SITE_PERL%%/AI/Categorizer/Category.pm +%%SITE_PERL%%/AI/Categorizer/Collection.pm +%%SITE_PERL%%/AI/Categorizer/Collection/DBI.pm +%%SITE_PERL%%/AI/Categorizer/Collection/Files.pm +%%SITE_PERL%%/AI/Categorizer/Collection/InMemory.pm +%%SITE_PERL%%/AI/Categorizer/Collection/SingleFile.pm +%%SITE_PERL%%/AI/Categorizer/Document.pm +%%SITE_PERL%%/AI/Categorizer/Document/SMART.pm +%%SITE_PERL%%/AI/Categorizer/Document/Text.pm +%%SITE_PERL%%/AI/Categorizer/Document/XML.pm +%%SITE_PERL%%/AI/Categorizer/Experiment.pm +%%SITE_PERL%%/AI/Categorizer/FeatureSelector.pm +%%SITE_PERL%%/AI/Categorizer/FeatureSelector/DocFrequency.pm +%%SITE_PERL%%/AI/Categorizer/FeatureVector.pm +%%SITE_PERL%%/AI/Categorizer/Hypothesis.pm +%%SITE_PERL%%/AI/Categorizer/KnowledgeSet.pm +%%SITE_PERL%%/AI/Categorizer/Learner.pm +%%SITE_PERL%%/AI/Categorizer/Learner/Boolean.pm +%%SITE_PERL%%/AI/Categorizer/Learner/DecisionTree.pm +%%SITE_PERL%%/AI/Categorizer/Learner/Guesser.pm +%%SITE_PERL%%/AI/Categorizer/Learner/KNN.pm +%%SITE_PERL%%/AI/Categorizer/Learner/NaiveBayes.pm +%%SITE_PERL%%/AI/Categorizer/Learner/Rocchio.pm +%%SITE_PERL%%/AI/Categorizer/Learner/SVM.pm +%%SITE_PERL%%/AI/Categorizer/Learner/Weka.pm +%%SITE_PERL%%/AI/Categorizer/ObjectSet.pm +%%SITE_PERL%%/AI/Categorizer/Storable.pm 
+%%SITE_PERL%%/AI/Categorizer/Util.pm +@dirrm %%SITE_PERL%%/AI/Categorizer/Learner +@dirrm %%SITE_PERL%%/AI/Categorizer/FeatureSelector +@dirrm %%SITE_PERL%%/AI/Categorizer/Document +@dirrm %%SITE_PERL%%/AI/Categorizer/Collection +@dirrm %%SITE_PERL%%/AI/Categorizer +@dirrmtry %%SITE_PERL%%/AI