package SNF; use strict; use base 'Continuity::Application'; use YAML qw(); use Algorithm::NaiveBayes; use Algorithm::NaiveBayes::Model::Frequency; use LWP::UserAgent; use XML::RSS; use Text::ParseWords; use Data::Dumper; my @categories = qw{ Good Bad }; my $datafile = '/tmp/cat.dat'; sub main { my ($self) = @_; $self->init_once; $self->init_always; while(1) { my $items = $self->getNews; my $f = $self->dispTemplate('main.tpl', items => $items, categories => [@categories] ); my $category = $f->{category} || ''; if ($category) { my $item = $f->{num}; $self->train($category, $items->[$item]); $self->save; } } } sub init_once { my ($self) = @_; if(-e $datafile) { use Carp; carp('Loading from datafile.'); my $bayes; $bayes = do $datafile; $self->{bayes} = $bayes; my $VAR1; eval(Dumper($self->{bayes})); $self->{bayes} = $VAR1; if($self->{bayes}->isa('Algorithm::NaiveBayes')) { carp("is a AlgorithmNaiveBayes!"); } else { carp(" is NOT a A:NB"); } } else { $self->{bayes} = Algorithm::NaiveBayes->new(purge => 0); if($self->{bayes}->isa('Algorithm::NaiveBayes')) { carp("is a AlgorithmNaiveBayes!"); } else { carp(" is NOT a A:NB"); } } } sub init_always { my ($self) = @_; $self->nocache('init_always'); my $VAR1; eval(Dumper($self->{bayes})); $self->{bayes} = $VAR1; carp("dump: " . Dumper($self->{bayes})); } sub save { my ($self) = @_; carp("saving..."); $Data::Dumper::Purity = 1; $Data::Dumper::Terse = 1; my $str = '$bayes = ' . Dumper($self->{bayes}) . ';'; open(OUT,">$datafile") || carp("Error: $!"); print OUT $str; close OUT; } sub getNews { my ($self) = @_; my @items; my $ua = LWP::UserAgent->new; my @news = ( 'http://download.freshmeat.net/backend/fm.rdf', 'http://linuxtoday.com/backend/biglt.rss', 'http://slashdot.org/index.rss', 'http://www.bbc.co.uk/syndication/feeds/news/ukfs_news/front_page/rss091.xml', 'http://www.perl.com/pace/perlnews.rdf' ); foreach my $url (@news) { my $request = HTTP::Request->new(GET => $url); my $response = $ua->request($request); my $p = new XML::RSS; $p->parse($response->content); @items = (@items, @{$p->{items}}); } for(my $i = 0; $i <= $#items ; $i++) { $items[$i]{num} = $i; $items[$i]{predict} = $self->predict($items[$i]); } @items = map { $_->[0] } sort { $b->[1] <=> $a->[1] } map { [ $_, ($_->{predict}{Good} - $_->{predict}{Bad}) ] } @items; my $i = 0; @items = map { $_->{num} = $i++ ; $_ } @items; return [@items]; } sub train { my ($self, $category, $item) = @_; my $text = $item->{title} . ' ' . $item->{item} . ' ' . $item->{description}; $text =~ s/[\n\r"']//g; carp("text: '$text'"); my @words = quotewords('\s+',0,$text); my %wordcount; $wordcount{$_}++ foreach @words; carp("Training new words: " . Dumper({%wordcount})); $self->{bayes}->add_instance( attributes => { %wordcount }, label => $category ); $self->{bayes}->train(); } sub predict { my ($self, $item) = @_; my $text = $item->{title} . ' ' . $item->{item} . ' ' . $item->{description}; $text =~ s/[\n\r"']//g; my @words = quotewords('\s+',0,$text); my %wordcount; $wordcount{$_}++ foreach @words; my $labels = $self->{bayes}->predict( attributes => { %wordcount } ); return $labels; } 1;