Sha256: a88c188d5e0cc7e68de6bbd13f5a8b46abfa8563fb1e4c4b5f0c0561173872de
Contents?: true
Size: 1 KB
Versions: 7
Compression:
Stored size: 1 KB
Contents
#include "text_pipeline.h"

#include <cstddef>
#include <iostream>

/**
 * Run a piece of text through the pipeline and build an example from it.
 *
 * Clears the token buffer, hands `text` to the tokeniser, then passes the
 * collected tokens to the generator and returns whatever it produces.
 *
 * @param data_set        forwarded unchanged to the generator
 * @param text            text buffer handed to the tokeniser
 * @param create_features forwarded unchanged to the generator
 * @return the generator's result for the collected tokens
 */
DataSet::SparseExample *Preprocessing::Text::TextPipeline::process_text(DataSet::SparseDataSet *data_set, char *text, bool create_features) {
  // Drop tokens left over from any previous call before tokenising again.
  tokens.clear();

  // NOTE(review): presumably the tokeniser calls process_token() on this
  // pipeline for every token it finds -- confirm against the tokeniser.
  tokeniser->tokenise(text);

  return generator->generate(data_set, &tokens, create_features);
}

/**
 * Process a single token delimited by [start, end).
 *
 * Every processor is applied to the token first, in registration order. The
 * selectors are then consulted in order; the first one that rejects the token
 * causes it to be dropped. A token accepted by all selectors is appended to
 * the token buffer.
 *
 * @param start pointer to the first character of the token
 * @param end   pointer marking the end of the token
 */
void Preprocessing::Text::TextPipeline::process_token(char *start, char *end) {
  // Index with an unsigned type so the comparison against size() does not mix
  // signed and unsigned operands.
  for (std::size_t i = 0; i < processors.size(); i++) {
    processors[i]->process(start, end);
  }

  for (std::size_t i = 0; i < selectors.size(); i++) {
    if (!selectors[i]->select(start, end)) {
      return;  // rejected by a selector: do not record the token
    }
  }

  // Only the start pointer is stored.
  // NOTE(review): this assumes the token is terminated in place by the
  // tokeniser or a processor -- confirm.
  tokens.push_back(start);
}

/**
 * Build a pipeline with the standard set of components: a SimpleTokeniser,
 * the Downcase and PorterStemmer processors (in that order), a StopWords
 * selector, and a TokenCounter generator constructed with TokenCounter::Local.
 *
 * @return pointer to the newly allocated pipeline
 *         (NOTE(review): presumably the caller takes ownership -- confirm)
 */
Preprocessing::Text::TextPipeline *Preprocessing::Text::StandardPipeline() {
  TextPipeline *pipeline = new TextPipeline();

  // The tokeniser is constructed with the pipeline it belongs to.
  pipeline->tokeniser = new SimpleTokeniser(pipeline);

  pipeline->processors.push_back(new Downcase());
  pipeline->processors.push_back(new PorterStemmer());

  pipeline->selectors.push_back(new StopWords());

  pipeline->generator = new TokenCounter(TokenCounter::Local);

  return pipeline;
}
Version data entries
7 entries across 7 versions & 1 rubygems