Sha256: d7c6e87d4b8a70f17b4ce9a025c2d7c007d82eb4dbc96c305bb1bd41bac6dbfc

Contents?: true

Size: 1.14 KB

Versions: 6

Compression:

Stored size: 1.14 KB

Contents

#ifndef __text_pipeline_h__
#define __text_pipeline_h__
#include "data_set/sparse/sparse_example.h"
#include "example_generator/example_generator.h"
#include "example_generator/token_counter.h"
#include "inplace_processor/inplace_processor.h"
#include "inplace_processor/downcase.h"
#include "inplace_processor/porter_stemmer.h"
#include "token_selector/token_selector.h"
#include "token_selector/stop_words.h"
#include "token_selector/pos_tag_selector.h"
#include "tokeniser/tokeniser.h"
#include "tokeniser/simple_tokeniser.h"
#include <vector>

namespace Preprocessing {
  namespace Text {
    class Tokeniser;
    
    class TextPipeline {
    public:
      Tokeniser *tokeniser;
      vector<InplaceProcessor *> processors;
      vector<TokenSelector *> selectors;
      ExampleGenerator *generator;
      vector<char *> tokens;
      
      TextPipeline() : tokeniser(NULL), processors(), selectors(), generator(NULL), tokens() {}      
      DataSet::SparseExample *process_text(DataSet::SparseDataSet *data_set, char *text, bool create_features);
      void process_token(char *start, char *end);
    };
    
    // Returns a pointer to a TextPipeline.
    // NOTE(review): presumably a newly allocated pipeline pre-configured with
    // default stages; who must free it is not visible here -- confirm.
    TextPipeline *StandardPipeline();
  }
}

#endif

Version data entries

6 entries across 6 versions & 1 rubygems

Version Path
thera-0.0.7 lib/quarry/src/preprocessing/text/text_pipeline.h
thera-0.0.6 lib/quarry/src/preprocessing/text/text_pipeline.h
thera-0.0.5 lib/quarry/src/preprocessing/text/text_pipeline.h
thera-0.0.4 lib/quarry/src/preprocessing/text/text_pipeline.h
thera-0.0.3 lib/quarry/src/preprocessing/text/text_pipeline.h
thera-0.0.2 lib/quarry/src/preprocessing/text/text_pipeline.h