Sha256: f868a0022ff221bbec17ce2a5ce32abc8b5d82e63f66bb5b9058304a39ad9277

Contents?: true

Size: 691 Bytes

Versions: 8

Compression:

Stored size: 691 Bytes

Contents

#include "preprocessing/text/text_pipeline.h"
#include "simple_tokeniser.h"

// Splits `text` in place into maximal runs of alphanumeric characters and
// hands each run to the pipeline.
//
// text: a writable, NUL-terminated buffer. The character that ends each
//       token is overwritten with 0, so the buffer is modified. A null
//       pointer is ignored.
//
// For every token found, pipeline->process_token(start, end) is called,
// where `start` points at the token's first character and `end` points at
// the terminator that was just written after its last character.
void Preprocessing::Text::SimpleTokeniser::tokenise(char *text) {
  if(!text)
    return;

  char *start  = text;
  bool intoken = false;

  // The loop deliberately visits the terminating 0 as well, so that a token
  // which runs up to the end of the string is still flushed by the
  // "not alphanumeric" branch below.
  for(;; text++) {
    // Read the end-of-string state before the delimiter is overwritten.
    const bool at_end = (*text == 0);

    // Cast to unsigned char: passing a negative value (any byte >= 0x80 when
    // char is signed) to isalnum is undefined behaviour.
    if(isalnum(static_cast<unsigned char>(*text))) {
      if(!intoken) {
        intoken = true;
        start = text;
      }
    } else if(intoken) {
      intoken = false;
      *text = 0;  // terminate the token in place
      pipeline->process_token(start, text);
    }

    if(at_end)
      break;
  }
}

Version data entries

8 entries across 8 versions & 1 rubygems

Version Path
thera-0.0.8 lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp
thera-0.0.7 lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp
thera-0.0.6 lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp
thera-0.0.5 lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp
thera-0.0.4 lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp
thera-0.0.3 lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp
thera-0.0.2 lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp
thera-0.0.1 lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp