Sha256: 24c98a94ece1a529bd22dbe18ff0646a7d303185f609624a8ff9a5709b8ca34a
Contents?: true
Size: 791 Bytes
Versions: 1
Compression:
Stored size: 791 Bytes
Contents
require_relative 'simple'

module Selector
  #
  # Selector which uses an n-gram dictionary to generate feature vectors
  #
  # @author Andreas Eger
  #
  class NGram < Selector::Simple
    attr_reader :gram_size

    #
    # Forwards the option hash to the parent selector and stores the gram size.
    # @param args [Hash] options; :gram_size is read here and defaults to 2
    #
    def initialize(args = {})
      super
      @gram_size = args.fetch(:gram_size, 2)
    end

    #
    # @return [String] the label of this selector
    def label
      "ngram"
    end

    #
    # Builds the word n-grams for one data entry: every element of data.data
    # is split on whitespace, stopwords are subtracted, words of three
    # characters or fewer are dropped, and each run of gram_size consecutive
    # remaining words is joined with a single space.
    # @param data [PreprocessedData]
    # @param gram_size [Integer] gram size
    #
    # @return [Array<String>]
    def extract_words_from_data(data, gram_size = @gram_size)
      tokens = data.data.flat_map { |text| text.split }
      long_words = (tokens - stopwords).select { |word| word.size > 3 }
      long_words.each_cons(gram_size).map { |window| window.join(" ") }
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
svm_helper-0.1.0 | lib/svm_helper/selectors/n_gram.rb |