Sha256: 6cd60bee35cc9fb17e412cbc4b64b865917f2282ad6d1548e6f45d072b80a6ad
Contents?: true
Size: 807 Bytes
Versions: 1
Compression:
Stored size: 807 Bytes
Contents
require_relative 'simple'

module Selector
  #
  # Selector that generates feature vectors from an n-gram dictionary.
  #
  # @author Andreas Eger
  #
  class NGram < Selector::Simple
    # @return [Integer] number of consecutive words joined into one gram
    attr_reader :gram_size

    #
    # Both arguments are handed on unchanged to the superclass initializer
    # (bare +super+); this class additionally remembers the gram size.
    #
    # @param classification passed through to the superclass
    # @param args [Hash] options
    # @option args [Integer] :gram_size (2) words per n-gram
    #
    def initialize(classification, args = {})
      super
      @gram_size = args.fetch(:gram_size, 2)
    end

    # @return [String] the label of this selector
    def label
      "ngram"
    end

    #
    # Collects the word n-grams of one data entry. Every element of
    # +data.data+ is split on whitespace, stopwords and words of three
    # characters or fewer are dropped, and each run of +gram_size+
    # consecutive remaining words is joined with a single space.
    #
    # @param data [PreprocessedData]
    # @param gram_size [Integer] gram size, defaults to the configured one
    #
    # @return [Array<String>]
    def extract_words_from_data(data, gram_size = @gram_size)
      tokens = data.data.flat_map(&:split)
      # `stopwords` is not defined in this class; presumably inherited
      # from Selector::Simple -- confirm there.
      long_words = (tokens - stopwords).reject { |word| word.size <= 3 }
      long_words.each_cons(gram_size).map { |gram| gram.join " " }
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
svm_helper-0.1.1 | lib/svm_helper/selectors/n_gram.rb |