Sha256: e2d21e038ea0a3aebed8ba0bfadb8c33ca8de2fec881901eb9ec46d83682ee28

Contents?: true

Size: 836 Bytes

Versions: 3

Compression:

Stored size: 836 Bytes

Contents

module Company
  module Mapping
    # A Corpus specialised for company names: every company contributes one
    # document for its name plus one document per alias.
    class CompanyCorpus < Corpus
      # @param path [String, nil] optional CSV file to import right after
      #   construction (see #import_csv); nothing is imported when nil
      def initialize(path = nil)
        super()
        import_csv(path) if path
      end

      # Pushes one document for the company name and one per alias.
      #
      # The name document uses +id+ as its id; alias documents use
      # "<id>_<i>", where +i+ is the alias's zero-based position in +aliases+.
      #
      # @param id [Object] identifier of the company
      # @param company_name [String] primary name of the company
      # @param aliases [Array<String>, nil] alternative names; nil is treated
      #   as "no aliases"
      def add(id, company_name, aliases = [])
        push doc(company_name, id)
        # Array() tolerates nil, which import_csv's slice yields for a record
        # that has fewer than two fields.
        Array(aliases).each_with_index do |company_alias, i|
          push doc(company_alias, "#{id}_#{i}")
        end
      end

      # Builds the corpus from a CSV file.
      #
      # Only the first CSV column of each record is used; it is split on ";"
      # into <id>;<company name>;<alias>;<alias>... Any further
      # comma-separated columns are ignored.
      #
      # @param path [String] path of the file to read
      # @return the value of @corpus (not assigned in this class; presumably
      #   maintained by Corpus -- confirm)
      def import_csv(path)
        CSV.foreach(path) do |row|
          fields = row.first.to_s.split(";")
          # Blank lines are yielded as empty rows (row.first is nil); skip
          # them, and records with an empty first column, instead of crashing.
          next if fields.empty?

          add fields.first, fields[1], fields[2..-1]
        end
        @corpus
      end

      private

      # Creates a TextDocument with the given id whose contents are +content+
      # with every comma and period removed.
      def doc(content, id)
        alias_doc = TextDocument.new
        alias_doc.contents = content.delete(",.")
        alias_doc.id = id
        alias_doc
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
company-mapping-0.2.3 lib/company/mapping/document_utils/company_corpus.rb
company-mapping-0.2.2 lib/company/mapping/document_utils/company_corpus.rb
company-mapping-0.2.1 lib/company/mapping/document_utils/company_corpus.rb