Sha256: 3b74451b606330d857be11b7422cb732c28c90415fdef58cea3ea3f5a87c8c1d

Contents?: true

Size: 1.11 KB

Versions: 1

Compression:

Stored size: 1.11 KB

Contents

module Company
  module Mapping

#CompanyMapper given a corpus of documents (that contains company names) can map a new document with an existing one
#if one exists
    class CompanyMapper

      def initialize(corpus)
        @corpus = corpus
        @tfidf = TFIDF.new(@corpus)
        @tfidf.calculate
      end

      #maps a given company to a company exists to the given corpus. If the maximum name similarity found exceeds the given
      # threshold then the company's id is returned as a match
      def map(company, threshold)
        @tfidf.calculate_tfidf_weights_of_new_document(company)

        _maxSim = 0.0
        _mapped_company = ""
        @corpus.each do |d|
          _similarity = @tfidf.similarity(d.id, company.id)

          if (_maxSim < _similarity)
            _maxSim = _similarity
            _mapped_company = d.id
            if (_maxSim == 1)
              break
            end
          end
        end

        if (_maxSim>threshold)
          return _mapped_company.to_s.sub(/\_.*/, "")
        else
          return nil
        end
      end
    end

  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
company-mapping-0.1.0 lib/company/mapping/company_mapper.rb