Sha256: 6e8475100fba23c22f75ffe04eea10210d2e594eb20a932f9acd488c66542bbc

Contents?: true

Size: 1.7 KB

Versions: 1

Compression:

Stored size: 1.7 KB

Contents

require 'active_support/core_ext/hash/keys'
require 'active_support/inflector'
require 'levenshtein-ffi'

module Crawler
  module Movie
    PROVIDERS = []
    SCORES = {}

    def self.add_provider(provider_name, options = {})
      options.assert_valid_keys :score, :insert_at

      PROVIDERS.insert(options[:insert_at] || -1, provider_name)

      if (score = options[:score])
        SCORES[provider_name] = score
      end
    end

    def self.configure
      yield self
    end

    def self.transliterate(string)
      ActiveSupport::Inflector.transliterate(string.gsub(/[:\-.,!?]/, ' ').strip.gsub(/\s+/, ' ')).downcase
    end

    def self.search(query, year: nil)
      movies = PROVIDERS.flat_map do |provider_name|
        camelized = ActiveSupport::Inflector.camelize("crawler/movie/providers/#{provider_name.to_s}")
        klass = ActiveSupport::Inflector.constantize(camelized)
        movies = klass.search(transliterate(query))

        movies.map do |movie|
          provider_score = SCORES[provider_name] || 0.5
          query_transliterated = transliterate(query)
          title_transliterated = transliterate(movie[:title])
          levenshtein_distance = Levenshtein.distance(query_transliterated, title_transliterated)
          max_size = [query_transliterated.size, title_transliterated.size].max.to_f
          title_score = (max_size - levenshtein_distance) / max_size
          year_score = 1.0 unless year
          year_score ||= movie[:release_date] && year.to_s == movie[:release_date].year.to_s ? 1.0 : 0.9

          {
            data: movie,
            score: provider_score * title_score * year_score
          }
        end
      end

      movies.max_by { |movie| movie[:score] }
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
crawler-movie-core-0.1.0 lib/crawler/movie.rb