Sha256: 3391be8fe64e66c7eb03f63bc71adf257a47549b8615c2173b6e04cc8046c74b

Contents?: true

Size: 1.49 KB

Versions: 3

Compression:

Stored size: 1.49 KB

Contents

# frozen_string_literal: true

module Html2rss
  class AutoSource
    ##
    # Reducer merges articles that share the same URL path into one article.
    #
    # Articles are grouped by `article.url&.path`, so articles whose URL (or
    # URL path) is nil all land in the same group. Within a group:
    #
    # * attributes listed in `keep` (currently only `:scraper`) are collected
    #   into an Array, in the order they are encountered;
    # * for every other attribute, the value with the longest string
    #   representation (`to_s.size`) wins; on a tie the first one seen stays.
    #
    # No validity filtering happens here.
    class Reducer
      class << self
        ##
        # @param articles [Array<Article>] articles to reduce
        # @param _options [Hash] accepted for interface compatibility; ignored
        # @return [Array<Article>] one merged article per distinct URL path
        def call(articles, **_options)
          Log.debug "Reducer: inited with #{articles.size} articles"

          reduce_by_keeping_longest_values(articles, keep: [:scraper]) { |article| article.url&.path }
        end

        private

        ##
        # Groups the articles by the result of the given block and merges each
        # group into a single new Article.
        #
        # @param articles [Array<Article>]
        # @param keep [Array<Symbol>] attributes whose values are collected instead of compared
        # @return [Array<Article>] reduced articles, in order of first appearance of each group
        def reduce_by_keeping_longest_values(articles, keep:, &)
          articles.group_by(&).values.map do |grouped_articles|
            merged = grouped_articles.each_with_object({}) do |article, memo|
              keep_longest_values(memo, article, keep:)
            end

            Article.new(**merged)
          end
        end

        ##
        # Merges the attributes of one article into +memo_object+ (mutated in place).
        #
        # @param memo_object [Hash{Symbol => Object}] accumulated attributes of the group
        # @param article_hash [#each] yields attribute name and value pairs
        # @param keep [Array<Symbol>] attributes whose values are appended to an Array
        # @return [void]
        def keep_longest_values(memo_object, article_hash, keep:)
          article_hash.each do |key, value|
            # Skip nil uniformly: otherwise a nil for a kept key would be ignored
            # while the list is still unset but appended once it exists, making
            # the merged result depend on the order of the articles.
            next if value.nil? || value.eql?(memo_object[key])

            if keep.include?(key)
              # Values are appended as-is; repeated values are retained.
              (memo_object[key] ||= []) << value
            elsif value && value.to_s.size > memo_object[key].to_s.size
              # `value &&` keeps `false` from ever replacing a stored value.
              memo_object[key] = value
            end
          end
        end
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
html2rss-0.15.0 lib/html2rss/auto_source/reducer.rb
html2rss-0.14.0 lib/html2rss/auto_source/reducer.rb
html2rss-0.13.0 lib/html2rss/auto_source/reducer.rb