# frozen_string_literal: true
module Html2rss
class AutoSource
##
# The Scraper module contains all scrapers that can be used to extract articles.
# Each scraper should implement a `call` method that returns an array of article hashes.
# Each scraper should also implement an `articles?` method that returns true if the scraper
# can potentially be used to extract articles from the given HTML.
#
module Scraper
SCRAPERS = [
Html,
Schema,
SemanticHtml
].freeze
##
# Error raised when no suitable scraper is found.
class NoScraperFound < Html2rss::Error; end
##
# Returns an array of scrapers that claim to find articles in the parsed body.
# @param parsed_body [Nokogiri::HTML::Document] The parsed HTML body.
# @return [Array] An array of scraper classes that can handle the parsed body.
def self.from(parsed_body)
scrapers = SCRAPERS.select { |scraper| scraper.articles?(parsed_body) }
raise NoScraperFound, 'No suitable scraper found for URL.' if scrapers.empty?
scrapers
end
end
end
end