Sha256: 5a03a8b79483c624d44d00752788ab83b5e76591b64e3b0d8127d8601faac58e
Contents?: true
Size: 1.64 KB
Versions: 3
Compression:
Stored size: 1.64 KB
Contents
# frozen_string_literal: true require 'date' module Html2rss class AutoSource module Scraper class Schema ## # Base class for Schema.org schema_objects. # # @see https://schema.org/Article class Base DEFAULT_ATTRIBUTES = %i[id title description url image published_at].freeze def initialize(schema_object, url:) @schema_object = schema_object @url = url end # @return [Hash] the scraped article hash with DEFAULT_ATTRIBUTES def call DEFAULT_ATTRIBUTES.to_h do |attribute| [attribute, public_send(attribute)] end end def id = schema_object[:@id] || url&.path || title.to_s.downcase.gsub(/\s+/, '-') def title = schema_object[:title] def description [schema_object[:description], schema_object[:schema_object_body], schema_object[:abstract]] .max_by { |desc| desc.to_s.size } end # @return [Addressable::URI, nil] the URL of the schema object def url url = schema_object[:url] if url.to_s.empty? Log.debug("Schema#Base.url: no url in schema_object: #{schema_object.inspect}") return end Utils.build_absolute_url_from_relative(url, @url) end def image = images.first || nil def published_at = schema_object[:datePublished] private attr_reader :schema_object def images Array(schema_object[:image]).compact end end end end end end
Version data entries
3 entries across 3 versions & 1 rubygems