Sha256: 5a03a8b79483c624d44d00752788ab83b5e76591b64e3b0d8127d8601faac58e

Contents?: true

Size: 1.64 KB

Versions: 3

Compression:

Stored size: 1.64 KB

Contents

# frozen_string_literal: true

require 'date'

module Html2rss
  class AutoSource
    module Scraper
      class Schema
        ##
        # Base class for Schema.org schema_objects.
        #
        # Wraps one parsed schema object (accessed like a Hash with Symbol keys
        # such as +:@id+, +:url+ or +:datePublished+) and exposes each entry of
        # DEFAULT_ATTRIBUTES as a public reader method.
        #
        # @see https://schema.org/Article
        class Base
          # Attributes collected by {#call}; each one is a public method of this class.
          DEFAULT_ATTRIBUTES = %i[id title description url image published_at].freeze

          # @param schema_object [Hash] the schema object, read via Symbol keys
          # @param url [Object] base URL handed to Utils.build_absolute_url_from_relative
          #   when resolving the schema object's own +:url+
          def initialize(schema_object, url:)
            @schema_object = schema_object
            @url = url
          end

          # @return [Hash] the scraped article hash with DEFAULT_ATTRIBUTES
          def call
            DEFAULT_ATTRIBUTES.to_h { |attribute| [attribute, public_send(attribute)] }
          end

          # Picks the first non-empty identifier candidate: the schema object's
          # +:@id+, then the path of {#url}, then a slug built from {#title}.
          #
          # Empty strings are skipped explicitly because they are truthy in Ruby
          # and would otherwise short-circuit a plain +||+ chain.
          #
          # @return [Object] the identifier; the title slug fallback is an empty
          #   String when there is no title either
          def id
            explicit_id = schema_object[:@id]
            return explicit_id unless explicit_id.to_s.empty?

            path = url&.path
            return path unless path.to_s.empty?

            title.to_s.downcase.gsub(/\s+/, '-')
          end

          # @return [Object, nil] the value stored under +:title+
          def title = schema_object[:title]

          # Chooses the longest (by +to_s.size+) of the +:description+,
          # +:schema_object_body+ and +:abstract+ values.
          #
          # @return [Object, nil] the longest candidate; nil when all are nil
          def description
            schema_object.values_at(:description, :schema_object_body, :abstract)
                         .max_by { |candidate| candidate.to_s.size }
          end

          # @return [Addressable::URI, nil] the URL of the schema object
          def url
            url = schema_object[:url]
            if url.to_s.empty?
              Log.debug("Schema#Base.url: no url in schema_object: #{schema_object.inspect}")
              return
            end

            Utils.build_absolute_url_from_relative(url, @url)
          end

          # @return [Object, nil] the first usable image value, or nil if there is none
          def image = images.first

          # @return [Object, nil] the value stored under +:datePublished+
          def published_at = schema_object[:datePublished]

          private

          attr_reader :schema_object

          # Normalises the +:image+ value into a flat list of image values.
          #
          # A single Hash (e.g. a Schema.org ImageObject) is wrapped by hand:
          # Kernel#Array would call Hash#to_a and explode it into [key, value]
          # pairs. Hash entries are reduced to their +:url+ (or +:contentUrl+);
          # nil/false entries are dropped.
          #
          # @return [Array] the image values, possibly empty
          def images
            raw = schema_object[:image]
            entries = raw.is_a?(Hash) ? [raw] : Array(raw)

            entries.filter_map do |entry|
              entry.is_a?(Hash) ? (entry[:url] || entry[:contentUrl]) : entry
            end
          end
        end
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
html2rss-0.15.0 lib/html2rss/auto_source/scraper/schema/base.rb
html2rss-0.14.0 lib/html2rss/auto_source/scraper/schema/base.rb
html2rss-0.13.0 lib/html2rss/auto_source/scraper/schema/base.rb