Sha256: c92f5b935a711a915ddd404d8196f5535dc0a08bbb5a7d8092e0c7baf1bfecf2

Contents?: true

Size: 1.6 KB

Versions: 4

Compression:

Stored size: 1.6 KB

Contents

# frozen_string_literal: true

require "core"
require "ox"
require "refinements/string"

module Pennyworth
  module Loaders
    # Builds htmx reference records by fetching a documentation page and reading its table rows.
    class HTMX
      include Import[:http, :settings]

      using Refinements::String

      # The Ox module itself, returned after its default parse options are assigned.
      # NOTE: the assignment happens on the shared Ox module when this file is loaded.
      PARSER = Ox.tap { |ox| ox.default_options = {mode: :generic, effort: :tolerant, smart: true} }

      # Flattens the children of an element into a single description string.
      #
      # Child elements contribute their text wrapped in backticks; every other child is used
      # as is. The joined result is passed through the refined `String#up` (presumably
      # capitalizes the leading character -- confirm against Refinements) and a single
      # trailing period, if present, is removed.
      def self.text_for element
        fragments = element.each.map do |node|
          if node.is_a? Ox::Element
            "`#{node.text}`"
          else
            node
          end
        end

        fragments.join.up.delete_suffix(".")
      end

      # Accepts an optional parser and model; remaining keywords are forwarded to `super`.
      def initialize(parser: PARSER, model: Models::HTMX, **)
        @parser = parser
        @model = model
        super(**)
      end

      # Returns one model record per row whose "td" lookup yields exactly two elements
      # (item cell, then description cell). Rows of any other shape are ignored.
      def call uri
        read(uri).each_with_object [] do |row, entries|
          case row.locate("td")
            in [Ox::Element => item, Ox::Element => description]
              entries.append record_for(item, description, uri)
            else next
          end
        end
      end

      private

      attr_reader :parser, :model

      # Fetches the page and answers its rows, or the shared empty array when the response
      # status is neither 200 nor 301.
      def read uri
        response = http.get uri

        return Core::EMPTY_ARRAY unless [200, 301].include?(response.status)

        parse_rows response.body.to_s
      end

      # Parses the document and answers whatever the "*/tr" path locates.
      def parse_rows document
        parser.parse(document).locate("*/tr")
      end

      # Assembles a single record from an item cell and its description cell.
      def record_for item, description, uri
        # Prefer a nested code element for the label; otherwise use a direct anchor child.
        label = (item.locate("*/code").first || item.locate("a").first).text
        summary = "#{self.class.text_for description}."

        # Use the first href found in the cell, or the page URI when the cell has none.
        link = item.locate("*/@href").first || uri

        # The lookahead matches (zero width) only when the link starts with a slash, so only
        # such links get the site URI prepended.
        model[label:, description: summary, uri: link.sub(%r(\A(?=/)), settings.htmx_site_uri)]
      end
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
pennyworth-17.8.0 lib/pennyworth/loaders/htmx.rb
pennyworth-17.7.0 lib/pennyworth/loaders/htmx.rb
pennyworth-17.6.0 lib/pennyworth/loaders/htmx.rb
pennyworth-17.5.0 lib/pennyworth/loaders/htmx.rb