lib/wayfarer/page.rb in wayfarer-0.4.6 vs lib/wayfarer/page.rb in wayfarer-0.4.7
- old
+ new
@@ -1,37 +1,59 @@
# frozen_string_literal: true
module Wayfarer
+ # @!attribute [r] url
+ # @return [String] the URL that was fetched
+ # @!attribute [r] status_code
+ # @return [Integer] HTTP status code
+ # @!attribute [r] body
+ # @return [String] the body of the response
+ # @!attribute [r] headers
+ # @return [Hash] the headers of the response
+ # @note HTTP header keys are downcased at construction, for example: `content-type`.
class Page
attr_reader :url,
:status_code,
:body,
:headers
+ # @!visibility private
def initialize(url:, status_code:, body:, headers:)
@url = url
@status_code = status_code
@body = body
@headers = headers.transform_keys(&:downcase) # downcase keys so lookups can use e.g. "content-type"
end
- def doc
- return @doc if @doc
+ # Returns the MIME type looked up from the response's Content-Type header (memoized once found).
+ # @return [MIME::Type, nil] the first match from `MIME::Types`, or nil when the lookup yields none
+ # @see https://www.rubydoc.info/gems/mime-types/MIME/Type
+ def mime_type
+ @mime_type ||= MIME::Types[content_type]&.first
+ end
- # If no Content-Type field is present, assume HTML/XML
- return @doc = Wayfarer::Parsing::XML.parse_html(body) unless headers["content-type"]
-
- content_type = headers["content-type"]
- sub_type = MIME::Types[content_type].first.sub_type
-
- @doc = case sub_type
- when "json" then Wayfarer::Parsing::JSON.parse(body)
- when "xml" then Wayfarer::Parsing::XML.parse_xml(body)
- else Wayfarer::Parsing::XML.parse_html(body)
- end
+ # Returns the HTTP response body or the browser DOM, parsed by {Wayfarer::Parsing.parse}
+ # according to the Content-Type. The result is memoized.
+ # @return [Nokogiri::HTML::Document] when Content-Type is `text/html`
+ # @see https://www.rubydoc.info/github/sparklemotion/nokogiri/Nokogiri/HTML/Document Nokogiri::HTML::Document
+ # @return [Nokogiri::XML::Document] when Content-Type is `text/xml`
+ # @see https://www.rubydoc.info/github/sparklemotion/nokogiri/Nokogiri/XML/Document Nokogiri::XML::Document
+ # @return [Hash] when Content-Type is `application/json`
+ # @note You can register custom parsers with {Wayfarer::Parsing.registry}.
+ def doc
+ @doc ||= Wayfarer::Parsing.parse(body, mime_type&.content_type || content_type) # falls back to the raw header value when no MIME type was resolved
end
+ # Returns a memoized `MetaInspector::Document` built from the URL, the response body and the headers.
+ # @return [MetaInspector::Document]
+ # @see https://www.rubydoc.info/gems/metainspector/MetaInspector/Document
def meta
- @meta ||= MetaInspector.new(url, document: body)
+ @meta ||= MetaInspector.new(url, document: body, headers: headers, normalize_url: false)
+ end
+
+ private
+
+ def content_type
+ @content_type ||= headers["content-type"] # raw Content-Type header value (keys were downcased in #initialize)
end
end
end