lib/wayfarer/page.rb in wayfarer-0.4.6 vs lib/wayfarer/page.rb in wayfarer-0.4.7

- old
+ new

# frozen_string_literal: true

module Wayfarer
  # Value object for a fetched page: its URL, HTTP status code, raw body and
  # response headers, plus memoized, parsed views of the body.
  #
  # @!attribute [r] url
  #   @return [String] the URL that was fetched
  # @!attribute [r] status_code
  #   @return [Integer] HTTP status code
  # @!attribute [r] body
  #   @return [String] the body of the response
  # @!attribute [r] headers
  #   @return [Hash] the headers of the response
  #   @note HTTP header keys are downcased, for example: `content-type`.
  class Page
    attr_reader :url, :status_code, :body, :headers

    # @!visibility private
    def initialize(url:, status_code:, body:, headers:)
      @url = url
      @status_code = status_code
      @body = body
      # Keys are normalized once here so every lookup can use the lowercase name.
      @headers = headers.transform_keys(&:downcase)
    end

    # Returns the MIME type of the response.
    # @return [MIME::Type, nil] the first match for the Content-Type header,
    #   or `nil` when the lookup yields nothing
    # @see https://www.rubydoc.info/gems/mime-types/MIME/Type
    def mime_type
      # `defined?` (rather than `||=`) so that a `nil` result is cached too and
      # the lookup is not repeated on every call.
      return @mime_type if defined?(@mime_type)

      @mime_type = MIME::Types[content_type]&.first
    end

    # Returns a parsed representation of the HTTP response or the browser DOM,
    # depending on the Content-Type.
    # @return [Nokogiri::HTML::Document] when Content-Type is `text/html`
    # @see https://www.rubydoc.info/github/sparklemotion/nokogiri/Nokogiri/HTML/Document Nokogiri::HTML::Document
    # @return [Nokogiri::XML::Document] when Content-Type is `text/xml`
    # @see https://www.rubydoc.info/github/sparklemotion/nokogiri/Nokogiri/XML/Document Nokogiri::XML::Document
    # @return [Hash] when Content-Type is `application/json`
    # @note You can register custom parsers with {Wayfarer::Parsing.registry}.
    def doc
      # `defined?` (rather than `||=`) so a falsy parse result (for example a
      # parser returning `nil`) is cached instead of being parsed again.
      return @doc if defined?(@doc)

      # Prefer the content type reported by the resolved MIME type and fall
      # back to the raw header value when no MIME type was resolved.
      @doc = Wayfarer::Parsing.parse(body, mime_type&.content_type || content_type)
    end

    # Returns a `MetaInspector::Document`.
    # @return [MetaInspector::Document]
    # @see https://www.rubydoc.info/gems/metainspector/MetaInspector/Document
    def meta
      @meta ||= MetaInspector.new(url, document: body, headers: headers, normalize_url: false)
    end

    private

    # Raw value of the `content-type` response header.
    # @return [String, nil] `nil` when the header is absent
    def content_type
      return @content_type if defined?(@content_type)

      @content_type = headers["content-type"]
    end
  end
end