lib/indieweb/endpoints/parsers.rb in indieweb-endpoints-0.4.0 vs lib/indieweb/endpoints/parsers.rb in indieweb-endpoints-0.5.0
- old
+ new
@@ -2,42 +2,57 @@
module Endpoints
module Parsers
extend Registerable
class BaseParser
- # Ultra-orthodox pattern matching allowed values in Link header `rel` parameter
- # https://tools.ietf.org/html/rfc8288#section-3.3
- REGEXP_REG_REL_TYPE_PATTERN = '[a-z\d][a-z\d\-\.]*'.freeze
+ attr_reader :response
- # Liberal pattern matching a string of text between angle brackets
- # https://tools.ietf.org/html/rfc5988#section-5.1
- REGEXP_TARGET_URI_PATTERN = /^<(.*)>;/.freeze
-
def initialize(response)
raise ArgumentError, "response must be an HTTP::Response (given #{response.class.name})" unless response.is_a?(HTTP::Response)
@response = response
end
def results
return unless results_from_http_request
- @results ||= Absolutely.to_abs(base: @response.uri.to_s, relative: results_from_http_request)
+ @results ||= Absolutely.to_abs(base: response.uri.to_s, relative: results_from_http_request)
rescue Absolutely::InvalidURIError => exception
raise InvalidURIError, exception
end
private
- def discrete_link_headers
- # Split Link headers with multiple values, flatten the resulting array, and strip whitespace
- # https://webmention.rocks/test/19
- @discrete_link_headers ||= @response.headers.get('link').map { |header| header.split(',') }.flatten.map(&:strip)
+ def results_from_body
+ LinkElementParser.new(response, self.class.identifier).results
end
+ def results_from_headers
+ LinkHeaderParser.new(response, self.class.identifier).results
+ end
+
+ def results_from_http_request
+ @results_from_http_request ||= results_from_headers || results_from_body || nil
+ end
+ end
+
+ class LinkElementParser
+ attr_reader :identifier, :response
+
+ def initialize(response, identifier)
+ @response = response
+ @identifier = identifier
+ end
+
+ def results
+ link_element['href'] if response_is_html && link_element
+ end
+
+ private
+
def doc
- @doc ||= Nokogiri::HTML(@response.body.to_s)
+ @doc ||= Nokogiri::HTML(response.body.to_s)
end
def link_element
# Return first `link` element with valid `rel` attribute
# https://www.w3.org/TR/indieauth/#discovery-1
@@ -48,45 +63,62 @@
def link_elements
@link_elements ||= doc.css(link_elements_css_selector)
end
def link_elements_css_selector
- @link_elements_css_selector ||= %(link[rel~="#{self.class.identifier}"][href])
+ @link_elements_css_selector ||= %(link[rel~="#{identifier}"][href]:not([href*="#"]))
end
- def link_header
- @link_header ||= link_headers.shift
+ def response_is_html
+ @response_is_html ||= response.mime_type == 'text/html'
end
+ end
- def link_headers
- # Reduce Link headers to those with valid `rel` attribute
- @link_headers ||= discrete_link_headers.find_all { |header| header.match?(regexp_rel_paramater_pattern) }
- end
+ class LinkHeaderParser
+ # Ultra-orthodox pattern matching allowed values in Link header `rel` parameter
+ # https://tools.ietf.org/html/rfc8288#section-3.3
+ REGEXP_REG_REL_TYPE_PATTERN = '[a-z\d][a-z\d\-\.]*'.freeze
- def regexp_rel_paramater_pattern
- # Ultra-orthodox pattern matching Link header `rel` parameter including a matching identifier value
- # https://www.w3.org/TR/webmention/#sender-discovers-receiver-webmention-endpoint
- @regexp_rel_paramater_pattern ||= /(?:;|\s)rel="?(?:#{REGEXP_REG_REL_TYPE_PATTERN}+\s)?#{self.class.identifier}(?:\s#{REGEXP_REG_REL_TYPE_PATTERN})?"?/
- end
+ # Liberal pattern capturing a string of text (excepting the octothorp) between angle brackets
+ # https://tools.ietf.org/html/rfc5988#section-5.1
+ REGEXP_TARGET_URI_PATTERN = '^<(.[^#]*)>;'.freeze
- def results_from_body
- link_element['href'] if response_is_html && link_element
+ attr_reader :identifier, :response
+
+ def initialize(response, identifier)
+ @response = response
+ @identifier = identifier
end
- def results_from_headers
+ def results
return unless link_header
- endpoint_match_data = link_header.match(REGEXP_TARGET_URI_PATTERN)
+ endpoint_match_data = link_header.match(/#{REGEXP_TARGET_URI_PATTERN}/)
return endpoint_match_data[1] if endpoint_match_data
end
- def response_is_html
- @response_is_html ||= @response.mime_type == 'text/html'
+ private
+
+ def discrete_link_headers
+ # Split Link headers with multiple values, flatten the resulting array, and strip whitespace
+ # https://webmention.rocks/test/19
+ @discrete_link_headers ||= response.headers.get('link').map { |header| header.split(',') }.flatten.map(&:strip)
end
- def results_from_http_request
- @results_from_http_request ||= results_from_headers || results_from_body || nil
+ def link_header
+ @link_header ||= link_headers.shift
+ end
+
+ def link_headers
+ # Reduce Link headers to those with valid `rel` attribute
+ @link_headers ||= discrete_link_headers.find_all { |header| header.match?(/#{REGEXP_TARGET_URI_PATTERN}\s*#{regexp_rel_paramater_pattern}/) }
+ end
+
+ def regexp_rel_paramater_pattern
+ # Ultra-orthodox pattern matching Link header `rel` parameter including a matching identifier value
+ # https://www.w3.org/TR/webmention/#sender-discovers-receiver-webmention-endpoint
+ @regexp_rel_paramater_pattern ||= %(rel="?(?:#{REGEXP_REG_REL_TYPE_PATTERN}+\s)?#{identifier}(?:\s#{REGEXP_REG_REL_TYPE_PATTERN})?"?)
end
end
end
end
end