lib/html2rss/utils.rb in html2rss-0.9.0 vs lib/html2rss/utils.rb in html2rss-0.10.0

- old
+ new

# frozen_string_literal: true

require 'addressable/uri'
require 'faraday'
require 'faraday/follow_redirects'
require 'json'
require 'regexp_parser'
require 'tzinfo'
require 'mime/types'
require_relative 'object_to_xml_converter'

module Html2rss
  ##
  # The collecting tank for utility methods.
  module Utils
    ##
    # Resolves a possibly relative URL against a base URL.
    #
    # An already absolute URL is returned as given (it is not normalized).
    # Otherwise it is joined onto the base URL and the result is normalized.
    #
    # @param url [String, Addressable::URI] absolute or relative URL
    # @param base_url [String] URL to resolve relative URLs against
    # @return [Addressable::URI]
    def self.build_absolute_url_from_relative(url, base_url)
      url = Addressable::URI.parse(url.to_s) unless url.is_a?(Addressable::URI)

      return url if url.absolute?

      base_uri = Addressable::URI.parse(base_url)
      # Give a path-less base (e.g. "https://example.com") a root path before joining.
      base_uri.path = '/' if base_uri.path.empty?

      base_uri.join(url).normalize
    end

    ##
    # Collapses every run of whitespace into a single space, strips leading
    # and trailing whitespace, then parses and normalizes the URL.
    #
    # @param url [String, #to_s]
    # @return [String, nil] sanitized and normalized URL, or nil if nothing is left after stripping
    def self.sanitize_url(url)
      url = url.to_s.gsub(/\s+/, ' ').strip
      return if url.empty?

      Addressable::URI.parse(url).normalize.to_s
    end

    ##
    # Overrides the process time zone (ENV['TZ']) while the given block runs
    # and restores the previous value afterwards, even if the block raises.
    #
    # NOTE(review): when ENV['TZ'] was not set beforehand, it is left set to
    # +default_time_zone+ afterwards rather than being unset again. The default
    # is whatever Time#zone reports for the local time -- confirm that this is
    # a value the platform accepts in TZ.
    #
    # @param time_zone [String] identifier understood by TZInfo::Timezone.get
    # @param default_time_zone [String] value to restore when ENV['TZ'] was not set
    # @return [Object] whatever the given block returns
    # @raise [ArgumentError] if no block is given
    def self.use_zone(time_zone, default_time_zone: Time.now.getlocal.zone)
      raise ArgumentError, 'a block is required' unless block_given?

      # Errors raised by the lookup propagate; prev_tz is still nil then, so
      # the ensure clause leaves ENV untouched.
      time_zone = TZInfo::Timezone.get(time_zone)

      prev_tz = ENV.fetch('TZ', default_time_zone)
      ENV['TZ'] = time_zone.name
      yield
    ensure
      ENV['TZ'] = prev_tz if prev_tz
    end

    ##
    # Builds a titleized representation of the URL: the host, followed by the
    # capitalized non-empty path segments when there are any,
    # e.g. "example.com: Foo Bar".
    #
    # @param url [String, Addressable::URI]
    # @return [String]
    def self.titleized_url(url)
      uri = Addressable::URI.parse(url)
      host = uri.host

      nicer_path = uri.path.split('/').reject(&:empty?)
      nicer_path.any? ? "#{host}: #{nicer_path.map(&:capitalize).join(' ')}" : host
    end

    ##
    # Performs an HTTP GET request (with the follow-redirects middleware
    # enabled) and returns the response body.
    #
    # @param url [String, Addressable::URI]
    # @param convert_json_to_xml [true, false] Should JSON be converted to XML
    # @param headers [Hash] additional HTTP request headers to use for the request
    # @return [String] body of the HTTP response, or the result of
    #   ObjectToXmlConverter when convert_json_to_xml is true
    def self.request_body_from_url(url, convert_json_to_xml: false, headers: {})
      connection = Faraday.new(url:, headers:) do |faraday|
        faraday.use Faraday::FollowRedirects::Middleware
        faraday.adapter Faraday.default_adapter
      end

      body = connection.get.body

      convert_json_to_xml ? ObjectToXmlConverter.new(JSON.parse(body)).call : body
    end

    ##
    # Parses the given String and builds a Regexp out of it.
    #
    # It will remove one pair of surrounding slashes ('/') from the String
    # to maintain backwards compatibility before building the Regexp.
    # A lone "/" is not a pair and is kept as is.
    #
    # The string is parsed with the EXTENDED and IGNORECASE options.
    #
    # @param string [String]
    # @return [Regexp]
    # @raise [ArgumentError] if the argument is not a String
    def self.build_regexp_from_string(string)
      raise ArgumentError, 'must be a string!' unless string.is_a?(String)

      # Require at least two characters so that a single "/" is not mistaken
      # for an (empty) slash-delimited pattern.
      string = string[1..-2] if string.length > 1 && string.start_with?('/') && string.end_with?('/')
      Regexp::Parser.parse(string, options: ::Regexp::EXTENDED | ::Regexp::IGNORECASE).to_re
    end

    ##
    # Guesses the content type based on the file extension of the URL.
    # Query string and fragment are ignored.
    #
    # @param url [String, Addressable::URI]
    # @return [String] guessed content type, defaults to 'application/octet-stream'
    def self.guess_content_type_from_url(url)
      # Everything before the first '?' or '#'. The pattern can match the empty
      # string, so this is never nil, even for empty input.
      path = url.to_s[/\A[^?#]*/]

      content_type = MIME::Types.type_for(File.extname(path).delete('.'))
      content_type.first&.to_s || 'application/octet-stream'
    end
  end
end