utils.rb in html2rss-0.10.0

- old
+ new
@@ -1,40 +1,115 @@
-require 'active_support/core_ext/hash'
+# frozen_string_literal: true
+
 require 'addressable/uri'
-require 'builder'
+require 'faraday'
+require 'faraday/follow_redirects'
 require 'json'
-require 'nokogiri'
+require 'regexp_parser'
+require 'tzinfo'
+require 'mime/types'
+require_relative 'object_to_xml_converter'
 
 module Html2rss
   ##
   # The collecting tank for utility methods.
   module Utils
+    ##
+    # @param url [String, Addressable::URI]
+    # @param base_url [String]
+    # @return [Addressable::URI]
     def self.build_absolute_url_from_relative(url, base_url)
-      url = URI(url) if url.is_a?(String)
+      url = Addressable::URI.parse(url.to_s) unless url.is_a?(Addressable::URI)
 
       return url if url.absolute?
 
-      URI(base_url).tap do |uri|
-        uri.path = url.path.to_s.start_with?('/') ? url.path : "/#{url.path}"
-        uri.query = url.query
-        uri.fragment = url.fragment if url.fragment
-      end
+      base_uri = Addressable::URI.parse(base_url)
+      base_uri.path = '/' if base_uri.path.empty?
+
+      base_uri.join(url).normalize
     end
 
-    def self.object_to_xml(object)
-      object.to_xml(skip_instruct: true, skip_types: true)
+    ##
+    # Collapses whitespace runs to a single space, strips both ends, then parses and normalizes the given url.
+    # @param url [String]
+    # @return [String, nil] sanitized and normalized URL, or nil if the input is nil, empty or whitespace-only
+    def self.sanitize_url(url)
+      url = url.to_s.gsub(/\s+/, ' ').strip
+      return if url.empty?
+
+      Addressable::URI.parse(url).normalize.to_s
     end
 
-    def self.get_class_from_name(snake_cased_name, module_name)
-      camel_cased_name = snake_cased_name.split('_').map(&:capitalize).join
-      class_name = ['Html2rss', module_name, camel_cased_name].join('::')
-      Object.const_get(class_name)
+    ##
+    # Allows override of time zone locally inside supplied block; resets previous time zone when done.
+    #
+    # @param time_zone [String]
+    # @param default_time_zone [String]
+    # @return [Object] whatever the given block returns
+    def self.use_zone(time_zone, default_time_zone: Time.now.getlocal.zone)
+      raise ArgumentError, 'a block is required' unless block_given?
+
+      time_zone = TZInfo::Timezone.get(time_zone)
+
+      prev_tz = ENV.fetch('TZ', default_time_zone)
+      ENV['TZ'] = time_zone.name
+      yield
+    ensure
+      ENV['TZ'] = prev_tz if prev_tz
     end
 
-    def self.sanitize_url(url)
-      squished_url = url.to_s.split(' ').join
-      return if squished_url.to_s == ''
+    ##
+    # Builds a titleized representation of the URL.
+    # @param url [String, Addressable::URI]
+    # @return [String]
+    def self.titleized_url(url)
+      uri = Addressable::URI.parse(url)
+      host = uri.host
 
-      Addressable::URI.parse(squished_url).normalize.to_s
+      nicer_path = uri.path.split('/').reject(&:empty?)
+      nicer_path.any? ? "#{host}: #{nicer_path.map(&:capitalize).join(' ')}" : host
+    end
+
+    ##
+    # @param url [String, Addressable::URI]
+    # @param convert_json_to_xml [true, false] Should JSON be converted to XML
+    # @param headers [Hash] additional HTTP request headers to use for the request
+    # @return [String] body of the HTTP response
+    def self.request_body_from_url(url, convert_json_to_xml: false, headers: {})
+      response = Faraday.new(url:, headers:) do |faraday|
+        faraday.use Faraday::FollowRedirects::Middleware
+        faraday.adapter Faraday.default_adapter
+      end.get
+
+      body = response.body
+
+      convert_json_to_xml ? ObjectToXmlConverter.new(JSON.parse(body)).call : body
+    end
+
+    ##
+    # Parses the given String and builds a Regexp out of it.
+    #
+    # It will remove one pair of surrounding slashes ('/') from the String
+    # to maintain backwards compatibility before building the Regexp.
+    #
+    # @param string [String]
+    # @return [Regexp]
+    def self.build_regexp_from_string(string)
+      raise ArgumentError, 'must be a string!' unless string.is_a?(String)
+
+      string = string[1..-2] if string.start_with?('/') && string.end_with?('/')
+      Regexp::Parser.parse(string, options: ::Regexp::EXTENDED | ::Regexp::IGNORECASE).to_re
+    end
+
+    ##
+    # Guesses the content type based on the file extension of the URL.
+    #
+    # @param url [String, Addressable::URI]
+    # @return [String] guessed content type, defaults to 'application/octet-stream'
+    def self.guess_content_type_from_url(url)
+      url = url.to_s.split('?').first
+
+      content_type = MIME::Types.type_for(File.extname(url).delete('.'))
+      content_type.first&.to_s || 'application/octet-stream'
     end
   end
 end