require 'pathname'
require 'percy/capybara'

module Percy
  module Capybara
    module Loaders
      # Abstract base class for resource loaders. It holds the Capybara page and provides
      # shared helpers for building Percy resources from the current page, from same-host
      # iframes, and from files on disk. Subclasses implement {#build_resources} and
      # {#snapshot_resources}.
      class BaseLoader
        # Modified version of Diego Perini's URL regex. https://gist.github.com/dperini/729294
        #
        # Capture groups: 1 = protocol ("http://", "https://" or "//"), 2 = host,
        # 3 = port (optional), 4 = resource path including query/fragment (optional).
        URL_REGEX = Regexp.new(
          # protocol identifier
          '((?:https?:)?//)' \
          '(' +
            # IP address exclusion
            # private & local networks
            '(?!(?:10|127)(?:\\.\\d{1,3}){3})' \
            '(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})' \
            '(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})' +
            # IP address dotted notation octets
            # excludes loopback network 0.0.0.0
            # excludes reserved space >= 224.0.0.0
            # excludes network & broadcast addresses
            # (first & last IP address of each class)
            '(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])' \
            '(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}' \
            '(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))' \
          '|' +
            # host name
            '(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)' +
            # domain name
            '(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*' \
          ')' +
          # port number
          '(:\\d{2,5})?' +
          # resource path
          "(/[^\\s\"']*)?",
        )

        # File extensions that are never uploaded as resources.
        SKIP_RESOURCE_EXTENSIONS = [
          '.map', # Ignore source maps.
          '.gz', # Ignore gzipped files.
        ].freeze

        # Files larger than this are skipped by {#_resources_from_dir}.
        MAX_FILESIZE_BYTES = 15 * 1024**2 # 15 MB.

        # Exception class names that mean "this frame could not be entered" and are skipped
        # by {#iframes_resources}. Compared by name because this library does not explicitly
        # depend on Poltergeist.
        IGNORED_FRAME_ERROR_CLASSES = [
          'Capybara::Poltergeist::FrameNotFound',
          'Capybara::Poltergeist::TimeoutError',
        ].freeze

        attr_reader :page

        # @param [Hash] options
        # @option options [Capybara::Session] :page The Capybara page.
        # @option options [Boolean] :include_iframes Include iframes in the snapshot
        #   (defaults to false).
        def initialize(options = {})
          @page = options[:page]
          @include_iframes = options[:include_iframes] || false
        end

        # Abstract. Subclasses must override this.
        #
        # @raise [NotImplementedError] always, in the base class.
        def build_resources
          raise NotImplementedError, 'subclass must implement abstract method'
        end

        # Abstract. Subclasses must override this.
        #
        # @raise [NotImplementedError] always, in the base class.
        def snapshot_resources
          raise NotImplementedError, 'subclass must implement abstract method'
        end

        # Builds the root resource from the page's current HTML, keyed by {#current_path}.
        #
        # @private
        def root_html_resource
          Percy::Client::Resource.new(
            current_path, is_root: true, mimetype: 'text/html', content: page.html,
          )
        end

        # Transformed version of the current URL to be a relative path.
        # This is important because Rack::Test uses "www.example.com" as the actual current URL,
        # which would force Percy to actually render example.com instead of the page. By always
        # using a URL path as the resource URL, we guarantee that Percy will render what it's given.
        #
        # @return [String] the path (plus query/fragment) of the current URL; "/" when the URL
        #   has no path component; otherwise the raw URL with a leading slash forced.
        # @private
        def current_path
          current_url = page.current_url
          url_match = URL_REGEX.match(current_url)
          # Capture 4 is the optional resource path. It is nil for URLs such as
          # "http://localhost:3000", so fall back to the root path rather than returning nil.
          return (url_match[4] || '/') if url_match

          # Special case: prepend a slash to the path to force a valid URL for things like
          # "about:srcdoc" iframe srcdoc pages.
          current_url = "/#{current_url}" if current_url[0] != '/'

          current_url
        end

        # Builds one HTML resource per iframe that is served from the same host as the root
        # page. Returns an empty list when iframes are disabled or the driver does not
        # support frames.
        #
        # NOTES:
        # - Doesn't handle multiple iframes with the same URL (`src` attribute)
        # - Iframes without a `src` attribute are skipped, since there is no URL to register
        #   their content under.
        #
        # @return [Array<Percy::Client::Resource>]
        # @private
        def iframes_resources
          return [] unless @include_iframes

          resources = []

          page.all(:css, 'iframe').each do |iframe_element|
            iframe_url = iframe_element[:src]
            # URI.parse(nil) would raise below and abort the whole snapshot.
            next if iframe_url.nil?

            root_page_host = page.current_host
            begin
              page.within_frame(iframe_element) do
                # Only same-host frames are captured.
                next unless page.current_host == root_page_host
                path = URI.parse(iframe_url).path
                content = page.html
                resources << Percy::Client::Resource.new(
                  path,
                  content: content,
                  sha: Digest::SHA256.hexdigest(content),
                  mimetype: 'text/html',
                )
              end
            rescue StandardError => e
              # Skip frame not found errors. This library doesn't explicitly depend on
              # Poltergeist, so we check the string class name. Anything else is re-raised.
              raise unless IGNORED_FRAME_ERROR_CLASSES.include?(e.class.to_s)
            end
          end

          resources
        rescue ::Capybara::NotSupportedByDriverError
          []
        end

        # Builds a resource for every file found under root_dir, skipping the extensions in
        # SKIP_RESOURCE_EXTENSIONS and files larger than MAX_FILESIZE_BYTES.
        #
        # @param [String, Pathname] root_dir Directory to scan recursively.
        # @param [String] base_url URL prefix that replaces root_dir in each resource URL.
        # @return [Array<Percy::Client::Resource>]
        def _resources_from_dir(root_dir, base_url: '/')
          root = Pathname.new(root_dir)

          _find_files(root_dir).each_with_object([]) do |path, resources|
            # Skip certain extensions.
            next if SKIP_RESOURCE_EXTENSIONS.include?(File.extname(path))
            # Skip large files, these are hopefully downloads and not used in page rendering.
            next if File.size(path) > MAX_FILESIZE_BYTES

            # Replace the root_dir with the base_url to generate the resource_url. The
            # relative part is computed structurally instead of by text substitution, which
            # would remove the first occurrence of the root text anywhere in the path
            # (e.g. the first "." of every file name when root_dir is ".").
            relative_path = Pathname.new(path).relative_path_from(root).to_s
            # "." means root_dir was itself a file; keep the historical empty suffix.
            relative_path = '' if relative_path == '.'
            resource_url = _uri_join(base_url, relative_path)

            # Hash the raw bytes. Digest reads the file in binary mode, so the checksum is
            # not affected by text-mode newline/EOF handling on Windows.
            sha = Digest::SHA256.file(path).hexdigest

            resources << Percy::Client::Resource.new(resource_url, sha: sha, path: path)
          end
        end

        # A simplified version of Find.find that only returns files and follows symlinks.
        # Entries that are neither a regular file nor a directory (dangling symlinks, sockets,
        # FIFOs, ...) are skipped instead of aborting the scan.
        #
        # @param [Array<String, Pathname, Array>] paths Files and/or directories to scan.
        # @return [Array<String>] paths of all regular files found.
        # @raise [Errno::ENOENT] if a given path does not exist at all.
        def _find_files(*paths)
          paths.flatten.each_with_object([]) do |entry, files|
            path = Pathname.new(entry)
            if path.file?
              files << path.to_s
            elsif path.directory?
              files.concat(_find_files(path.children))
            elsif !path.symlink? && !path.exist?
              # A path that is simply missing is still reported to the caller.
              raise Errno::ENOENT, path.to_s
            end
          end
        end

        # Joins URL path segments with single forward slashes.
        #
        # @param [Array<String>] paths Segments to join.
        # @return [String]
        def _uri_join(*paths)
          joined = File.join(paths)
          # File.join always inserts File::SEPARATOR ("/"), but on Windows the segments
          # themselves may contain File::ALT_SEPARATOR ("\\"). This is a URL, so normalise
          # those to "/". ALT_SEPARATOR is nil on other platforms.
          alt_separator = File::ALT_SEPARATOR
          alt_separator ? joined.gsub(alt_separator, '/') : joined
        end
      end
    end
  end
end