require 'percy/capybara/loaders/base_loader'
require 'digest'
require 'uri'

module Percy
  module Capybara
    module Loaders
      # Resource loader that uses the native Capybara browser interface to discover resources.
      # This loader uses JavaScript to discover page resources, so specs must be tagged with
      # "js: true" because the default Rack::Test driver does not support executing JavaScript.
      class NativeLoader < BaseLoader
        # Matches a root-relative path: a leading slash followed by any run of characters that
        # are neither whitespace nor quote characters. (The character class previously read
        # `\\s`, which excluded a literal backslash and the letter "s" instead of whitespace.)
        PATH_REGEX = %r{\A/[^\s"']*}
        # Matches strings that start with the "data:" scheme.
        DATA_URL_REGEX = /\Adata:/
        # Hostnames that _should_include_url? always treats as local.
        LOCAL_HOSTNAMES = [
          'localhost',
          '127.0.0.1',
          '0.0.0.0',
        ].freeze

        # Collects every resource for the current snapshot: the root HTML resource first,
        # followed by the CSS, image and iframe resources, in that order.
        #
        # @return [Array] a new array containing all collected resources.
        def snapshot_resources
          [root_html_resource] +
            _get_css_resources +
            _get_image_resources +
            iframes_resources
        end

        # Always returns an empty array.
        # NOTE(review): presumably this loader contributes no build-level resources and this
        # overrides a BaseLoader hook -- confirm against BaseLoader.
        #
        # @return [Array] an empty array.
        def build_resources
          []
        end

        # Discovers the stylesheets of the current page by running JavaScript in the browser,
        # fetches each one that _should_include_url? accepts and wraps it in a resource.
        #
        # Each stylesheet URL is fetched at most once, even if the page reports it several times
        # (for example when it is both linked and @import-ed). Stylesheets that fail to fetch
        # are skipped.
        #
        # @private
        # @return [Array<Percy::Client::Resource>] one 'text/css' resource per fetched stylesheet.
        def _get_css_resources
          # Find all CSS resources.
          # http://www.quirksmode.org/dom/w3c_css.html#access
          script = <<-JS
            function findStylesRecursively(stylesheet, css_urls) {
              if (stylesheet.href) {  // Skip stylesheet without hrefs (inline stylesheets).
                css_urls.push(stylesheet.href);

                // Remote stylesheet rules cannot be accessed because of the same-origin policy.
                // Unfortunately, if you touch .cssRules in Selenium, it throws a JavascriptError
                // with 'The operation is insecure'. To work around this, skip reading rules of
                // remote stylesheets but still include them for fetching.
                //
                // TODO: If a remote stylesheet has an @import, it will be missing because we don't
                // notice it here. We could potentially recursively fetch remote imports in
                // ruby-land below.
                var parser = document.createElement('a');
                parser.href = stylesheet.href;
                if (parser.host != window.location.host) {
                  return;
                }
              }
              for (var i = 0; i < stylesheet.cssRules.length; i++) {
                var rule = stylesheet.cssRules[i];
                // Depth-first search, handle recursive @imports.
                if (rule.styleSheet) {
                  findStylesRecursively(rule.styleSheet, css_urls);
                }
              }
            }

            var css_urls = [];
            for (var i = 0; i < document.styleSheets.length; i++) {
              findStylesRecursively(document.styleSheets[i], css_urls);
            }
            return css_urls;
          JS
          resource_urls = _evaluate_script(page, script)

          # Computed lazily (and only once) so that pages without any includable stylesheet
          # never need the current host to be resolved.
          host_port = nil

          # uniq: the same href can be pushed more than once by the script above; fetching and
          # emitting it repeatedly would only produce duplicate resources.
          resource_urls.uniq.each_with_object([]) do |url, resources|
            next unless _should_include_url?(url)
            response = _fetch_resource_url(url)
            next unless response

            host_port ||= _current_host_port
            _absolute_url_to_relative!(url, host_port)
            resources << Percy::Client::Resource.new(
              url, mimetype: 'text/css', content: response.body
            )
          end
        end
        private :_get_css_resources

        # Discovers the images used by the current page -- <img> src/srcset values plus CSS
        # background-image url()s of every element -- fetches each one that
        # _should_include_url? accepts and wraps it in a resource.
        #
        # Every distinct absolute URL is fetched at most once, no matter how many times or in
        # which form (relative or absolute) the page references it. Images that fail to fetch
        # are skipped.
        #
        # @private
        # @return [Array<Percy::Client::Resource>] one resource per fetched image.
        def _get_image_resources
          image_urls = []

          # Find all image tags on the page.
          page.all('img').each do |image_element|
            src = image_element[:src]
            image_urls << src unless src.nil?

            # Each srcset candidate looks like "<url> [descriptor]"; keep only the URL part.
            # A blank candidate (e.g. after a trailing comma) has no URL, so skip it instead of
            # collecting nil.
            (image_element[:srcset] || '').split(',').each do |candidate|
              candidate_url = candidate.split(' ').first
              image_urls << candidate_url if candidate_url
            end
          end

          # Find all CSS-loaded images which set a background-image.
          script = <<-JS
            var raw_image_urls = [];

            var tags = document.getElementsByTagName('*');
            var el;
            var rawValue;

            for (var i = 0; i < tags.length; i++) {
              el = tags[i];
              if (el.currentStyle) {
                rawValue = el.currentStyle['backgroundImage'];
              } else if (window.getComputedStyle) {
                rawValue = window.getComputedStyle(el).getPropertyValue('background-image');
              }
              if (!rawValue || rawValue === "none") {
                continue;
              } else {
                raw_image_urls.push(rawValue);
              }
            }
            return raw_image_urls;
          JS
          _evaluate_script(page, script).each do |raw_image_url|
            # background-image can accept multiple url()s, so scan yields one capture per url().
            raw_image_url.scan(/url\(["']?(.*?)["']?\)/).each do |captures|
              image_urls << captures.first
            end
          end

          resources = []
          current_url = page.current_url
          # Absolute URLs already handled. Tracked separately from `resources` because the URL
          # stored on a resource may have been rewritten to a relative path.
          seen_urls = {}
          # Resolved lazily, only once an image has actually been fetched.
          host_port = nil

          image_urls.uniq.each do |image_url|
            # If url references are blank, browsers will often fill them with the current page's
            # URL, which makes no sense and will never be renderable. Strip these.
            next if image_url == current_path \
              || image_url == current_url \
              || image_url.strip.empty?

            # Make the resource URL absolute to the current page. If it is already absolute, this
            # will have no effect.
            resource_url = URI.join(current_url, image_url).to_s

            # Skip duplicates.
            next if seen_urls.key?(resource_url)
            seen_urls[resource_url] = true

            next unless _should_include_url?(resource_url)

            # Fetch the images.
            # TODO(fotinakis): this can be pretty inefficient for image-heavy pages because the
            # browser has already loaded them once and this fetch cannot easily leverage the
            # browser's cache. However, often these images are probably local resources served by a
            # development server, so it may not be so bad. Re-evaluate if this becomes an issue.
            response = _fetch_resource_url(resource_url)
            next unless response

            host_port ||= _current_host_port
            _absolute_url_to_relative!(resource_url, host_port)
            resources << Percy::Client::Resource.new(
              resource_url, mimetype: response.content_type, content: response.body
            )
          end
          resources
        end
        private :_get_image_resources

        # Fetches +url+ through Percy::Capybara::HttpFetcher. When the fetcher returns nothing,
        # a warning is written to STDERR and nil is returned.
        #
        # @private
        # @param url [String] the URL to fetch.
        # @return [Object, nil] whatever the fetcher returned, or nil when the fetch failed.
        def _fetch_resource_url(url)
          fetched = Percy::Capybara::HttpFetcher.fetch(url)
          return fetched if fetched

          STDERR.puts '[percy] Warning: failed to fetch page resource, ' \
            "this might be a bug: #{url}"
          nil
        end
        private :_fetch_resource_url

        # @private
        def _evaluate_script(page, script)
          script = <<-JS
            (function() {
              #{script}
            })();
          JS
          page.evaluate_script(script)
        end
        private :_evaluate_script

        # Decides whether +url+ should be fetched and included as a resource.
        #
        # Data URIs are never included. A string matching URL_REGEX is included only when its
        # host is one of LOCAL_HOSTNAMES or when it points at the same server as the current
        # page. Anything else is included only when it matches PATH_REGEX.
        #
        # @private
        # @param url [String] the URL or path to check.
        # @return [Boolean]
        def _should_include_url?(url)
          remote_match = URL_REGEX.match(url)
          return false if DATA_URL_REGEX.match(url)

          # Not URL-shaped: accept only root-relative paths.
          return !!PATH_REGEX.match(url) unless remote_match

          # URL-shaped: accept only non-remote hosts.
          local_host = LOCAL_HOSTNAMES.include?(remote_match[2])
          !!(local_host || _same_server?(url, _current_host_port))
        end

        # Builds a string from the first three URL_REGEX captures of the current page URL,
        # concatenated in order; the third capture is optional and contributes nothing when
        # absent.
        # NOTE(review): the captures look like scheme prefix, host and ":port" -- confirm
        # against the URL_REGEX definition.
        #
        # @private
        # @return [String]
        def _current_host_port
          current = URL_REGEX.match(page.current_url)
          leading = current[1]
          host = current[2]
          port = current[3].to_s
          leading + host + port
        end

        # Tells whether +url+ lives on the server identified by +host_port+: either it starts
        # with +host_port+ followed by a slash, or it is exactly +host_port+.
        #
        # @private
        # @param url [String] the absolute URL to check.
        # @param host_port [String] the server prefix to compare against.
        # @return [Boolean]
        def _same_server?(url, host_port)
          origin_prefix = host_port + '/'
          return true if url.start_with?(origin_prefix)

          url == host_port
        end

        # Rewrites +url+ in place into a root-relative path when it starts with +host_port+
        # followed by a slash; otherwise +url+ is left untouched.
        #
        # Only the leading occurrence is stripped, so a URL that happens to embed the same
        # origin later on (for instance inside a query string) keeps that part intact.
        #
        # @private
        # @param url [String] the URL to rewrite; mutated in place.
        # @param host_port [String] the server prefix to strip.
        # @return [String, nil] the mutated +url+, or nil when nothing was changed.
        def _absolute_url_to_relative!(url, host_port)
          origin_prefix = host_port + '/'
          # sub! with a String pattern replaces just the first literal occurrence, which is the
          # prefix itself thanks to the start_with? guard.
          url.sub!(origin_prefix, '/') if url.start_with?(origin_prefix)
        end
        private :_absolute_url_to_relative!
      end
    end
  end
end