require 'set'
require 'faraday'
require 'httpclient'
require 'digest'

module Percy
  module Capybara
    class Client
      module Snapshots
        # Modified version of Diego Perini's URL regex. https://gist.github.com/dperini/729294
        #
        # This version is not anchored, treats the protocol as optional (so protocol-relative URLs
        # such as "//localhost/app.css" match), and captures the host (IP address or host name,
        # without the port) as group 1.
        URL_REGEX = Regexp.new(
          # protocol identifier
          "(?:(?:https?:)?//)" +
          "(" +
            # IP address exclusion
            # private & local networks
            "(?!(?:10|127)(?:\\.\\d{1,3}){3})" +
            "(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})" +
            "(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})" +
            # IP address dotted notation octets
            # excludes loopback network 0.0.0.0
            # excludes reserved space >= 224.0.0.0
            # excludes network & broadcast addresses
            # (first & last IP address of each class)
            "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])" +
            "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}" +
            "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))" +
          "|" +
            # host name
            "(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)" +
            # domain name
            "(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*" +
          ")" +
          # port number
          "(?::\\d{2,5})?" +
          # resource path
          "(?:/[^\\s\"']*)?"
        )

        # Matches strings that begin with "/" (paths). The character class mirrors the resource
        # path part of URL_REGEX: anything except whitespace and quote characters. Note that in a
        # regexp literal the whitespace class is written "\s"; "\\s" would instead exclude a
        # literal backslash and the letter "s".
        PATH_REGEX = %r{\A/[^\s"']*}

        # Matches data URIs ("data:..."), which carry their content inline.
        DATA_URL_REGEX = /\Adata:/

        # Hosts that are treated as local when deciding whether a URL should be included.
        LOCAL_HOSTNAMES = [
          'localhost',
          '127.0.0.1',
          '0.0.0.0',
        ].freeze

        # Captures a snapshot of the given page: its HTML plus the assets found on it.
        #
        # The snapshot is created with every discovered resource, the content of each resource
        # reported back as missing is uploaded, and the snapshot is then finalized.
        #
        # @param [Capybara::Session] page The Capybara page to snapshot.
        # @param [Hash] options
        # @option options [String] :name A unique name for the current page that identifies it across
        #   builds. By default this is the URL of the page, but can be customized if the URL does not
        #   entirely identify the current state.
        # @return [true, nil] true once the snapshot is finalized, nil when the client is disabled.
        def snapshot(page, options = {})
          # A disabled client skips the snapshot silently.
          return unless enabled?

          snapshot_name = options[:name]
          build_id = current_build['data']['id']
          resources_by_sha = _find_resources(page)
          created = client.create_snapshot(build_id, resources_by_sha.values, name: snapshot_name)

          # Only the resources listed as missing need their content uploaded.
          missing_entries = created['data']['relationships']['missing-resources']['data']
          missing_entries.each do |entry|
            resource = resources_by_sha[entry['id']]
            client.upload_resource(build_id, resource.content)
          end

          client.finalize_snapshot(created['data']['id'])
          true
        end

        # @private
        # Gathers the root HTML, CSS and image resources of the page, in that order, and indexes
        # them by SHA. When several resources share a SHA, the one gathered last is kept.
        def _find_resources(page)
          gathered = [_get_root_html_resource(page)]
          gathered.concat(_get_css_resources(page))
          gathered.concat(_get_image_resources(page))

          gathered.each_with_object({}) do |resource, by_sha|
            by_sha[resource.sha] = resource
          end
        end
        private :_find_resources

        # @private
        def _get_root_html_resource(page)
          # Primary HTML.
          script = <<-JS
            var htmlElement = document.getElementsByTagName('html')[0];
            return htmlElement.outerHTML;
          JS
          html = _evaluate_script(page, script)
          resource_url = page.current_url
          Percy::Client::Resource.new(
            resource_url, is_root: true, mimetype: 'text/html', content: html)
        end
        private :_get_root_html_resource

        # @private
        # Collects the stylesheets loaded by the page as resources.
        #
        # Stylesheet URLs are discovered in the browser (including same-host @imports), filtered
        # through _should_include_url?, and then fetched. Each distinct URL is fetched once;
        # stylesheets that cannot be fetched are skipped.
        #
        # @param [Capybara::Session] page
        # @return [Array<Percy::Client::Resource>] one 'text/css' resource per fetched stylesheet.
        def _get_css_resources(page)
          # Find all CSS resources.
          # http://www.quirksmode.org/dom/w3c_css.html#access
          script = <<-JS
            function findStylesRecursively(stylesheet, css_urls) {
              if (stylesheet.href) {  // Skip stylesheet without hrefs (inline stylesheets).
                css_urls.push(stylesheet.href);

                // Remote stylesheet rules cannot be accessed because of the same-origin policy.
                // Unfortunately, if you touch .cssRules in Selenium, it throws a JavascriptError
                // with 'The operation is insecure'. To work around this, skip reading rules of
                // remote stylesheets but still include them for fetching.
                //
                // TODO: If a remote stylesheet has an @import, it will be missing because we don't
                // notice it here. We could potentially recursively fetch remote imports in
                // ruby-land below.
                var parser = document.createElement('a');
                parser.href = stylesheet.href;
                if (parser.host != window.location.host) {
                  return;
                }
              }
              for (var i = 0; i < stylesheet.cssRules.length; i++) {
                var rule = stylesheet.cssRules[i];
                // Depth-first search, handle recursive @imports.
                if (rule.styleSheet) {
                  findStylesRecursively(rule.styleSheet, css_urls);
                }
              }
            }

            var css_urls = [];
            for (var i = 0; i < document.styleSheets.length; i++) {
              findStylesRecursively(document.styleSheets[i], css_urls);
            }
            return css_urls;
          JS
          stylesheet_urls = _evaluate_script(page, script)

          # The script pushes every href it visits, so a stylesheet that is linked or imported
          # more than once appears repeatedly; fetch each distinct URL only once.
          stylesheet_urls.uniq.each_with_object([]) do |url, resources|
            next unless _should_include_url?(url)

            response = _fetch_resource_url(url)
            next unless response

            resources << Percy::Client::Resource.new(
              url, mimetype: 'text/css', content: response.body)
          end
        end
        private :_get_css_resources

        # @private
        # Collects the images used by the page as resources.
        #
        # Candidate URLs come from <img> src and srcset attributes and from the url()s in each
        # element's computed background-image. Candidates are stripped, de-duplicated, made
        # absolute against the current page URL, filtered through _should_include_url?, and then
        # fetched. Blank URLs, the page's own URL, data URIs, unparseable URLs and images that
        # cannot be fetched are skipped.
        #
        # @param [Capybara::Session] page
        # @return [Array<Percy::Client::Resource>] one resource per fetched image, using the
        #   content type reported by the response as its mimetype.
        def _get_image_resources(page)
          resources = []
          candidate_urls = []

          # Find all image tags on the page.
          page.all('img').each do |image_element|
            candidate_urls << image_element[:src]

            # Each srcset candidate is "<url> [descriptor]"; only the URL part is needed. A blank
            # candidate (e.g. after a trailing comma) yields nil, which is dropped below.
            (image_element[:srcset] || '').split(',').each do |candidate|
              candidate_urls << candidate.split(' ').first
            end
          end

          # Find all CSS-loaded images which set a background-image.
          script = <<-JS
            var raw_image_urls = [];

            var tags = document.getElementsByTagName('*');
            var el;
            var rawValue;

            for (var i = 0; i < tags.length; i++) {
              el = tags[i];
              if (el.currentStyle) {
                rawValue = el.currentStyle['backgroundImage'];
              } else if (window.getComputedStyle) {
                rawValue = window.getComputedStyle(el).getPropertyValue('background-image');
              }
              if (!rawValue || rawValue === "none") {
                continue;
              } else {
                raw_image_urls.push(rawValue);
              }
            }
            return raw_image_urls;
          JS
          raw_image_urls = _evaluate_script(page, script)
          raw_image_urls.each do |raw_image_url|
            # background-image can accept multiple url()s, so scan yields one capture per url().
            raw_image_url.scan(/url\(["']?(.*?)["']?\)/).each do |captures|
              candidate_urls << captures[0]
            end
          end

          page_url = page.current_url
          image_urls = candidate_urls.compact.map(&:strip).uniq

          image_urls.each do |image_url|
            # If url references are blank, browsers will often fill them with the current page's
            # URL, which makes no sense and will never be renderable. Strip these.
            next if image_url.empty? || image_url == page_url

            # Data URIs are never included, and they may contain characters that URI.join cannot
            # parse, so drop them before trying to resolve the URL.
            next if DATA_URL_REGEX.match(image_url)

            # Make the resource URL absolute to the current page. If it is already absolute, this
            # will have no effect. One malformed URL should not abort the whole snapshot.
            begin
              resource_url = URI.join(page_url, image_url).to_s
            rescue URI::InvalidURIError
              STDERR.puts "[percy] Warning: skipping image with an unparseable URL: #{image_url}"
              next
            end

            next unless _should_include_url?(resource_url)

            # Fetch the images.
            # TODO(fotinakis): this can be pretty inefficient for image-heavy pages because the
            # browser has already loaded them once and this fetch cannot easily leverage the
            # browser's cache. However, often these images are probably local resources served by a
            # development server, so it may not be so bad. Re-evaluate if this becomes an issue.
            response = _fetch_resource_url(resource_url)
            next unless response

            resources << Percy::Client::Resource.new(
              resource_url, mimetype: response.content_type, content: response.body)
          end
          resources
        end
        private :_get_image_resources

        # @private
        # Decides whether a URL should be included: data URIs are rejected, anything matching
        # URL_REGEX is accepted only when its host is one of LOCAL_HOSTNAMES, and otherwise the
        # value is accepted only if it matches PATH_REGEX.
        def _should_include_url?(url)
          # URL_REGEX is evaluated first, as before, although a data URI is rejected regardless.
          remote_candidate = URL_REGEX.match(url)
          return false if DATA_URL_REGEX.match(url)

          # Anything that looks like a URL is only kept when its host (capture 1) is local.
          return LOCAL_HOSTNAMES.include?(remote_candidate[1]) if remote_candidate

          # Otherwise it has to be a path.
          PATH_REGEX.match(url) ? true : false
        end

        # @private
        # Fetches a resource through Percy::Capybara::HttpFetcher. Returns the response, or nil
        # (after printing a warning to STDERR) when the fetcher returns nothing.
        def _fetch_resource_url(url)
          fetched = Percy::Capybara::HttpFetcher.fetch(url)
          return fetched if fetched

          STDERR.puts "[percy] Warning: failed to fetch page resource, this might be a bug: #{url}"
          nil
        end
        private :_fetch_resource_url

        # @private
        # Wraps the given JavaScript in an immediately-invoked function, so that it may use
        # `return`, and evaluates it on the page. Returns whatever page.evaluate_script returns.
        def _evaluate_script(page, script)
          # The wrapper text (including its leading indentation and trailing newline) is kept
          # exactly as it was previously sent to the browser.
          wrapped = [
            '            (function() {',
            "              #{script}",
            '            })();',
            '',
          ].join("\n")
          page.evaluate_script(wrapped)
        end
        private :_evaluate_script
      end
    end
  end
end