require 'find'
require 'digest'
require 'uri'
require 'thread/pool'

Thread::Pool.abort_on_exception = true

module Percy
  class Cli
    module Snapshot
      # Static resource types that an HTML file might load and that we want to upload for rendering.
      STATIC_RESOURCE_EXTENSIONS = [
        '.css', '.jpg', '.jpeg', '.gif', '.ico', '.png', '.bmp', '.pict', '.tif', '.tiff', '.ttf',
        '.eot', '.woff', '.otf', '.svg', '.svgz', '.webp', '.ps',
      ].freeze

      DEFAULT_SNAPSHOTS_REGEX = /\.(html|htm)$/

      # Modified version of Diego Perini's URL regex. https://gist.github.com/dperini/729294
      REMOTE_URL_REGEX_STRING = (
        # protocol identifier
        "(?:(?:https?:)?//)" +
        "(?:" +
          # IP address exclusion
          # private & local networks
          "(?!(?:10|127)(?:\\.\\d{1,3}){3})" +
          "(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})" +
          "(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})" +
          # IP address dotted notation octets
          # excludes loopback network 0.0.0.0
          # excludes reserved space >= 224.0.0.0
          # excludes network & broacast addresses
          # (first & last IP address of each class)
          "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])" +
          "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}" +
          "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))" +
        "|" +
          # host name
          "(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)" +
          # domain name
          "(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*" +
          # TLD identifier
          "(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))" +
        ")" +
        # port number
        "(?::\\d{2,5})?" +
        # resource path
        "(?:/[^\\s\"']*)?"
      )
      HTML_REMOTE_URL_REGEX = Regexp.new("(<link.*?href=['\"](" + REMOTE_URL_REGEX_STRING + ")[^>]+)")

      # Match all url("https://...") styles, with whitespace and quote variatinos.
      CSS_REMOTE_URL_REGEX = Regexp.new(
        "url\\s*\\([\"'\s]*(" + REMOTE_URL_REGEX_STRING + ")[\"'\s]*\\)"
      )

      def run_snapshot(root_dir, options = {})
        repo = options[:repo] || Percy.config.repo
        strip_prefix = File.absolute_path(options[:strip_prefix] || root_dir)
        autoload_remote_resources = options[:autoload_remote_resources] || false
        num_threads = options[:threads] || 10
        snapshot_limit = options[:snapshot_limit]

        # Find all the static files in the given root directory.
        root_paths = find_root_paths(root_dir, snapshots_regex: options[:snapshots_regex])
        resource_paths = find_resource_paths(root_dir)
        root_resources = build_resources(root_paths, strip_prefix, is_root: true)
        related_resources = build_resources(resource_paths, strip_prefix)

        if autoload_remote_resources
          remote_urls = find_remote_urls(root_paths + resource_paths)
          related_resources += build_remote_resources(remote_urls)
        end

        all_resources = root_resources + related_resources

        if root_resources.empty?
          say "No root resource files found. Are there HTML files in the given directory?"
          exit(-1)
        end

        say 'Creating build...'
        build = Percy.create_build(repo, resources: related_resources)

        say 'Uploading build resources...'
        upload_missing_resources(build, build, all_resources, {num_threads: num_threads})

        # Upload a snapshot for every root resource, and associate the related_resources.
        output_lock = Mutex.new
        snapshot_thread_pool = Thread.pool(num_threads)
        total = snapshot_limit ? [root_resources.length, snapshot_limit].min : root_resources.length
        root_resources.each_with_index do |root_resource, i|
          break if snapshot_limit && i + 1 > snapshot_limit
          snapshot_thread_pool.process do
            output_lock.synchronize do
              say "Uploading snapshot (#{i+1}/#{total}): #{root_resource.resource_url}"
            end
            snapshot = Percy.create_snapshot(build['data']['id'], [root_resource])
            upload_missing_resources(build, snapshot, all_resources, {num_threads: num_threads})
            Percy.finalize_snapshot(snapshot['data']['id'])
          end
        end
        snapshot_thread_pool.wait
        snapshot_thread_pool.shutdown

        # Finalize the build.
        say 'Finalizing build...'
        Percy.finalize_build(build['data']['id'])
        say "Done! Percy is now processing, you can view the visual diffs here:"
        say build['data']['attributes']['web-url']
      end

      private

      def find_root_paths(dir_path, options = {})
        snapshots_regex = options[:snapshots_regex] || DEFAULT_SNAPSHOTS_REGEX

        file_paths = []
        Find.find(dir_path).each do |relative_path|
          path = File.absolute_path(relative_path)
          # Skip directories.
          next if !FileTest.file?(path)
          # Skip files that don't match the snapshots_regex.
          next if !path.match(snapshots_regex)
          file_paths << path
        end
        file_paths
      end

      def find_resource_paths(dir_path)
        file_paths = []
        Find.find(dir_path).each do |relative_path|
          path = File.absolute_path(relative_path)

          # Skip directories.
          next if !FileTest.file?(path)
          # Skip dot files.
          next if path.match(/\/\./)
          # Only include files with the above static extensions.
          next if !Percy::Cli::STATIC_RESOURCE_EXTENSIONS.include?(File.extname(path))

          file_paths << path
        end
        file_paths
      end

      def find_remote_urls(file_paths)
        urls = []
        file_paths.each do |path|
          extension = File.extname(path)
          case extension
          when '.html'
            content = File.read(path)
            urls += content.scan(HTML_REMOTE_URL_REGEX).map do |match|
              next if !match[0].include?('stylesheet')  # Only include links with rel="stylesheet".
              maybe_add_protocol(match[1])
            end
          when '.css'
            content = File.read(path)
            urls += content.scan(CSS_REMOTE_URL_REGEX).map { |match| maybe_add_protocol(match[0]) }
          end
        end
        urls.compact.uniq
      end

      def maybe_add_protocol(url)
        url[0..1] == '//' ? "http:#{url}" : url
      end

      def build_resources(paths, strip_prefix, options = {})
        resources = []

        # Strip trailing slash from strip_prefix.
        strip_prefix = strip_prefix[0..-2] if strip_prefix[-1] == '/'

        paths.each do |path|
          sha = Digest::SHA256.hexdigest(File.read(path))
          resource_url = URI.escape(path.sub(strip_prefix, ''))
          resources << Percy::Client::Resource.new(
            resource_url, sha: sha, is_root: options[:is_root], path: path)
        end
        resources
      end

      def build_remote_resources(remote_urls)
        resources = []

        bar = Commander::UI::ProgressBar.new(
          remote_urls.length,
          title: 'Fetching remote resources...',
          format: ':title |:progress_bar| :percent_complete% complete - :url',
          width: 20,
          complete_message: "Fetched #{remote_urls.length} remote resources.",
        )

        remote_urls.each do |url|
          bar.increment url: url
          begin
            response = Faraday.get(url)
          rescue Faraday::Error::ConnectionFailed, Faraday::SSLError => e
            say_error e
            next
          end
          if response.status != 200
            say_error "Remote resource failed, skipping (#{response.status}): #{url}"
            next
          end

          sha = Digest::SHA256.hexdigest(response.body)
          resources << Percy::Client::Resource.new(url, sha: sha, content: response.body)
        end
        resources
      end

      # Uploads missing resources either for a build or snapshot.
      def upload_missing_resources(build, obj, potential_resources, options = {})
        # Upload the content for any missing resources.
        missing_resources = obj['data']['relationships']['missing-resources']['data']
        bar = Commander::UI::ProgressBar.new(
          missing_resources.length,
          title: 'Uploading resources...',
          format: ':title |:progress_bar| :percent_complete% complete - :resource_url',
          width: 20,
          complete_message: nil,
        )
        output_lock = Mutex.new
        uploader_thread_pool = Thread.pool(options[:num_threads] || 10)
        missing_resources.each do |missing_resource|
          uploader_thread_pool.process do
            missing_resource_sha = missing_resource['id']
            resource = potential_resources.find { |r| r.sha == missing_resource_sha }
            path = resource.resource_url
            output_lock.synchronize do
              bar.increment resource_url: resource.resource_url
            end

            # Remote resources are stored in 'content', local resources are read from the filesystem.
            content = resource.content || File.read("#{resource.path}")

            Percy.upload_resource(build['data']['id'], content)
          end
        end
        uploader_thread_pool.wait
        uploader_thread_pool.shutdown
      end
    end
  end
end