require 'thread'
require 'fileutils'
require 'securerandom'
require 'mechanize'               # Mechanize is used below but was not required
require 'eventmachine'            # EM reactor driving the download loop
require 'em-http-request'         # EventMachine::HttpRequest
require 'active_support/core_ext' # provides #present?
require_relative 'helper'

module ImgDl
  class Parser
    include Helper

    Default_Options = {
      url_limit_count:   nil,   # stop enqueuing pages after this many URLs
      url_reg:           nil,   # only follow links matching this regexp
      image_limit_count: nil,   # stop enqueuing images after this many
      image_reg:         nil,   # only download images matching this regexp
      recursive:         false, # follow links found on parsed pages
      prefix:            nil,   # filename prefix for saved images
      interval:          0      # seconds to sleep between page fetches
    }

    attr_reader :agent, :origin_url, :options, :image_count, :url_count,
                :running, :error_urls, :downloaded_image_count,
                :success_download, :status, :dl_status
    alias running? running

    def initialize url, save_path, options = {}
      @agent = Mechanize.new
      @agent.user_agent_alias = 'Linux Mozilla'
      @origin_url  = URI url
      @current_url = URI url
      @_urls = Hash.new 0 # URLs already seen, for de-duplication
      @_imgs = Hash.new 0 # image sources already seen
      @save_path = save_path
      FileUtils.mkdir_p save_path
      @image_count = 0
      @url_count   = 0
      @urls       = Queue.new
      @error_urls = Queue.new
      enq_urls url
      @images = Queue.new
      @options = Default_Options.merge options
      define_options_helper @options # Helper: defines readers like #interval, #recursive?, #url_reg?
      @downloaded_image_count = 0
      @running     = true
      @downloading = true
      @success_download = 0
      @status    = "start"
      @dl_status = "ready"
    end

    # Runs the page parser in a background thread while the calling thread
    # blocks in the EventMachine download loop.
    def start
      Thread.start { parse }
      download
    rescue StandardError => e
      p e
    end

    # Crawls pages from the URL queue, collecting image sources and
    # (optionally) further links, until a limit is hit or the queue drains.
    def parse
      loop do
        break unless next_parse?
        sleep interval
        @status = "get url"
        url = @urls.shift
        # NOTE: URI.escape is deprecated/removed in newer Rubies; this code
        # targets the older Ruby/Mechanize versions it was written for.
        url = URI.escape url if url.respond_to? :gsub # URI objects don't respond to :gsub
        @current_url = URI url
        begin
          page = @agent.get url
        rescue StandardError => e
          @error_urls << [url, e]
          puts e
          next
        end
        # Skip non-HTML responses (plain files have no #images).
        unless page.respond_to? :images
          redo
        end
        parse_images page
        parse_links page if continue?
      end
      @running = false
      @status = "parser complete"
    end

    def default_head
      @_default_head ||= {
        "USER-AGENT"      => "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17",
        "ACCEPT-ENCODING" => "gzip,deflate,sdch",
        "ACCEPT"          => '*/*',
        "ACCEPT-CHARSET"  => "UTF-8,*;q=0.5",
        "ACCEPT-LANGUAGE" => "zh-CN,zh;q=0.8",
        "connection"      => "close"
      }
    end

    # Pulls image URIs off the queue and fetches them asynchronously with
    # em-http-request, stopping the reactor once everything is accounted for.
    def download
      @dl_status = "start"
      @_download_image = 0 # number of image requests issued
      EM.run do
        loop do
          if !running? && (@images.empty? || (image_limit_count? && @_download_image >= image_limit_count))
            @dl_status = "all done"
            download_complete? and EM.stop
            break
          end
          if @images.empty?
            if running?
              @dl_status = "wait parser"
              sleep 3
              redo
            else
              next
            end
          end
          @_download_image += 1
          @dl_status = "shift image url"
          image_uri = @images.shift
          @dl_status = "download image #{image_uri}"
          http = EventMachine::HttpRequest.new(image_uri).get head: default_head
          http.callback do |res|
            # Derive the file extension from the Content-Type header.
            res.response_header["CONTENT_TYPE"] =~ /^image\/(\w+)/
            type = $1
            if type
              @success_download += 1
              save_image type, res.response
            else
              @error_urls << [image_uri, "image download error"]
            end
            @downloaded_image_count += 1
            @dl_status = "success: download image #{image_uri}"
            download_complete? and EM.stop
          end
          http.errback do |res|
            @error_urls << [image_uri, "image download error"]
            @downloaded_image_count += 1
            @dl_status = "failed: download image #{image_uri}"
            download_complete? and EM.stop
          end
        end
      end
      @dl_status = "download complete"
      @downloading = false
    end

    protected

    # True once the parser has stopped and every issued request has finished
    # (or the image limit has been reached).
    def download_complete?
      !running? && (@downloaded_image_count >= @_download_image ||
        (image_limit_count? and @downloaded_image_count >= image_limit_count))
    end

    def random_file_name
      SecureRandom.uuid
    end

    def save_image name = random_file_name, type, content
      file_name = File.join @save_path, "#{prefix}#{name}.#{type}"
      File.open(file_name, "w+") do |io|
        io.binmode
        io.write content
      end
    end

    # Returns a truthy value if the URL parses; records the error otherwise.
    def valid_url? url
      URI url
    rescue StandardError => e
      @error_urls << [url, e]
      false
    end

    def enq_urls link
      if !link_dup?(link) && valid_url?(link)
        @_urls[link] += 1
        @urls << link
        @url_count += 1
      end
    end

    def enq_images src
      if !image_dup?(src) && valid_url?(src)
        @_imgs[src] += 1
        @images << src
        @image_count += 1
      end
    end

    def link_dup? link
      @_urls.has_key? link
    end

    def image_dup? src
      @_imgs.has_key? src
    end

    def valid_link? link
      if url_reg?
        link.to_s =~ url_reg && !link_dup?(link)
      else
        !link_dup?(link)
      end
    end

    def parse_links page
      @status = "parse urls"
      links = page.links.map { |link| link.href.present? and URI.join @current_url, URI.escape(link.href) rescue nil }
      links.select! { |link| link.present? and valid_link?(link) }
      links.each { |link| enq_urls link }
    end

    def parse_images page
      @status = "parse images"
      images = page.images.map { |img| img.src.present? && URI.join(@current_url, URI.escape(img.src)) }
      images.select! { |img| img.to_s =~ image_reg } if image_reg?
      images.each { |img| enq_images img }
    end

    # Keep following links only in recursive mode and while under the limits.
    def continue?
      recursive? &&
        (image_limit_count? ? @image_count < image_limit_count : true) &&
        (url_limit_count? ? @url_count < url_limit_count : true)
    end

    # Keep parsing while under the limits and the URL queue is non-empty.
    def next_parse?
      (image_limit_count? ? @image_count < image_limit_count : true) &&
        (url_limit_count? ? @url_count < url_limit_count : true) &&
        !@urls.empty?
    end
  end
end
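
# Usage sketch (not part of the original file). It assumes the option readers
# such as #recursive? and #image_limit_count? are generated by
# Helper#define_options_helper, as the code above implies; the URL, save path,
# and option values are illustrative placeholders.
#
#   require_relative 'parser'
#
#   parser = ImgDl::Parser.new "http://example.com", "/tmp/imgs",
#                              recursive: true,
#                              image_limit_count: 100,
#                              image_reg: /\.(jpe?g|png|gif)/i,
#                              interval: 1
#
#   parser.start # spawns the parse thread, then blocks in the EM download loop
#   puts "saved #{parser.success_download} images, #{parser.error_urls.size} errors"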