# Copyright (C) 2013  Kouhei Sutou <kou@clear-code.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

require "pathname"
require "uri"

module ChupaText
  class Extractor
    include Loggable

    def initialize
      @decomposers = []
    end

    # Sets the extractor up by the configuration. It adds decomposers
    # enabled in the configuration.
    #
    # @param [Configuration] configuration The configuration to be
    #   applied.
    #
    # @return [void]
    def apply_configuration(configuration)
      decomposers = Decomposers.create(Decomposer.registry,
                                       configuration.decomposer)
      decomposers.each do |decomposer|
        add_decomposer(decomposer)
      end
    end

    def add_decomposer(decomposer)
      @decomposers << decomposer
    end

    # Extracts texts from input. Each extracted text is passes to the
    # given block.
    #
    # @param [Data, String] input The input to be extracted texts.
    #   If `input` is `String`, it is treated as the local file path or URI
    #   of input data.
    #
    # @yield [text_data] Gives extracted text data to the block.
    #   The block may be called zero or more times.
    # @yieldparam [Data] text_data The extracted text data.
    #   You can get text data by `text_data.body`.
    #
    # @return [void]
    def extract(input)
      targets = [ensure_data(input)]
      until targets.empty?
        target = targets.pop
        debug do
          "#{log_tag}[extract][target] <#{target.path}>:<#{target.mime_type}>"
        end
        if target.text_plain?
          yield(target)
          next
        end
        decomposer = find_decomposer(target)
        if decomposer.nil?
          debug {"#{log_tag}[extract][decomposer] not found"}
          yield(target) if target.text?
          next
        end
        debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"}
        decomposer.decompose(target) do |decomposed|
          debug do
            "#{log_tag}[extract][decomposed] " +
              "#{decomposer.class}: " +
              "<#{target.path}>:<#{target.mime_type}> -> " +
              "<#{decomposed.mime_type}>"
          end
          targets.push(decomposed)
        end
      end
    end

    private
    def ensure_data(input)
      if input.is_a?(Data)
        input
      else
        InputData.new(input)
      end
    end

    def find_decomposer(data)
      @decomposers.find do |decomposer|
        decomposer.target?(data)
      end
    end

    def log_tag
      "[extractor]"
    end
  end
end