# encoding: UTF-8

module Quesadilla
  # Extract entities from text
  class Extractor
    require 'quesadilla/core_ext/string'
    Dir[File.expand_path('../extractor/*.rb', __FILE__)].each { |f| require f }

    include Autolinks
    include Emoji
    include Hashtags
    include HTML
    include Markdown
    include Users

    # @return [Hash] default extractor options
    def self.default_options
      {
        markdown: true,
        markdown_code: true,
        markdown_links: true,
        markdown_triple_emphasis: true,
        markdown_double_emphasis: true,
        markdown_emphasis: true,
        markdown_strikethrough: true,
        hashtags: true,
        autolinks: true,
        emoji: true,
        users: false,
        user_validator: nil,
        html: true,
        html_renderer: Quesadilla::HTMLRenderer
      }
    end

    # @param options [Hash] an optional options hash. Defaults to `Quesadilla::Extractor.default_options`.
    # @option options [Boolean] Should extract Markdown. Defaults to `true`.
    # @option options markdown_code [Boolean] Should extract Markdown code. Defaults to `true`.
    # @option options markdown_links [Boolean] Should extract Markdown links. Defaults to `true`.
    # @option options markdown_triple_emphasis [Boolean] Should extract Markdown triple emphasis (bold italic). Defaults to `true`.
    # @option options markdown_double_emphasis [Boolean] Should extract Markdown double emphasis (bold). Defaults to `true`.
    # @option options markdown_emphasis [Boolean] Should extract Markdown emphasis (italic). Defaults to `true`.
    # @option options markdown_strikethrough [Boolean] Should extract Markdown strikethrough. Defaults to `true`.
    # @option options hashtags [Boolean] Should extract hashtags. Defaults to `true`.
    # @option options autolinks [Boolean] Should automatically detect links. Defaults to `true`.
    # @option options emoji [Boolean] Should extract named emoji. Defaults to `true`.
    # @option options users [Boolean] Should extract user mentions. Defaults to `false`.
    # @option options user_validator A callable object to validate a username. This should return the user ID of the user or nil if it is invalid. Invalid users will be left as plain text. If the validator is nil, all usernames will be extracted. Defaults to `nil`.
    # @option options html [Boolean] Should generate HTML. Defaults to `true`.
    # @option options html_renderer [Class] class to use as HTML renderer. Defaults to `Quesadilla::HTMLRenderer`.
    def initialize(options = {})
      @options = self.class.default_options.merge(options)
      @renderer = @options[:html_renderer].new if @options[:html]
    end

    # Extract entities from text
    # @param original_text the text to extract from
    # @return [Hash] hash containing the display text, html text, and entities
    def extract(original_text)
      @original_text = original_text.dup

      # Emoji colon-syntax
      replace_emoji if @options[:emoji]

      @working_text = @original_text.dup
      @entities = []

      # Get entities
      extract_markdown if @options[:markdown]
      extract_hashtags if @options[:hashtags]
      extract_autolinks if @options[:autolinks]
      extract_users if @options[:users]

      # Sort entities
      @entities.sort! do |a, b|
        a[:indices].first <=> b[:indices].first
      end

      # Adjust display for each entity
      display_text = sub_entities(@original_text, @entities)

      # Return
      hash = {
        display_text: display_text,
        entities: @entities
      }
      hash[:display_html] = display_html(display_text, @entities) if @options[:html]
      hash
    end

  private

    # Invisible character from the reserved range replaces markdown we've already parsed.
    REPLACE_TOKEN = "\uf042".freeze

    def display_url(url)
      url = url.gsub(/(?:https?:\/\/)?(?:www\.)?/i, '').q_truncate(32, omission: '…')
      url = url[0...(url.length - 1)] if url[-1, 1] == '/'
      url
    end

    def quality_url(url)
      return url if url.include?('://')
      'http://' + url
    end

    def sub_entities(input_text, entities, display = false, &block)
      # Adjust output text for each entity
      output_text = input_text
      offset = 0
      entities.each do |entity|
        entity_original_text = display ? entity[:display_text] : entity[:text]
        entity_display_text = if block_given?
          yield(entity)
        else
          entity[:display_text]
        end

        indices = display ? entity[:display_indices] : entity[:indices]

        # Use the entity's display text instead of original text if they're different
        unless entity_original_text == entity_display_text
          # Get the fragment before the entity
          bf_end = indices[0] - 1 - offset
          before_frag = bf_end <= 0 ? '' : output_text[0..bf_end]

          # Get the fragment after the entity
          af_start = indices[1] - offset
          af_end = output_text.length - 1
          after_frag = af_start > af_end ? '' : output_text[af_start..af_end]

          # Update the output text
          output_text = before_frag + entity_display_text + after_frag
        end

        # Update offset
        adjust = entity_original_text.length - entity_display_text.length
        unless display
          entity[:display_indices] = [entity[:indices][0] - offset, entity[:indices][1] - offset - adjust]
        end
        offset += adjust
      end
      output_text
    end
  end
end