require 'unf'

module Twitter
  module Validation extend self
    DEFAULT_TCO_URL_LENGTHS = {
      :short_url_length => 23,
    }

    # :weighted_length the weighted length of tweet based on weights specified in the config
    # :valid If tweet is valid
    # :permillage permillage of the tweet over the max length specified in config
    # :valid_range_start beginning of valid text
    # :valid_range_end End index of valid part of the tweet text (inclusive)
    # :display_range_start beginning index of display text
    # :display_range_end end index of display text (inclusive)
    class ParseResults < Hash

      RESULT_PARAMS = [:weighted_length, :valid, :permillage, :valid_range_start, :valid_range_end, :display_range_start, :display_range_end]

      def self.empty
        return ParseResults.new(weighted_length: 0, permillage: 0, valid: true, display_range_start: 0, display_range_end: 0, valid_range_start: 0, valid_range_end: 0)
      end

      def initialize(params = {})
        RESULT_PARAMS.each do |key|
          super[key] = params[key] if params.key?(key)
        end
      end
    end

    # Parse input text and return hash with descriptive parameters populated.
    def parse_tweet(text, options = {})
      options = DEFAULT_TCO_URL_LENGTHS.merge(options)
      config = options[:config] || Twitter::Configuration.default_configuration
      normalized_text = text.to_nfc
      normalized_text_length = normalized_text.char_length
      unless (normalized_text_length > 0)
        ParseResults.empty()
      end

      scale = config.scale
      max_weighted_tweet_length = config.max_weighted_tweet_length
      scaled_max_weighted_tweet_length = max_weighted_tweet_length * scale
      transformed_url_length = config.transformed_url_length * scale
      ranges = config.ranges

      url_entities = Twitter::Extractor.extract_urls_with_indices(normalized_text)

      has_invalid_chars = false
      weighted_count = 0
      offset = 0
      display_offset = 0
      valid_offset = 0

      while offset < normalized_text_length
        # Reset the default char weight each pass through the loop
        char_weight = config.default_weight
        url_entities.each do |url_entity|
          if url_entity[:indices].first == offset
            url_length = url_entity[:indices].last - url_entity[:indices].first
            weighted_count += transformed_url_length
            offset += url_length
            display_offset += url_length
            if weighted_count <= scaled_max_weighted_tweet_length
              valid_offset += url_length
            end
            # Finding a match breaks the loop; order of ranges matters.
            break
          end
        end

        if offset < normalized_text_length
          code_point = normalized_text[offset]

          ranges.each do |range|
            if range.contains?(code_point.unpack("U").first)
              char_weight = range.weight
              break
            end
          end

          weighted_count += char_weight

          has_invalid_chars = contains_invalid?(normalized_text[offset]) unless has_invalid_chars
          char_count = code_point.char_length
          offset += char_count
          display_offset += char_count

          if !has_invalid_chars && (weighted_count <= scaled_max_weighted_tweet_length)
            valid_offset += char_count
          end
        end
      end
      normalized_text_offset = text.char_length - normalized_text.char_length
      scaled_weighted_length = weighted_count / scale
      is_valid = !has_invalid_chars && (scaled_weighted_length <= max_weighted_tweet_length)
      permillage = scaled_weighted_length * 1000 / max_weighted_tweet_length

      return ParseResults.new(weighted_length: scaled_weighted_length, permillage: permillage, valid: is_valid, display_range_start: 0, display_range_end: (display_offset + normalized_text_offset - 1), valid_range_start: 0, valid_range_end: (valid_offset + normalized_text_offset - 1))
    end

    def contains_invalid?(text)
      return false if !text || text.empty?
      begin
        return true if Twitter::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
      rescue ArgumentError
        # non-Unicode value.
        return true
      end
      return false
    end

    def valid_username?(username)
      return false if !username || username.empty?

      extracted = Twitter::Extractor.extract_mentioned_screen_names(username)
      # Should extract the username minus the @ sign, hence the [1..-1]
      extracted.size == 1 && extracted.first == username[1..-1]
    end

    VALID_LIST_RE = /\A#{Twitter::Regex[:valid_mention_or_list]}\z/o
    def valid_list?(username_list)
      match = username_list.match(VALID_LIST_RE)
      # Must have matched and had nothing before or after
      !!(match && match[1] == "" && match[4] && !match[4].empty?)
    end

    def valid_hashtag?(hashtag)
      return false if !hashtag || hashtag.empty?

      extracted = Twitter::Extractor.extract_hashtags(hashtag)
      # Should extract the hashtag minus the # sign, hence the [1..-1]
      extracted.size == 1 && extracted.first == hashtag[1..-1]
    end

    def valid_url?(url, unicode_domains=true, require_protocol=true)
      return false if !url || url.empty?

      url_parts = url.match(Twitter::Regex[:validate_url_unencoded])
      return false unless (url_parts && url_parts.to_s == url)

      scheme, authority, path, query, fragment = url_parts.captures

      return false unless ((!require_protocol ||
                           (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
                           valid_match?(path, Twitter::Regex[:validate_url_path]) &&
                           valid_match?(query, Twitter::Regex[:validate_url_query], true) &&
                           valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true))

      return (unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_unicode_authority])) ||
             (!unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_authority]))
    end

    # These methods are deprecated, will be removed in future.
    extend Deprecation

    MAX_LENGTH_LEGACY = 140

    # DEPRECATED: Please use parse_text instead.
    #
    # Returns the length of the string as it would be displayed. This is equivilent to the length of the Unicode NFC
    # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
    # string no matter which actual form was transmitted. For example:
    #
    #     U+0065  Latin Small Letter E
    # +   U+0301  Combining Acute Accent
    # ----------
    # =   2 bytes, 2 characters, displayed as é (1 visual glyph)
    #     … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single chracter and a +display_length+ of 1
    #
    # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
    #
    def tweet_length(text, options = {})
      options = DEFAULT_TCO_URL_LENGTHS.merge(options)

      length = text.to_nfc.unpack("U*").length

      Twitter::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position|
        length += start_position - end_position
        length += options[:short_url_length] if url.length > 0
      end

      length
    end
    deprecate :tweet_length, :parse_tweet

    # DEPRECATED: Please use parse_text instead.
    #
    # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
    # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
    # will allow quicker feedback.
    #
    # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
    #
    #   <tt>:too_long</tt>:: if the <tt>text</tt> is too long
    #   <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
    #   <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
    def tweet_invalid?(text)
      return :empty if !text || text.empty?
      begin
        return :too_long if tweet_length(text) > MAX_LENGTH_LEGACY
        return :invalid_characters if Twitter::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
      rescue ArgumentError
        # non-Unicode value.
        return :invalid_characters
      end

      return false
    end
    deprecate :tweet_invalid?, :parse_tweet

    def valid_tweet_text?(text)
      !tweet_invalid?(text)
    end
    deprecate :valid_tweet_text?, :parse_tweet

    private

    def valid_match?(string, regex, optional=false)
      return (string && string.match(regex) && $~.to_s == string) unless optional

      !(string && (!string.match(regex) || $~.to_s != string))
    end
  end
end