# encoding: ascii-8bit

# Copyright 2014 Ball Aerospace & Technologies Corp.
# All Rights Reserved.
#
# This program is free software; you can modify and/or redistribute it
# under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 3 with
# attribution addendums as found in the LICENSE.txt

require 'irb/ruby-lex'
require 'stringio'

# Clear the $VERBOSE global since we're overriding methods
old_verbose = $VERBOSE
$VERBOSE = nil
class RubyLex
  # Only add the reader for indent when the class does not already have one
  method_defined?(:indent) ? attr_writer(:indent) : attr_accessor(:indent)

  # @return [Integer] The expression line number. This can differ from the
  #   actual line number due to white space and Ruby control keywords.
  attr_accessor :exp_line_no

  # Puts the lexer state back to its starting values so that a new piece of
  # text can be parsed. The existing buffers are emptied in place rather than
  # replaced.
  #
  # @return [nil]
  def reinitialize
    # Position bookkeeping
    @seek         = 0
    @exp_line_no  = 1
    @line_no      = 1
    @base_char_no = 0
    @char_no      = 0

    # Character buffers and indentation tracking
    [@rests, @readed, @here_readed].each(&:clear)
    @indent = 0
    @indent_stack.clear

    # Lexer flags
    @lex_state   = EXPR_BEG
    @space_seen  = false
    @here_header = false
    @continue    = false
    @line        = ''
    @skip_space  = false
    @readed_auto_clean_up      = false
    @exception_on_syntax_error = true
    @prompt = nil
  end

  # Returns everything read so far as one string and starts a new, empty
  # read buffer. Overridden to locate the last newline with rindex instead
  # of reversing the whole buffer, which was a performance problem.
  #
  # @return [String] The joined contents of the read buffer
  def get_readed
    last_newline = @readed.rindex("\n")
    if last_newline
      # Number of characters that follow the last newline
      @base_char_no = @readed.size - last_newline - 1
    else
      @base_char_no += @readed.size
    end

    text = @readed.join("")
    @readed = []
    text
  end

  # Pushes a character back onto the front of the pending input. Overridden
  # to locate the last newline with rindex instead of reversing the whole
  # buffer, which was a performance problem.
  #
  # @param c [String, nil] Character to push back. When not given, the
  #   character most recently popped from the read buffers is used.
  # @return [Integer] The updated character number
  def ungetc(c = nil)
    # A character is always popped, even when an explicit one was passed in
    popped = @here_readed.empty? ? @readed.pop : @here_readed.pop
    c ||= popped
    @rests.unshift(c)
    @seek -= 1
    if c != "\n"
      @char_no -= 1
    else
      @line_no -= 1
      newline_at = @readed.rindex("\n")
      @char_no = newline_at ? newline_at + 1 : @base_char_no + @readed.size
    end
  end
end
$VERBOSE = old_verbose

# Helpers built on RubyLex for inspecting Ruby source text: detecting
# keywords and block openers, stripping comments, and splitting text into
# segments flagged as instrumentable or not.
class RubyLexUtils
  # Regular expression to detect blank lines
  BLANK_LINE_REGEX  = /^\s*$/
  # Regular expression to detect lines containing only 'else'
  LONELY_ELSE_REGEX = /^\s*else\s*$/

  # Ruby keywords
  KEYWORD_TOKENS = [RubyToken::TkCLASS,
                    RubyToken::TkMODULE,
                    RubyToken::TkDEF,
                    RubyToken::TkUNDEF,
                    RubyToken::TkBEGIN,
                    RubyToken::TkRESCUE,
                    RubyToken::TkENSURE,
                    RubyToken::TkEND,
                    RubyToken::TkIF,
                    RubyToken::TkUNLESS,
                    RubyToken::TkTHEN,
                    RubyToken::TkELSIF,
                    RubyToken::TkELSE,
                    RubyToken::TkCASE,
                    RubyToken::TkWHEN,
                    RubyToken::TkWHILE,
                    RubyToken::TkUNTIL,
                    RubyToken::TkFOR,
                    RubyToken::TkBREAK,
                    RubyToken::TkNEXT,
                    RubyToken::TkREDO,
                    RubyToken::TkRETRY,
                    RubyToken::TkIN,
                    RubyToken::TkDO,
                    RubyToken::TkRETURN,
                    RubyToken::TkIF_MOD,
                    RubyToken::TkUNLESS_MOD,
                    RubyToken::TkWHILE_MOD,
                    RubyToken::TkUNTIL_MOD,
                    RubyToken::TkALIAS,
                    RubyToken::TklBEGIN,
                    RubyToken::TklEND,
                    RubyToken::TkfLBRACE].freeze

  # Ruby keywords which define the beginning of a block: do, {, begin
  BLOCK_BEGINNING_TOKENS = [RubyToken::TkDO,
                            RubyToken::TkfLBRACE,
                            RubyToken::TkBEGIN].freeze

  # Create a new RubyLex and StringIO to hold the text to operate on
  def initialize
    @lex    = RubyLex.new
    @lex_io = StringIO.new('')
  end

  # @param text [String]
  # @return [Boolean] Whether the text contains the 'begin' keyword
  def contains_begin?(text)
    contains_token_class?(text, [RubyToken::TkBEGIN])
  end

  # @param text [String]
  # @return [Boolean] Whether the text contains a Ruby keyword
  def contains_keyword?(text)
    contains_token_class?(text, KEYWORD_TOKENS)
  end

  # @param text [String]
  # @return [Boolean] Whether the text contains a keyword which starts a block.
  #   i.e. 'do', '{', or 'begin'
  def contains_block_beginning?(text)
    contains_token_class?(text, BLOCK_BEGINNING_TOKENS)
  end

  # @param text [String] The text to process. It is not modified, so a frozen
  #   string is accepted.
  # @param progress_dialog [Cosmos::ProgressDialog] If this is set, the overall
  #   progress will be set as the processing progresses
  # @return [String] The text with all comments removed
  def remove_comments(text, progress_dialog = nil)
    # dup instead of clone: clone would carry over a frozen state and the
    # in-place deletions below would then raise
    comments_removed = text.dup
    start_lexing(text)
    comment_start = nil
    delete_ranges = []
    token_count = 0
    progress = 0.0
    while (token = @lex.token)
      token_count += 1
      if comment_start
        # The comment extends up to the character before the next token
        delete_ranges << (comment_start..(token.seek - 1))
        comment_start = nil
      end
      comment_start = token.seek if token.class == RubyToken::TkCOMMENT
      if progress_dialog && token_count % 10000 == 0
        progress = step_progress(progress_dialog, progress)
      end
    end

    # A comment with no token after it runs to the end of the text
    delete_ranges << (comment_start..(text.length - 1)) if comment_start

    # Delete from the back so the earlier ranges keep their offsets
    delete_count = 0
    delete_ranges.reverse_each do |range|
      delete_count += 1
      comments_removed[range] = ''
      if progress_dialog && delete_count % 10000 == 0
        progress = step_progress(progress_dialog, progress)
      end
    end

    comments_removed
  end

  # Yields each lexed segment and if the segment is instrumentable
  #
  # @param text [String]
  # @yieldparam line [String] The segment text. This may be a single blank or
  #   'else' line, the leading lines of a segment up to and including the
  #   line which opens a block, or a whole lexed segment.
  # @yieldparam instrumentable [Boolean] Whether the line is instrumentable
  # @yieldparam inside_begin [Integer, nil] Set to the lexer indent minus one
  #   when a segment containing 'begin' is seen and cleared back to nil once
  #   the lexer indent returns to that value
  # @yieldparam line_no [Integer] The current line number
  def each_lexed_segment(text)
    # A separate lexer is needed here because the contains_* helpers called
    # below reset and reuse @lex
    lex = RubyLex.new
    lex.exception_on_syntax_error = false
    lex_io = StringIO.new(text)
    lex.set_input(lex_io)
    inside_begin = nil # carried across segments

    while (lexed = lex.lex)
      line_no = lex.exp_line_no

      inside_begin = lex.indent - 1 if contains_begin?(lexed)
      inside_begin = nil if lex.indent == inside_begin

      loop do # loop to allow restarting for nested conditions

        # Yield blank lines and lonely else lines before the actual line
        while (index = lexed.index("\n"))
          line = lexed[0..index]
          if line =~ BLANK_LINE_REGEX
            instrumentable = true
          elsif line =~ LONELY_ELSE_REGEX
            instrumentable = false
          else
            break
          end
          yield line, instrumentable, inside_begin, line_no
          line_no += 1
          lexed = lexed[(index + 1)..-1]
        end

        if contains_keyword?(lexed)
          if contains_block_beginning?(lexed)
            # Grow the section a line at a time until it includes the token
            # which opens the block, then yield just that much
            section = ''
            lexed.each_line do |lexed_part|
              section << lexed_part
              if contains_block_beginning?(section)
                yield section, false, inside_begin, line_no
                break
              end
              line_no += 1
            end
            line_no += 1
            # Whatever follows the block opener is processed by another pass
            # of the enclosing loop
            remainder = lexed[(section.length)..-1]
            lexed = remainder
            next unless remainder.empty?
          else
            yield lexed, false, inside_begin, line_no
          end
        else
          num_left_brackets  = lexed.count('{')
          num_right_brackets = lexed.count('}')
          if num_left_brackets != num_right_brackets
            # Don't instrument lines with unequal numbers of { and } brackets
            yield lexed, false, inside_begin, line_no
          else
            yield lexed, true, inside_begin, line_no
          end
        end

        lex.exp_line_no = lex.line_no

        break
      end # loop do

    end # while lexed

  end # def each_lexed_segment

  private

  # Resets the shared lexer and points it at the given text
  #
  # @param text [String]
  def start_lexing(text)
    @lex.reinitialize
    @lex.exception_on_syntax_error = false
    @lex_io.string = text
    @lex.set_input(@lex_io)
  end

  # @param text [String]
  # @param token_classes [Array<Class>] Token classes to look for
  # @return [Boolean] Whether lexing the text produces a token whose class is
  #   one of token_classes
  def contains_token_class?(text, token_classes)
    start_lexing(text)
    while (token = @lex.token)
      return true if token_classes.include?(token.class)
    end
    false
  end

  # Advances the progress value by 0.01, wrapping back to 0.0 once it reaches
  # 0.99, and reports it to the dialog
  #
  # @param progress_dialog [Cosmos::ProgressDialog]
  # @param progress [Float] The current progress value
  # @return [Float] The new progress value
  def step_progress(progress_dialog, progress)
    progress += 0.01
    progress = 0.0 if progress >= 0.99
    progress_dialog.set_overall_progress(progress)
    progress
  end
end