# encoding:ascii-8bit

module Parser
  module Source

    ##
    # A buffer holding the source code of a single file or snippet, together
    # with the information needed to report positions inside it: the buffer
    # name and the number given to its first line.
    #
    # The source is normalized when it is assigned through {#source=}: it is
    # re-encoded according to its magic encoding comment (see
    # {Buffer.reencode_string}) and CRLF line endings are converted to LF.
    # Once assigned, the source is frozen and cannot be replaced.
    #
    # @!attribute [r] name
    #  @return [String] name of the buffer; {#read} uses it as a file path
    #
    # @!attribute [r] first_line
    #  @return [Integer] number given to the first line of the source
    #
    # @api public
    #
    class Buffer
      attr_reader :name, :first_line

      # Matches a magic encoding comment (`coding: name` or `coding=name`).
      # Capture 2 is the special-cased `utf8-mac`, capture 3 is an encoding
      # name followed by an emacs line-ending suffix (capture 4), and
      # capture 5 is a plain encoding name.
      ENCODING_RE =
        /\#.*coding\s*[:=]\s*
          (
            # Special-case: there's a UTF8-MAC encoding.
            (utf8-mac)
          |
            # Chew the suffix; it's there for emacs compat.
            ([A-Za-z0-9_-]+?)(-unix|-dos|-mac)
          |
            ([A-Za-z0-9_-]+)
          )
        /x

      ##
      # Detects the encoding declared at the top of a piece of Ruby source.
      #
      # A UTF-8 byte order mark at the very beginning yields UTF-8. Otherwise
      # a magic comment is looked for on the first line, or on the second
      # line if the first one is a shebang (`#!`). The magic comment is only
      # honored when its line consists of a comment (optional whitespace
      # followed by `#`).
      #
      # @param [String] string source code; it is matched against regular
      #   expressions, so it should be valid in its encoding (binary is safe)
      # @return [Encoding, nil] the declared encoding, or nil if there is none
      # @raise [ArgumentError] if the magic comment names an encoding unknown
      #   to `Encoding.find`
      #
      def self.recognize_encoding(string)
        return if string.empty?

        # Extract the first two lines without splitting the whole string.
        # The second line is captured even if it is not newline-terminated.
        string =~ /\A(.*)\n?(.*)/
        first_line, second_line = $1, $2

        # The BOM is compared byte by byte so that the check depends neither
        # on the encoding of this file nor on the encoding of the string.
        if first_line.unpack('C3') == [ 0xef, 0xbb, 0xbf ]
          return Encoding::UTF_8
        elsif first_line[0, 2] == '#!'
          encoding_line = second_line
        else
          encoding_line = first_line
        end

        # A "coding:" occurring in code (e.g. inside a string literal) or in
        # a trailing comment is not a magic comment.
        return unless encoding_line && encoding_line =~ /\A\s*\#/

        if (result = ENCODING_RE.match(encoding_line))
          Encoding.find(result[2] || result[3] || result[5])
        end
      end

      ##
      # Lexer expects UTF-8 input. This method processes the input
      # in an arbitrary valid Ruby encoding and returns an UTF-8 encoded
      # string.
      #
      # The encoding is taken from the magic comment or BOM recognized by
      # {recognize_encoding}. Without one, the string is returned with the
      # encoding it came with. If binary is declared, the string is returned
      # tagged as binary.
      #
      # The encoding tag of the passed string is changed in place (which is
      # why it must not be frozen); in the first two cases the very same
      # object is returned.
      #
      # @param [String] string unfrozen source code
      # @return [String]
      #
      def self.reencode_string(string)
        original_encoding = string.encoding

        # Detection runs on raw bytes so that it works no matter how the
        # string is currently tagged.
        detected_encoding = recognize_encoding(string.force_encoding(Encoding::BINARY))

        if detected_encoding.nil?
          string.force_encoding(original_encoding)
        elsif detected_encoding == Encoding::BINARY
          string
        else
          string.
            force_encoding(detected_encoding).
            encode(Encoding::UTF_8)
        end
      end

      ##
      # Creates an empty buffer. The source has to be provided afterwards
      # with {#read}, {#source=} or {#raw_source=}.
      #
      # @param [String] name       buffer name
      # @param [Integer] first_line number given to the first line
      #
      def initialize(name, first_line = 1)
        @name        = name
        @source      = nil
        @first_line  = first_line

        @lines       = nil
        @line_begins = nil
      end

      ##
      # Populates the buffer with the contents of the file called {#name}.
      # The file is read in binary mode and handed to {#source=}.
      #
      # @return [Buffer] self
      #
      def read
        File.open(@name, 'rb') do |io|
          self.source = io.read
        end

        self
      end

      ##
      # @return [String] the frozen source code
      # @raise [RuntimeError] if the buffer has not been populated yet
      #
      def source
        if @source.nil?
          raise RuntimeError, 'Cannot extract source from uninitialized Source::Buffer'
        end

        @source
      end

      ##
      # Populates the buffer with the given source, re-encoding it with
      # {Buffer.reencode_string} first when the running Ruby supports
      # encodings.
      #
      # @param [String] source source code
      # @raise [ArgumentError] if the buffer is already populated
      #
      def source=(source)
        if defined?(Encoding)
          # reencode_string changes the encoding tag in place.
          source = source.dup if source.frozen?
          source = self.class.reencode_string(source)
        end

        self.raw_source = source
      end

      ##
      # Populates the buffer with the given source without touching its
      # encoding. CRLF line endings are converted to LF and the result is
      # frozen.
      #
      # @param [String] source source code
      # @raise [ArgumentError] if the buffer is already populated
      #
      def raw_source=(source)
        if @source
          raise ArgumentError, 'Source::Buffer is immutable'
        end

        @source = source.gsub(/\r\n/, "\n").freeze
      end

      ##
      # Converts a character offset into the source to a line and a column.
      #
      # @param [Integer] position zero-based character offset
      # @return [Array(Integer, Integer)] line number (counted from
      #   {#first_line}) and zero-based column
      # @raise [RuntimeError] if the buffer has not been populated yet
      #
      def decompose_position(position)
        line_no, line_begin = line_for(position)

        [ @first_line + line_no, position - line_begin ]
      end

      ##
      # Extracts one line of the source, without its trailing newline.
      #
      # @param [Integer] lineno line number (counted from {#first_line})
      # @return [String] a fresh copy of the line
      # @raise [RuntimeError] if the buffer has not been populated yet
      #
      def source_line(lineno)
        unless @lines
          lines = source.lines.map { |line| line.sub(/\n\z/, '') }

          # Lexer has an "infinite stream of EOF symbols" after the
          # actual EOF, so in some cases (e.g. EOF token of ruby-parse -E)
          # tokens will refer to one line past EOF.
          lines << ""

          @lines = lines
        end

        @lines[lineno - @first_line].dup
      end

      private

      # Returns the memoized table of `[ line index, offset of the first
      # character of the line ]` pairs, ordered from the last line to the
      # first one, so that the first pair whose offset does not exceed
      # a position identifies the line containing that position.
      def line_begins
        unless @line_begins
          # The table is built in a local and stored only once complete, so
          # that a failure never leaves a partial table memoized.
          begins, index = [ [ 0, 0 ] ], 1

          source.each_char do |char|
            # `index` is the offset of the character following `char`,
            # i.e. the place where the next line begins.
            begins.unshift [ begins.length, index ] if char == "\n"

            index += 1
          end

          @line_begins = begins
        end

        @line_begins
      end

      # Finds the `[ line index, line offset ]` pair of the line containing
      # the given character offset.
      def line_for(position)
        if line_begins.respond_to? :bsearch
          # Fast O(log n) variant for Ruby >=2.0.
          line_begins.bsearch do |_line, line_begin|
            line_begin <= position
          end
        else
          # Slower O(n) variant for Ruby <2.0.
          line_begins.find do |_line, line_begin|
            line_begin <= position
          end
        end
      end
    end

  end
end