module Puppet::Pops
  module Parser
    # Helper class that keeps track of where line breaks are located and can answer questions about positions.
    #
    # This base class defines the locator protocol. Most of its instance methods are empty placeholders
    # (they return nil) that the concrete subclasses below override.
    #
    class Locator
      # Creates, or recreates a Locator. When `index` is not given, the line index is computed by
      # scanning the given source string.
      #
      # @param string [String] the source text
      # @param file [String] the name of the file associated with the text
      # @param index [Array<Integer>, nil] a precomputed line index, or nil to have it computed
      # @param char_offsets [Boolean] true to get a locator whose offsets are character offsets
      #   (LocatorForChars), false to get one whose offsets are byte offsets (Locator19)
      # @return [Locator] the created locator
      def self.locator(string, file, index = nil, char_offsets = false)
        if char_offsets
          LocatorForChars.new(string, file, index)
        else
          Locator19.new(string, file, index)
        end
      end

      # Returns the file name associated with the string content
      def file
      end

      # Returns the string content
      def string
      end

      def to_s
        "Locator for file #{file}"
      end

      # Returns the position on line (first position on a line is 1)
      def pos_on_line(offset)
      end

      # Returns the line number (first line is 1) for the given offset
      def line_for_offset(offset)
      end

      # Returns the offset on line (first offset on a line is 0).
      #
      def offset_on_line(offset)
      end

      # Returns the character offset for a given reported offset
      def char_offset(byte_offset)
      end

      # Returns the length measured in number of characters from the given start and end byte offset
      def char_length(offset, end_offset)
      end

      # Extracts the text from offset with given length (measured in what the locator uses for offset)
      # @return [String] the extracted text
      def extract_text(offset, length)
      end

      # Extracts the text spanned by the given ast and by every `Model::Positioned` element yielded by
      # `ast._pcore_all_contents`. The span runs from the smallest offset to the largest end position found.
      #
      # @param ast [Object] an object responding to `offset`, `length` and `_pcore_all_contents`
      # @return [String] the extracted text
      def extract_tree_text(ast)
        first = ast.offset
        last = first + ast.length
        ast._pcore_all_contents([]) do |m|
          next unless m.is_a?(Model::Positioned)

          m_offset = m.offset
          m_last = m_offset + m.length
          first = m_offset if m_offset < first
          last = m_last if m_last > last
        end
        extract_text(first, last - first)
      end

      # Returns the line index - an array of line offsets for the start position of each line, starting at 0 for
      # the first line.
      #
      def line_index
      end

      # Common byte based impl that works for all rubies (StringScanner is byte based).
      #
      # @param string [String] the text to scan for line breaks
      # @return [Array<Integer>] frozen array with the byte offset of the start of each line
      def self.compute_line_index(string)
        scanner = StringScanner.new(string)
        result = [0] # first line starts at 0
        # each newline found means that another line starts right after it
        result << scanner.pos while scanner.scan_until(/\n/)
        result.freeze
      end

      # Produces an URI with path?line=n&pos=n. If origin is unknown the URI is string:?line=n&pos=n
      #
      # @param ast [Object] an object responding to `offset`
      # @return [URI] the URI for the position of the given ast
      def to_uri(ast)
        f = file
        f = (f.nil? || f.empty?) ? 'string:' : Puppet::Util.path_to_uri(f).to_s
        offset = ast.offset
        URI("#{f}?line=#{line_for_offset(offset)}&pos=#{pos_on_line(offset)}")
      end

      # Base class for the concrete locators. Holds the string, the file name and the line index, and
      # implements the line lookup on top of the line index.
      class AbstractLocator < Locator
        attr_accessor :line_index
        attr_reader :string
        attr_reader :file

        # Creates a locator based on a content string, the name of its file, and an optional precomputed
        # line index. The line index is computed from the string when it is not given.
        #
        # Note that the given string and file are frozen in place.
        #
        def initialize(string, file, line_index = nil)
          @string = string.freeze
          @file = file.freeze
          # one entry cache for line_for_offset
          @prev_offset = nil
          @prev_line = nil
          @line_index = line_index.nil? ? Locator.compute_line_index(@string) : line_index
        end

        # Returns the position on line (first position on a line is 1)
        def pos_on_line(offset)
          offset_on_line(offset) + 1
        end

        # Returns a hash with the keys :line, :pos, :offset and :length describing the span between the two
        # given reported offsets.
        def to_location_hash(reported_offset, end_offset)
          pos = pos_on_line(reported_offset)
          offset = char_offset(reported_offset)
          length = char_length(reported_offset, end_offset)
          start_line = line_for_offset(reported_offset)
          { :line => start_line, :pos => pos, :offset => offset, :length => length }
        end

        # Returns the index of the smallest item for which the item > the given value, or nil if there is
        # no such item. This is a min binary search specialized for the (sorted) line index; it returns
        # the index (not the value) and does not require passing a block.
        #
        # @param ary [Array] sorted array of values comparable with `>`
        # @param value [Object] the value to search for
        # @return [Integer, nil] index of the first item greater than value, or nil if all items are <= value
        def ary_bsearch_i(ary, value)
          low = 0
          high = ary.length
          while low < high
            mid = low + ((high - low) / 2)
            if ary[mid] > value
              high = mid
            else
              low = mid + 1
            end
          end
          # low ends at ary.length only when no item is greater than the value
          low == ary.length ? nil : low
        end

        def hash
          [string, file, line_index].hash
        end

        # Equal method needed by serializer to perform tabulation
        def eql?(o)
          self.class == o.class && string == o.string && file == o.file && line_index == o.line_index
        end

        # Returns the line number (first line is 1) for the given offset.
        # The result of the most recent lookup is cached since the same offset is often asked for repeatedly.
        def line_for_offset(offset)
          return @prev_line if @prev_offset == offset

          # The index of the first line start that is greater than the offset is also the 1-based number
          # of the line holding the offset. If there is no such line start, the offset is on the last line.
          line_nbr = ary_bsearch_i(line_index, offset) || line_index.size
          @prev_offset = offset
          @prev_line = line_nbr
          line_nbr
        end
      end

      # A Sublocator locates a concrete locator (subspace) in a virtual space.
      # The `leading_line_count` is the (virtual) number of lines preceding the first line in the concrete locator.
      # The `leading_offset` is the (virtual) byte offset of the first byte in the concrete locator.
      # The `leading_line_offset` is the (virtual) offset / margin in characters for each line.
      #
      # This illustrates characters in the sublocator (`.`) inside the subspace (`X`):
      #
      #     1:XXXXXXXX
      #     2:XXXX.... .. ... ..
      #     3:XXXX. . .... ..
      #     4:XXXX............
      #
      # This sublocator would be configured with leading_line_count = 1,
      # leading_offset=8, and leading_line_offset=4
      #
      # Note that leading_offset must be the same for all lines and measured in characters.
      #
      # NOTE(review): the text above speaks of a `leading_line_offset`, but the constructor takes
      # `has_margin` and `margin_per_line` (an array with one margin per line) - confirm and update.
      #
      # A SubLocator is only used during parsing as the parser will translate the local offsets/lengths to
      # the parent locator when a sublocated expression is reduced. Do not call the methods
      # `char_offset` or `char_length` as those methods will raise an error.
      #
      class SubLocator < AbstractLocator
        attr_reader :locator
        attr_reader :leading_line_count
        attr_reader :leading_offset
        attr_reader :has_margin
        attr_reader :margin_per_line

        def initialize(locator, str, leading_line_count, leading_offset, has_margin, margin_per_line)
          super(str, locator.file)
          @locator = locator
          @leading_line_count = leading_line_count
          @leading_offset = leading_offset
          @has_margin = has_margin
          @margin_per_line = margin_per_line

          # Since lines can have different margin - accumulated margin per line must be computed
          # and since this accumulated margin adjustment is needed more than once; both for start offset,
          # and for end offset (to compute global length) it is computed up front here.
          # The accumulated_offset holds the sum of all removed margins before a position on line n (line index is 1-n,
          # and (unused) position 0 is always 0).
          # The last entry is duplicated since there will be the line "after last line" that would otherwise require
          # conditional logic.
          #
          @accumulated_margin = margin_per_line.each_with_object([0]) { |margin, sums| sums << sums[-1] + margin }
          @accumulated_margin << @accumulated_margin[-1]
        end

        # Returns the file of the parent locator
        def file
          @locator.file
        end

        # Returns array with transposed (local) offset and (local) length. The transposed values
        # take the margin into account such that it is added to the content to the right
        #
        # Using X to denote margin and where end of line is explicitly shown as \n:
        # ```
        # XXXXabc\n
        # XXXXdef\n
        # ```
        # A local offset of 0 is translated to the start of the first heredoc line, and a length of 1 is adjusted to
        # 5 - i.e to cover "XXXXa". A local offset of 1, with length 1 would cover "b".
        # A local offset of 4 and length 1 would cover "XXXXd"
        #
        # It is possible that lines have different margin and that is taken into account.
        #
        # NOTE(review): as far as this file shows, SubLocator inherits the empty `Locator#offset_on_line`
        # (which returns nil), so the `offset_on_line(offset) == 0` test below is never true and the margin
        # at the start of a line is never added to the length - confirm whether an override is expected.
        #
        def to_global(offset, length)
          # simple case, no margin
          return [offset + @leading_offset, length] unless @has_margin

          # compute local start and end line
          start_line = line_for_offset(offset)
          end_line = line_for_offset(offset + length)

          # complex case when there is a margin
          transposed_offset = offset == 0 ? @leading_offset : offset + @leading_offset + @accumulated_margin[start_line]
          transposed_length = length +
                              @accumulated_margin[end_line] - @accumulated_margin[start_line] + # the margins between start and end (0 is line 1)
                              (offset_on_line(offset) == 0 ? margin_per_line[start_line - 1] : 0) # include start's margin in position 0
          [transposed_offset, transposed_length]
        end

        # Do not call this method
        def char_offset(offset)
          raise "Should not be called"
        end

        # Do not call this method
        def char_length(offset, end_offset)
          raise "Should not be called"
        end
      end

      # A locator where all offsets and lengths are measured in characters.
      class LocatorForChars < AbstractLocator
        # Returns the offset on line (first offset on a line is 0).
        def offset_on_line(offset)
          line_offset = line_index[line_for_offset(offset) - 1]
          offset - line_offset
        end

        # Returns the given offset unchanged, since offsets already are character offsets
        def char_offset(char_offset)
          char_offset
        end

        # Returns the length in characters between the given start and end offset
        def char_length(offset, end_offset)
          end_offset - offset
        end

        # Extracts the text from char offset with given length (passed to String#slice)
        # @return [String] the extracted text
        def extract_text(offset, length)
          string.slice(offset, length)
        end
      end

      # This implementation is for Ruby19 and Ruby20. It uses byteslice to get strings from byte based offsets.
      # For Ruby20 this is faster than using the Stringscanner.charpos method (byteslice outperforms it, when
      # strings are frozen).
      #
      class Locator19 < AbstractLocator
        include Types::PuppetObject

        # Returns the (memoized) Pcore object type named 'Puppet::AST::Locator' with the attributes
        # 'string', 'file' and the optional 'line_index'.
        def self._pcore_type
          @type ||= Types::PObjectType.new('Puppet::AST::Locator', {
            'attributes' => {
              'string' => Types::PStringType::DEFAULT,
              'file' => Types::PStringType::DEFAULT,
              'line_index' => {
                Types::KEY_TYPE => Types::POptionalType.new(Types::PArrayType.new(Types::PIntegerType::DEFAULT)),
                Types::KEY_VALUE => nil
              }
            }
          })
        end

        # Returns the offset on line (first offset on a line is 0).
        # Ruby 19 is multibyte but has no character position methods, must use byteslice
        def offset_on_line(offset)
          line_offset = line_index[line_for_offset(offset) - 1]
          @string.byteslice(line_offset, offset - line_offset).length
        end

        # Returns the character offset for a given byte offset
        # Ruby 19 is multibyte but has no character position methods, must use byteslice
        def char_offset(byte_offset)
          string.byteslice(0, byte_offset).length
        end

        # Returns the length measured in number of characters from the given start and end byte offset
        def char_length(offset, end_offset)
          string.byteslice(offset, end_offset - offset).length
        end

        # Extracts the text from byte offset with given byte length
        # @return [String] the extracted text
        def extract_text(offset, length)
          string.byteslice(offset, length)
        end
      end
    end
  end
end