lib/prism/parse_result.rb in prism-0.22.0 vs lib/prism/parse_result.rb in prism-0.23.0

- old
+ new

@@ -7,22 +7,20 @@ class Source # The source code that this source object represents. attr_reader :source # The line number where this source starts. - attr_accessor :start_line + attr_reader :start_line # The list of newline byte offsets in the source code. attr_reader :offsets - # Create a new source object with the given source code and newline byte - # offsets. If no newline byte offsets are given, they will be computed from - # the source code. - def initialize(source, start_line = 1, offsets = compute_offsets(source)) + # Create a new source object with the given source code. + def initialize(source, start_line = 1, offsets = []) @source = source - @start_line = start_line - @offsets = offsets + @start_line = start_line # set after parsing is done + @offsets = offsets # set after parsing is done end # Perform a byteslice on the source code using the given byte offset and # byte length. def slice(byte_offset, length) @@ -54,10 +52,27 @@ # Return the column number in characters for the given byte offset. def character_column(byte_offset) character_offset(byte_offset) - character_offset(line_start(byte_offset)) end + # Returns the offset from the start of the file for the given byte offset + # counting in code units for the given encoding. + # + # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the + # concept of code units that differs from the number of characters in other + # encodings, it is not captured here. + def code_units_offset(byte_offset, encoding) + byteslice = source.byteslice(0, byte_offset).encode(encoding) + (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length + end + + # Returns the column number in code units for the given encoding for the + # given byte offset. + def code_units_column(byte_offset, encoding) + code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding) + end + private # Binary search through the offsets to find the line number for the given # byte offset. def find_line(byte_offset) @@ -75,18 +90,10 @@ end end left - 1 end - - # Find all of the newlines in the source code and return their byte offsets - # from the start of the string an array. - def compute_offsets(code) - offsets = [0] - code.b.scan("\n") { offsets << $~.end(0) } - offsets - end end # This represents a location in the source. class Location # A Source object that is used to determine more information from the given @@ -136,10 +143,15 @@ # starts. def start_character_offset source.character_offset(start_offset) end + # The offset from the start of the file in code units of the given encoding. + def start_code_units_offset(encoding = Encoding::UTF_16LE) + source.code_units_offset(start_offset, encoding) + end + # The byte offset from the beginning of the source where this location ends. def end_offset start_offset + length end @@ -147,10 +159,15 @@ # ends. def end_character_offset source.character_offset(end_offset) end + # The offset from the start of the file in code units of the given encoding. + def end_code_units_offset(encoding = Encoding::UTF_16LE) + source.code_units_offset(end_offset, encoding) + end + # The line number where this location starts. def start_line source.line(start_offset) end @@ -175,19 +192,31 @@ # the line. def start_character_column source.character_column(start_offset) end + # The column number in code units of the given encoding where this location + # starts from the start of the line. + def start_code_units_column(encoding = Encoding::UTF_16LE) + source.code_units_column(start_offset, encoding) + end + # The column number in bytes where this location ends from the start of the # line. def end_column source.column(end_offset) end # The column number in characters where this location ends from the start of # the line. def end_character_column source.character_column(end_offset) + end + + # The column number in code units of the given encoding where this location + # ends from the start of the line. + def end_code_units_column(encoding = Encoding::UTF_16LE) + source.code_units_column(end_offset, encoding) end # Implement the hash pattern matching interface for Location. def deconstruct_keys(keys) { start_offset: start_offset, end_offset: end_offset }