lib/prism/parse_result.rb in prism-0.22.0 vs lib/prism/parse_result.rb in prism-0.23.0
- old
+ new
@@ -7,22 +7,20 @@
class Source
# The source code that this source object represents.
attr_reader :source
# The line number where this source starts.
- attr_accessor :start_line
+ attr_reader :start_line
# The list of newline byte offsets in the source code.
attr_reader :offsets
- # Create a new source object with the given source code and newline byte
- # offsets. If no newline byte offsets are given, they will be computed from
- # the source code.
- def initialize(source, start_line = 1, offsets = compute_offsets(source))
+ # Create a new source object with the given source code.
+ def initialize(source, start_line = 1, offsets = [])
@source = source
- @start_line = start_line
- @offsets = offsets
+ @start_line = start_line # set after parsing is done
+ @offsets = offsets # set after parsing is done
end
# Perform a byteslice on the source code using the given byte offset and
# byte length.
def slice(byte_offset, length)
@@ -54,10 +52,27 @@
# Compute the character-based column of the given byte offset: the character
# offset of the position minus the character offset of the start of its line.
def character_column(byte_offset)
  absolute = character_offset(byte_offset)
  absolute - character_offset(line_start(byte_offset))
end
+ # Returns the offset from the start of the file for the given byte offset
+ # counting in code units for the given encoding.
+ #
+ # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
+ # concept of code units that differs from the number of characters in other
+ # encodings, it is not captured here.
+ def code_units_offset(byte_offset, encoding)
+ byteslice = source.byteslice(0, byte_offset).encode(encoding)
+ (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length
+ end
+
+ # Returns the column number in code units for the given encoding for the
+ # given byte offset.
+ def code_units_column(byte_offset, encoding)
+ code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
+ end
+
private
# Binary search through the offsets to find the line number for the given
# byte offset.
def find_line(byte_offset)
@@ -75,18 +90,10 @@
end
end
left - 1
end
-
- # Find all of the newlines in the source code and return their byte offsets
- # from the start of the string as an array.
- def compute_offsets(code)
- offsets = [0]
- code.b.scan("\n") { offsets << $~.end(0) }
- offsets
- end
end
# This represents a location in the source.
class Location
# A Source object that is used to determine more information from the given
@@ -136,10 +143,15 @@
# starts.
def start_character_offset
  # Delegate to the source, converting this location's starting byte offset
  # into a character offset.
  document = source
  document.character_offset(start_offset)
end
+ # The offset from the start of the file in code units of the given encoding.
+ def start_code_units_offset(encoding = Encoding::UTF_16LE)
+ source.code_units_offset(start_offset, encoding)
+ end
+
# The byte offset from the beginning of the source at which this location
# ends, i.e. its start offset plus its length.
def end_offset
  first = start_offset
  first + length
end
@@ -147,10 +159,15 @@
# ends.
def end_character_offset
  # Delegate to the source, converting this location's ending byte offset
  # into a character offset.
  document = source
  document.character_offset(end_offset)
end
+ # The offset from the start of the file in code units of the given encoding.
+ def end_code_units_offset(encoding = Encoding::UTF_16LE)
+ source.code_units_offset(end_offset, encoding)
+ end
+
# The line number on which this location starts, as reported by the source
# for this location's starting byte offset.
def start_line
  document = source
  document.line(start_offset)
end
@@ -175,19 +192,31 @@
# the line.
def start_character_column
  # Delegate to the source, converting this location's starting byte offset
  # into a character-based column.
  document = source
  document.character_column(start_offset)
end
+ # The column number in code units of the given encoding where this location
+ # starts from the start of the line.
+ def start_code_units_column(encoding = Encoding::UTF_16LE)
+ source.code_units_column(start_offset, encoding)
+ end
+
# The byte-based column, measured from the start of the line, at which this
# location ends.
def end_column
  document = source
  document.column(end_offset)
end
# The column number in characters where this location ends from the start of
# the line.
def end_character_column
source.character_column(end_offset)
+ end
+
+ # The column number in code units of the given encoding where this location
+ # ends from the start of the line.
+ def end_code_units_column(encoding = Encoding::UTF_16LE)
+ source.code_units_column(end_offset, encoding)
end
# Implement the hash pattern matching interface for Location.
def deconstruct_keys(keys)
{ start_offset: start_offset, end_offset: end_offset }