module CodeRay
  
  # The Tokens class represents a list of tokens returned from
  # a Scanner. It's actually just an Array with a few helper methods.
  #
  # A token itself is not a special object, just two elements in an Array:
  # * the _token_ _text_ (the original source of the token in a String) or
  #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
  # * the _token_ _kind_ (a Symbol representing the type of the token)
  #
  # It looks like this:
  #
  #   ..., '# It looks like this', :comment, ...
  #   ..., '3.1415926', :float, ...
  #   ..., '$^', :error, ...
  #
  # Some scanners also yield sub-tokens, represented by special
  # token actions, for example :begin_group and :end_group.
  #
  # The Ruby scanner, for example, splits "a string" into:
  #
  #   [
  #     :begin_group, :string,
  #     '"',          :delimiter,
  #     'a string',   :content,
  #     '"',          :delimiter,
  #     :end_group,   :string
  #   ]
  #
  # Tokens can be used to save the output of a Scanner in a simple
  # Ruby object that can be sent to an Encoder later:
  #
  #   tokens = CodeRay.scan('price = 2.59', :ruby).tokens
  #   tokens.encode(:html)
  #   tokens.html
  #   CodeRay.encoder(:html).encode_tokens(tokens)
  #
  # Tokens gives you the power to handle pre-scanned code very easily:
  # You can serialize it to a JSON string and store it in a database, pass it
  # around to encode it more than once, send it to other algorithms...
  class Tokens < Array
    
    # Remove Array#filter, which Ruby 2.6 added as an alias for Array#select,
    # so that calls to +filter+ still go through method_missing.
    undef_method :filter if instance_methods.include?(:filter)
    
    # The Scanner instance that created the tokens.
    attr_accessor :scanner
    
    # Encode the tokens using encoder.
    #
    # encoder can be
    # * a plugin name like :html or 'statistic'
    # * an Encoder object
    #
    # options are passed to the encoder.
    def encode encoder, options = {}
      encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
      encoder.encode_tokens self, options
    end
    
    # Turn tokens into a string by concatenating them.
    def to_s
      encode CodeRay::Encoders::Encoder.new
    end
    
    # Redirects unknown methods to encoder calls.
    #
    # For example, if you call +tokens.html+, the HTML encoder
    # is used to highlight the tokens.
    def method_missing meth, options = {}
      encode meth, options
    rescue PluginHost::PluginNotFound
      super
    end
    
    # Split the tokens into parts of the given +sizes+.
    #
    # The result will be an Array of Tokens objects. The parts have
    # the text size specified by the parameter. In addition, each
    # part closes all opened tokens. This is useful to insert tokens
    # between them.
    #
    # This method is used by +Scanner#tokenize+ when called with an Array
    # of source strings. The Diff encoder uses it for inline highlighting.
    def split_into_parts *sizes
      return Array.new(sizes.size) { Tokens.new } if size == 2 && first == ''
      
      parts = []
      opened = []
      content = nil
      part = Tokens.new
      part_size = 0
      size = sizes.first
      i = 0
      for item in self
        case content
        when nil
          content = item
        when String
          if size && part_size + content.size > size  # token must be cut
            if part_size < size  # some part of the token goes into this part
              content = content.dup  # content may not be safe to change
              part << content.slice!(0, size - part_size) << item
            end
            # close all open groups and lines...
            closing = opened.reverse.flatten.map do |content_or_kind|
              case content_or_kind
              when :begin_group
                :end_group
              when :begin_line
                :end_line
              else
                content_or_kind
              end
            end
            part.concat closing
            begin
              parts << part
              part = Tokens.new
              size = sizes[i += 1]
            end until size.nil? || size > 0
            # ...and open them again.
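            # `opened` still holds the [action, kind] pair of every group and
            # line that was begun but not yet ended, so flattening it re-emits
            # the matching begin_* tokens and the new part starts with the
            # same nesting as the previous one.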
            part.concat opened.flatten
            part_size = 0
            redo unless content.empty?
          else
            part << content << item
            part_size += content.size
          end
          content = nil
        when Symbol
          case content
          when :begin_group, :begin_line
            opened << [content, item]
          when :end_group, :end_line
            opened.pop
          else
            raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
          end
          part << content << item
          content = nil
        else
          raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
        end
      end
      
      parts << part
      
      parts << Tokens.new while parts.size < sizes.size
      
      parts
    end
    
    # Return the actual number of tokens.
    def count
      size / 2
    end
    
    alias text_token push
    def begin_group kind; push :begin_group, kind end
    def end_group kind; push :end_group, kind end
    def begin_line kind; push :begin_line, kind end
    def end_line kind; push :end_line, kind end
    alias tokens concat
    
  end
  
end
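
# A minimal usage sketch (assuming the full CodeRay gem, with its scanners and
# encoders, is loaded): one Tokens object is reused by several encoders and
# then split per source string for inline, per-line highlighting.
#
#   require 'coderay'
#
#   sources = ["a = 1\n", "b = 2\n"]
#   tokens  = CodeRay.scan(sources.join, :ruby).tokens
#
#   tokens.encode(:html)   # encode via a plugin name...
#   tokens.statistic       # ...or via method_missing
#
#   parts = tokens.split_into_parts(*sources.map(&:size))
#   parts.size             # => 2, one balanced Tokens object per source string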