lib/rouge/lexer.rb in rouge-3.3.0 vs lib/rouge/lexer.rb in rouge-3.4.0

- old
+ new

@@ -21,10 +21,18 @@ # @see #lex def lex(stream, opts={}, &b) new(opts).lex(stream, &b) end + # In case #continue_lex is called statically, we simply + # begin a new lex from the beginning, since there is no state. + # + # @see #continue_lex + def continue_lex(*a, &b) + lex(*a, &b) + end + # Given a name in string, return the correct lexer class. # @param [String] name # @return [Class<Rouge::Lexer>,nil] def find(name) registry[name.to_s] @@ -116,11 +124,11 @@ @demo = File.read(demo_file, mode: 'rt:bom|utf-8') end # @return a list of all lexers. def all - registry.values.uniq + @all ||= registry.values.uniq end # Guess which lexer to use based on a hash of info. # # This accepts the same arguments as Lexer.guess, but will never throw @@ -186,20 +194,28 @@ def enable_debug! @debug_enabled = true end def disable_debug! - @debug_enabled = false + remove_instance_variable :@debug_enabled end def debug_enabled? - !!@debug_enabled + (defined? @debug_enabled) ? true : false end + # Determine if a lexer has a method named +:detect?+ defined in its + # singleton class. + def detectable? + @detectable ||= methods(false).include?(:detect?) + end + protected # @private def register(name, lexer) + # reset an existing list of lexers + @all = nil if @all registry[name.to_s] = lexer end public # Used to specify or get the canonical name of this lexer class. @@ -234,10 +250,17 @@ (@aliases ||= []).concat(args) end # Specify a list of filename globs associated with this lexer. # + # If a filename glob is associated with more than one lexer, this can + # cause a Guesser::Ambiguous error to be raised in various guessing + # methods. These errors can be avoided by disambiguation. Filename globs + # are disambiguated in one of two ways. Either the lexer will define a + # `self.detect?` method (intended for use with shebangs and doctypes) or a + # manual rule will be specified in Guessers::Disambiguation. 
+ # # @example # class Ruby < Lexer # filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile' # end def filenames(*fnames) @@ -254,11 +277,13 @@ (@mimetypes ||= []).concat(mts) end # @private def assert_utf8!(str) - return if %w(US-ASCII UTF-8 ASCII-8BIT).include? str.encoding.name + encoding = str.encoding.name + return if encoding == 'US-ASCII' || encoding == 'UTF-8' || encoding == 'ASCII-8BIT' + raise EncodingError.new( "Bad encoding: #{str.encoding.names.join(',')}. " + "Please convert your string to UTF-8." ) end @@ -388,20 +413,28 @@ def reset! end # Given a string, yield [token, chunk] pairs. If no block is given, # an enumerator is returned. - # - # @option opts :continue - # Continue the lex from the previous state (i.e. don't call #reset!) - def lex(string, opts={}, &b) - return enum_for(:lex, string, opts) unless block_given? + def lex(string, opts=nil, &b) + if opts + warn 'the :continue option to Formatter#lex is deprecated, use #continue_lex instead.' + return continue_lex(string, &b) + end + return enum_for(:lex, string) unless block_given? + Lexer.assert_utf8!(string) + reset! - reset! unless opts[:continue] + continue_lex(string, &b) + end + # Continue the lex from the current state without resetting + def continue_lex(string, &b) + return enum_for(:continue_lex, string, &b) unless block_given? + # consolidate consecutive tokens of the same type last_token = nil last_val = nil stream_tokens(string) do |tok, val| next if val.empty? @@ -452,11 +485,9 @@ @_loaded_lexers = {} def self.load_lexer(relpath) return if @_loaded_lexers.key?(relpath) @_loaded_lexers[relpath] = true - - root = Pathname.new(__FILE__).dirname.join('lexers') - load root.join(relpath) + load File.join(__dir__, 'lexers', relpath) end end end