Sha256: ff85eb112605d197ce895089e934793bf500aea36ee5a5983b9729a28e89b924

Contents?: true

Size: 1.47 KB

Versions: 14

Compression:

Stored size: 1.47 KB

Contents

class CharacterSet
  # Stateless helpers that turn user input (enumerables of codepoints or
  # characters, or simple bracket expressions such as "[a-z]") into codepoints.
  module Parser
    module_function

    # Converts an enumerable of codepoints or single-character strings into
    # something that yields Integer codepoints.
    #
    # Only the FIRST yielded element is inspected; the remaining elements are
    # not validated here.
    #
    # @param object [#each] the source collection
    # @return [Object] +object+ itself if its first element is an Integer in
    #   0...0x110000 (or if nothing is yielded at all), otherwise the result
    #   of mapping every element through #ord
    # @raise [ArgumentError] if +object+ does not respond to #each, or if its
    #   first element is neither a valid codepoint nor a 1-character String
    def codepoints_from_enumerable(object)
      raise ArgumentError, 'pass an Enumerable' unless object.respond_to?(:each)
      # Use #each to check first element (only this works for all Enumerables)
      object.each do |e|
        return object            if e.is_a?(Integer) && e >= 0 && e < 0x110000
        return object.map(&:ord) if e.is_a?(String)  && e.length == 1
        raise ArgumentError, "#{e.inspect} is not valid as a codepoint"
      end
      # Nothing was yielded. Return the (empty) input explicitly instead of
      # relying on whatever this particular #each implementation returns.
      object
    end

    # Expands a simple bracket expression (e.g. "[a-c]", "a-c", "[\\u0041-Z]")
    # into an Array of codepoints, in order of appearance.
    #
    # @param string [String] the expression, with or without enclosing brackets
    # @return [Array<Integer>] the codepoints of all literals and ranges
    # @raise [ArgumentError] if +string+ is not a String, or if it contains a
    #   backslash followed by anything but u/U/x, a "[" preceded by a
    #   non-backslash character, or "&&"
    def codepoints_from_bracket_expression(string)
      raise ArgumentError, 'pass a String'   unless string.is_a?(String)
      raise ArgumentError, 'advanced syntax' if string =~ /\\[^uUx]|[^\\]\[|&&/

      literal_content = eval_escapes(strip_brackets(string))

      prev_chr = nil
      in_range = false

      codepoints = literal_content.each_char.each_with_object([]) do |chr, acc|
        # A dash only acts as a range operator when it follows a regular
        # character; at the start, or after a backslash or another dash, it
        # is taken literally.
        if chr == '-' && prev_chr && prev_chr != '\\' && prev_chr != '-'
          in_range = true
        else
          if in_range
            # prev_chr itself was already emitted, so start one past it.
            acc.concat(((prev_chr.ord + 1)..chr.ord).to_a)
          else
            acc << chr.ord
          end
          in_range = false
          prev_chr = chr
        end
      end

      # A dash at the very end has no range end, so it is a literal dash
      # rather than something to be dropped.
      codepoints << '-'.ord if in_range
      codepoints
    end

    # Removes one pair of enclosing square brackets, plus a "^" directly after
    # the opening bracket, if present.
    #
    # @param string [String]
    # @return [String] the inner content, or a copy of +string+ if it is not
    #   enclosed in brackets
    def strip_brackets(string)
      # /m lets "." match newlines, so content containing line breaks is
      # unwrapped as well.
      string[/\A\[\^?(.*)\]\z/m, 1] || string.dup
    end

    # Replaces textual escapes (\UXXXXXXXX, \uXXXX, U+X..., \xXX, \u{X...})
    # with the UTF-8 character for the given hexadecimal codepoint.
    #
    # @param string [String]
    # @return [String] a new String with all recognized escapes evaluated
    def eval_escapes(string)
      string.gsub(/\\U(\h{8})|\\u(\h{4})|U\+(\h+)|\\x(\h{2})|\\u\{(\h+)\}/) do
        # Exactly one alternative matched, so exactly one capture is non-nil.
        hex = Regexp.last_match.captures.compact.first
        hex.to_i(16).chr('utf-8')
      end
    end
  end
end

Version data entries

14 entries across 14 versions & 1 rubygem

Version Path
character_set-1.4.0-java lib/character_set/parser.rb
character_set-1.4.0 lib/character_set/parser.rb
character_set-1.3.0-java lib/character_set/parser.rb
character_set-1.3.0 lib/character_set/parser.rb
character_set-1.2.0-java lib/character_set/parser.rb
character_set-1.2.0 lib/character_set/parser.rb
character_set-1.1.2-java lib/character_set/parser.rb
character_set-1.1.2 lib/character_set/parser.rb
character_set-1.1.1-java lib/character_set/parser.rb
character_set-1.1.1 lib/character_set/parser.rb
character_set-1.1.0-java lib/character_set/parser.rb
character_set-1.1.0 lib/character_set/parser.rb
character_set-1.0.0-java lib/character_set/parser.rb
character_set-1.0.0 lib/character_set/parser.rb