# coding: utf-8

require 'fig/tokenized_string'
require 'fig/tokenized_string/plain_segment'

module Fig; end

# Validates the quoting and escape sequences of a string and breaks it up
# into the segments of a Fig::TokenizedString.
class Fig::StringTokenizer
  # subexpression_matchers is an array of hashes.  Each hash is expected to
  # contain two keys: :pattern and :action.
  #
  # The :pattern value needs to be a regular expression for the substring
  # that needs special handling.
  #
  # The :action value needs to be a block that takes two parameters.  The
  # first parameter is the text that was matched and the second is the error
  # block passed to #tokenize().
  #
  # On success the action returns either a String containing replacement text
  # or a Fig::TokenizedString::Token representing the special handling of the
  # consumed text.  If there was a problem, then the action should have
  # invoked the error block and should return nil.
  #
  # metacharacters is a regular expression character class for characters
  # that need to be escaped when un-single quoting a string.
  def initialize(subexpression_matchers = [], metacharacters = '')
    @subexpression_matchers = subexpression_matchers
    @metacharacters         = metacharacters

    return
  end

  # Takes a block that is invoked when there is an error.  The block receives
  # a single parameter: an error message that is the end of a statement
  # describing the problem, with no leading space character.  For example,
  # given «'foo», the block will receive a message like 'has unbalanced single
  # quotes.'.
  #
  # Returns the TokenizedString; if there was a parse error, then the return
  # value will be nil (and the block will have been invoked).
  def tokenize(string, &error_block)
    @string        = string.clone
    @error_block   = error_block
    @single_quoted = nil
    @segments      = []

    strip_quotes_and_process_escapes

    # No segments means that parsing failed and the error block was invoked.
    return if @segments.empty?

    return Fig::TokenizedString.new(@segments, @single_quoted, @metacharacters)
  end

  private

  # Determines how @string is quoted, strips the quotes, and fills in
  # @segments.  Leaves @segments empty if an error was reported.
  def strip_quotes_and_process_escapes
    if @string.empty?
      @single_quoted = false
      @segments << Fig::TokenizedString::PlainSegment.new('')

      return
    end

    @single_quoted = strip_single_quotes_and_process_escapes
    return if @single_quoted.nil?

    if @single_quoted
      # Single-quoted text gets no subexpression processing at all.
      @segments << Fig::TokenizedString::PlainSegment.new(@string.clone)

      return
    end

    strip_double_quotes_and_process_escapes

    return
  end

  # Returns true if @string was single quoted (the quotes will have been
  # removed from @string), false if it was not, and nil if there was an error
  # (in which case the error block will have been invoked).
  def strip_single_quotes_and_process_escapes
    return false if @string[0..0] != %q<'> && @string[-1..-1] != %q<'>

    # «\'» is legal: a string in which every single quote is escaped is not
    # single quoted.
    return false if @string =~ %r< \A ( [^\\']* (?: \\{2} )* \\ ' )* \z >x

    if (
      @string.length == 1       ||
      @string[0..0] != %q<'>    ||
      @string[-1..-1] != %q<'>  ||
      @string =~ %r< [^\\] (?: \\{2} )* (?: \\ | ' .* ) ' \z >x
    )
      @error_block.call 'has unbalanced single quotes.'
      return
    end

    if @string =~ %r< [^\\] (?: \\{2} )*? \\ ([^\\']) >x
      @error_block.call(
        "contains a bad escape sequence (\\#{$1}) inside single quotes."
      )
      return
    end

    @string.sub!( %r< \A ' (.*) ' \z >xm, '\1')

    return true
  end

  # Handles everything that is not single quoted: strips any surrounding
  # double quotes and then breaks @string up into segments.
  def strip_double_quotes_and_process_escapes
    was_quoted = check_and_strip_double_quotes
    return if was_quoted.nil?

    # A lone escaped single quote becomes a plain single quote.
    if @string == %q<\\'>
      @segments << Fig::TokenizedString::PlainSegment.new(%q<'>)

      return
    end

    generate_segments was_quoted

    return
  end

  # Returns true if @string was double quoted (the quotes will have been
  # removed from @string), false if it was not, and nil if there was an error
  # (in which case the error block will have been invoked).
  def check_and_strip_double_quotes
    # We accept any unquoted single character at this point.  Later
    # validation will catch bad characters.
    return false if @string =~ %r< \A \\ . \z >xm

    if @string[0..0] == %q<">
      if @string.length == 1 || @string[-1..-1] != %q<">
        @error_block.call 'has unbalanced double quotes.'
        return
      end

      if @string =~ %r< [^\\] (?: \\{2} )*? \\ " \z >xm
        @error_block.call \
          'has unbalanced double quotes; the trailing double quote is escaped.'
        return
      end

      @string.sub!( %r< \A " (.*) " \z >xm, '\1' )

      return true
    elsif @string =~ %r< (?: \A | [^\\] ) (?: \\{2} )* " \z >xm
      @error_block.call \
        %q<has unbalanced double quotes; it ends in a double quote when it didn't start with one.>
      return
    end

    return false
  end

  # Consumes @string from the front, building up plain text and the tokens
  # produced by the subexpression matchers.  was_quoted says whether the
  # string was surrounded by double quotes.
  #
  # Segments are collected locally and only added to @segments once the whole
  # string has been consumed, so that an error part way through leaves
  # @segments empty and #tokenize() returns nil as documented.
  def generate_segments(was_quoted)
    segments     = []
    plain_string = nil

    until @string.empty?
      if @string =~ %r< \A (\\+) ([^\\] .*)? \z >xm
        # Grab the captures before anything else can clobber them.
        slashes, remainder = $1, $2

        if slashes.length % 2 == 1
          # An odd number of backslashes means the last one escapes whatever
          # follows.
          if remainder.nil?
            @error_block.call 'ends in an incomplete escape.'
            return
          end

          subexpression_matched = subexpression_match(remainder)
          return if subexpression_matched.nil?

          if (
            subexpression_matched       ||
            remainder[0..0] == %q<">    ||
            ! was_quoted && remainder[0..0] == %q<'>
          )
            plain_string ||= ''
            plain_string << slashes
            plain_string << remainder[0..0]
            @string = remainder[1..-1] || ''
          else
            @error_block.call \
              "contains a bad escape sequence (\\#{remainder[0..0]})."
            return
          end
        else
          plain_string ||= ''
          plain_string << slashes
          # remainder is nil when the string ends in an even run of
          # backslashes; without the fallback the loop test would blow up.
          @string = remainder || ''
        end
      else
        # subexpression_match returns nil on error, false on no match, and a
        # [replacement, remainder] pair on a match.
        replacement, remainder = subexpression_match @string
        return if replacement.nil?

        if replacement
          if replacement.is_a? String
            # The replacement may be the very first thing in the string, so
            # there may not be any plain text accumulated yet.
            plain_string ||= ''
            plain_string << replacement
          else
            if ! plain_string.nil?
              segments << Fig::TokenizedString::PlainSegment.new(plain_string)
              plain_string = nil
            end
            segments << replacement
          end
          @string = remainder
        elsif @string =~ %r< \A " >xm
          @error_block.call 'contains an unescaped double quote.'
          return
        elsif ! was_quoted && @string =~ %r< \A ' >xm
          @error_block.call 'contains an unescaped single quote.'
          return
        else
          plain_string ||= ''
          plain_string << @string[0..0]
          @string = @string[1..-1] || ''
        end
      end
    end

    if plain_string
      segments << Fig::TokenizedString::PlainSegment.new(plain_string)
    end

    @segments.concat segments

    return
  end

  # Tries each subexpression matcher against the start of sub_string.
  #
  # Returns false if nothing matched, nil if the first matching matcher's
  # action returned nil or false (it is expected to have invoked the error
  # block), and otherwise a two-element array of the action's result and the
  # text following the match.
  def subexpression_match(sub_string)
    @subexpression_matchers.each do |matcher|
      pattern = matcher[:pattern]

      if sub_string =~ %r< \A ( #{pattern} ) >x
        subexpression, remainder = $1, $'
        replacement = matcher[:action].call subexpression, @error_block

        return if ! replacement

        return [replacement, remainder]
      end
    end

    return false
  end
end