lib/RichTextScanner.rb in taskjuggler-0.0.3 vs lib/RichTextScanner.rb in taskjuggler-0.0.4

- old
+ new

@@ -1,11 +1,11 @@ #!/usr/bin/env ruby -w # encoding: UTF-8 # # = RichTextScanner.rb -- The TaskJuggler III Project Management Software # -# Copyright (c) 2006, 2007, 2008, 2009 by Chris Schlaeger <cs@kde.org> +# Copyright (c) 2006, 2007, 2008, 2009, 2010 by Chris Schlaeger <cs@kde.org> # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # @@ -51,10 +51,11 @@ end # This is a wrapper for nextToken only used for debugging. #def nextToken # tok = nextTokenI + # raise "Token Error:" unless tok && tok[0] && tok[1] # puts "#{tok[0]}: #{tok[1]}" # tok #end # Return the next token from the input text. @@ -66,10 +67,12 @@ return tok end if @mode == :funcarg return nextTokenFuncArg + elsif @mode == :href + return nextTokenHRef end if @beginOfLine && @mode == :wiki if (res = nextTokenWikiBOL) return res end @@ -122,11 +125,11 @@ # Function arguments have the following formats: # <[blockfunc par1="value1" par2='value2']> # <-inlinefunc par1="value1" ... -> def nextTokenFuncArg token = [ '.', '<END>' ] - while c = nextChar + while (c = nextChar) case c when ' ', "\n", "\t" if (tok = readBlanks(c)) token = tok break @@ -151,12 +154,48 @@ return [ 'INLINEFUNCEND', '->' ] end returnChar end end + token end + def nextTokenHRef + token = [ '.', '<END>' ] + while (c = nextChar) + if c.nil? + # We've reached the end of the text. + return [ '.', '<END>' ] + elsif c == ' ' || c == "\t" || c == "\n" + # Sequences of tabs, spaces and newlines are treated as token + # boundaries, but otherwise they are ignored. + readSequence(" \n\t") + return [ 'SPACE', ' ' ] + elsif c == '<' && !@ignoreInlineMarkup + if nextChar == '-' && isIdStart(peek(1)) + token = readId('', 'QUERY') + unless nextChar == '-' && nextChar == '>' + error('unterminated_query', + "Inline query must be terminated with '->'") + end + return token + else + # It's not a query. 
+ returnChar(2) + @ignoreInlineMarkup = true + next + end + elsif c == ']' + @mode = :wiki + return [ 'HREFEND', ']' ] + else + return nextTokenWord(c) + end + end + token + end + def nextTokenWikiBOL # Some characters have only a special meaning at the start of the line. # When the last token pushed the cursor into a new line, this flag is set # to true. @@ -232,11 +271,11 @@ # We've reached the end of the text. [ '.', '<END>' ] elsif c == ' ' || c == "\t" # Sequences of tabs or spaces are treated as token boundaries, but # otherwise they are ignored. - readSequence(' ', "\t") + readSequence(" \t") [ 'SPACE', ' ' ] elsif c == "'" && !@ignoreInlineMarkup # Sequence of 2 ' means italic, 3 ' means bold, 4 ' means monospaced # code, 5 ' means italic and bold. Anything else is just normal text. level = readSequenceMax("'", 5) @@ -268,14 +307,19 @@ @ignoreInlineMarkup = true nil end elsif c == '[' level = readSequenceMax('[', 2) - [ level == 1 ? 'HREF' : 'REF', '[' * level ] - elsif c == ']' - level = readSequenceMax(']', 2) - [ level == 1 ? 'HREFEND' : 'REFEND', ']' * level ] + if level == 1 + @mode = :href + [ 'HREF' , '[' ] + else + [ 'REF', '[[' ] + end + elsif c == ']' && peek == ']' + nextChar + [ 'REFEND', ']]' ] elsif c == "\n" nextTokenNewline elsif c == '<' && !@ignoreInlineMarkup nextTokenOpenAngle else @@ -289,11 +333,11 @@ # We've reached the end of the text. [ '.', '<END>' ] elsif c == ' ' || c == "\t" # Sequences of tabs or spaces are treated as token boundaries, but # otherwise they are ignored. - readSequence(' ', "\t") + readSequence(" \t") [ 'SPACE', ' ' ] elsif c == "\n" nextTokenNewline elsif c == '<' && !@ignoreInlineMarkup nextTokenOpenAngle @@ -308,18 +352,28 @@ def nextTokenNewline # Newlines are pretty important as they can terminate blocks and turn # the next character into the start of a control sequence. # Hard linebreaks consist of a newline followed by another newline or # any of the begin-of-line control characters. 
- if (c = nextChar) && "\n*#< =-".include?(c) + if (c = nextChar).nil? + # We hit the end of the text. + [ '.', '<END>' ] + elsif c == '<' && peekMatch('[') + # the '<' can be a start of a block (BLOCKFUNCSTART) or inline text + # (INLINEFUNCSTART). Only for the first case the linebreak is real. returnChar if c != "\n" # The next character may be a control character. @beginOfLine = true [ 'LINEBREAK', "\n" ] - elsif c.nil? - # We hit the end of the text. - [ '.', '<END>' ] + elsif "\n*# =-".include?(c) + # These characters correspond to the first characters of a block + # element. When they are found at the beginning of the line, the newline + # was really a line break. + returnChar if c != "\n" + # The next character may be a control character. + @beginOfLine = true + [ 'LINEBREAK', "\n" ] else # Single line breaks are treated as spaces. Return the char after # the newline and start with this one again. returnChar [ 'SPACE', ' ' ] @@ -333,10 +387,15 @@ @mode = :nowiki elsif peekMatch('/nowiki>') # Turn most wiki markup interpretation on. @pos += '/nowiki>'.length @mode = :wiki + elsif peekMatch('-') && @mode == :wiki + nextChar + # Switch the parser to function argument parsing mode. + @mode = :funcarg + return [ 'INLINEFUNCSTART', '<-' ] else # We've not found a valid control sequence. Push back the character # and make sure we treat it as a normal character. @ignoreInlineMarkup = true returnChar @@ -351,15 +410,19 @@ @ignoreInlineMarkup = false str = '' str << c # Now we can collect characters of a word until we hit a whitespace. while (c = nextChar) && !" \n\t".include?(c) - if @mode == :wiki + case @mode + when :wiki # Or at least two ' characters in a row. break if c == "'" && peek == "'" - # Or a ] or < + # Or a -, ] or < break if ']<'.include?(c) + when :href + # Look for - of the end mark -> and ']' + break if c == '-' || c == ']' || c == '<' else # Make sure we find the </nowiki> tag even within a word. 
+ break if c == '<' end str << c @@ -370,11 +433,16 @@ end # Deliver the next character. Keep track of the cursor position. In case we # reach the end, nil is returned. def nextChar - return nil if @pos >= @textLength + if @pos >= @textLength + # Correct @pos so that returnChar works properly but multiple reads of + # EOT are ignored. + @pos = @textLength + 1 + return nil + end c = @text[@pos] @pos += 1 if c == ?\n @lineNo += 1 # Save the position of the line start for later use during error @@ -428,13 +496,13 @@ end # Read a sequence of characters that are all contained in the _chars_ Array. # If a character is found that is not in _chars_ the method returns the so # far found sequence of chars as String. - def readSequence(*chars) + def readSequence(chars) sequence = '' - while chars.include?(c = nextChar) + while (c = nextChar) && chars.index(c) sequence << c end # Push back the character that no longer matched. returnChar sequence @@ -483,19 +551,23 @@ end c = nextChar end end - def readId(c) + def isIdStart(c) + (('a'..'z') === c || ('A'..'Z') === c || c == '_') + end + + def readId(c, tokenType = 'ID') token = "" token << c while (c = nextChar) && (('a'..'z') === c || ('A'..'Z') === c || ('0'..'9') === c || c == '_') token << c end returnChar - return [ 'ID', token ] + return [ tokenType, token ] end def readString(terminator) token = "" while (c = nextChar) && c != terminator