lib/RichTextScanner.rb in taskjuggler-0.0.3 vs lib/RichTextScanner.rb in taskjuggler-0.0.4
- old
+ new
@@ -1,11 +1,11 @@
#!/usr/bin/env ruby -w
# encoding: UTF-8
#
# = RichTextScanner.rb -- The TaskJuggler III Project Management Software
#
-# Copyright (c) 2006, 2007, 2008, 2009 by Chris Schlaeger <cs@kde.org>
+# Copyright (c) 2006, 2007, 2008, 2009, 2010 by Chris Schlaeger <cs@kde.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
@@ -51,10 +51,11 @@
end
# This is a wrapper for nextToken only used for debugging.
#def nextToken
# tok = nextTokenI
+ # raise "Token Error:" unless tok && tok[0] && tok[1]
# puts "#{tok[0]}: #{tok[1]}"
# tok
#end
# Return the next token from the input text.
@@ -66,10 +67,12 @@
return tok
end
if @mode == :funcarg
return nextTokenFuncArg
+ elsif @mode == :href
+ return nextTokenHRef
end
if @beginOfLine && @mode == :wiki
if (res = nextTokenWikiBOL)
return res
end
@@ -122,11 +125,11 @@
# Function arguments have the following formats:
# <[blockfunc par1="value1" par2='value2']>
# <-inlinefunc par1="value1" ... ->
def nextTokenFuncArg
token = [ '.', '<END>' ]
- while c = nextChar
+ while (c = nextChar)
case c
when ' ', "\n", "\t"
if (tok = readBlanks(c))
token = tok
break
@@ -151,12 +154,48 @@
return [ 'INLINEFUNCEND', '->' ]
end
returnChar
end
end
+ token
end
+ # Return the next token while the scanner is in :href mode. The scanner
+ # enters this mode after reading a single '[' and leaves it again when the
+ # closing ']' is found.
+ #
+ # Possible results are:
+ # * [ 'SPACE', ' ' ] for any run of spaces, tabs and newlines,
+ # * a 'QUERY' token for an inline query of the form <-id->,
+ # * [ 'HREFEND', ']' ] for the closing bracket (switches back to :wiki),
+ # * whatever nextTokenWord returns for all other characters,
+ # * [ '.', '<END>' ] when the end of the text has been reached.
+ def nextTokenHRef
+   # nextChar returns nil at the end of the text, which terminates the loop.
+   # No separate nil check is needed inside the loop body.
+   while (c = nextChar)
+     if c == ' ' || c == "\t" || c == "\n"
+       # Sequences of tabs, spaces and newlines are treated as token
+       # boundaries, but otherwise they are ignored.
+       readSequence(" \n\t")
+       return [ 'SPACE', ' ' ]
+     elsif c == '<' && !@ignoreInlineMarkup
+       if nextChar == '-' && isIdStart(peek(1))
+         token = readId('', 'QUERY')
+         unless nextChar == '-' && nextChar == '>'
+           error('unterminated_query',
+                 "Inline query must be terminated with '->'")
+         end
+         return token
+       else
+         # It's not a query. Push back the '<' and the character after it
+         # and scan the '<' again with inline markup detection disabled.
+         returnChar(2)
+         @ignoreInlineMarkup = true
+         next
+       end
+     elsif c == ']'
+       @mode = :wiki
+       return [ 'HREFEND', ']' ]
+     else
+       return nextTokenWord(c)
+     end
+   end
+   # We've reached the end of the text.
+   [ '.', '<END>' ]
+ end
+
def nextTokenWikiBOL
# Some characters have only a special meaning at the start of the line.
# When the last token pushed the cursor into a new line, this flag is set
# to true.
@@ -232,11 +271,11 @@
# We've reached the end of the text.
[ '.', '<END>' ]
elsif c == ' ' || c == "\t"
# Sequences of tabs or spaces are treated as token boundaries, but
# otherwise they are ignored.
- readSequence(' ', "\t")
+ readSequence(" \t")
[ 'SPACE', ' ' ]
elsif c == "'" && !@ignoreInlineMarkup
# Sequence of 2 ' means italic, 3 ' means bold, 4 ' means monospaced
# code, 5 ' means italic and bold. Anything else is just normal text.
level = readSequenceMax("'", 5)
@@ -268,14 +307,19 @@
@ignoreInlineMarkup = true
nil
end
elsif c == '['
level = readSequenceMax('[', 2)
- [ level == 1 ? 'HREF' : 'REF', '[' * level ]
- elsif c == ']'
- level = readSequenceMax(']', 2)
- [ level == 1 ? 'HREFEND' : 'REFEND', ']' * level ]
+ if level == 1
+ @mode = :href
+ [ 'HREF' , '[' ]
+ else
+ [ 'REF', '[[' ]
+ end
+ elsif c == ']' && peek == ']'
+ nextChar
+ [ 'REFEND', ']]' ]
elsif c == "\n"
nextTokenNewline
elsif c == '<' && !@ignoreInlineMarkup
nextTokenOpenAngle
else
@@ -289,11 +333,11 @@
# We've reached the end of the text.
[ '.', '<END>' ]
elsif c == ' ' || c == "\t"
# Sequences of tabs or spaces are treated as token boundaries, but
# otherwise they are ignored.
- readSequence(' ', "\t")
+ readSequence(" \t")
[ 'SPACE', ' ' ]
elsif c == "\n"
nextTokenNewline
elsif c == '<' && !@ignoreInlineMarkup
nextTokenOpenAngle
@@ -308,18 +352,28 @@
def nextTokenNewline
# Newlines are pretty important as they can terminate blocks and turn
# the next character into the start of a control sequence.
# Hard linebreaks consist of a newline followed by another newline or
# any of the begin-of-line control characters.
- if (c = nextChar) && "\n*#< =-".include?(c)
+ if (c = nextChar).nil?
+ # We hit the end of the text.
+ [ '.', '<END>' ]
+ elsif c == '<' && peekMatch('[')
+ # the '<' can be a start of a block (BLOCKFUNCSTART) or inline text
+ # (INLINEFUNCSTART). Only for the first case the linebreak is real.
returnChar if c != "\n"
# The next character may be a control character.
@beginOfLine = true
[ 'LINEBREAK', "\n" ]
- elsif c.nil?
- # We hit the end of the text.
- [ '.', '<END>' ]
+ elsif "\n*# =-".include?(c)
+ # These characters correspond to the first characters of a block
+ # element. When they are found at the begin of the line, the newline
+ # was really a line break.
+ returnChar if c != "\n"
+ # The next character may be a control character.
+ @beginOfLine = true
+ [ 'LINEBREAK', "\n" ]
else
# Single line breaks are treated as spaces. Return the char after
# the newline and start with this one again.
returnChar
[ 'SPACE', ' ' ]
@@ -333,10 +387,15 @@
@mode = :nowiki
elsif peekMatch('/nowiki>')
# Turn most wiki markup interpretation on.
@pos += '/nowiki>'.length
@mode = :wiki
+ elsif peekMatch('-') && @mode == :wiki
+ nextChar
+ # Switch the parser to function argument parsing mode.
+ @mode = :funcarg
+ return [ 'INLINEFUNCSTART', '<-' ]
else
# We've not found a valid control sequence. Push back the character
# and make sure we treat it as a normal character.
@ignoreInlineMarkup = true
returnChar
@@ -351,15 +410,19 @@
@ignoreInlineMarkup = false
str = ''
str << c
# Now we can collect characters of a word until we hit a whitespace.
while (c = nextChar) && !" \n\t".include?(c)
- if @mode == :wiki
+ case @mode
+ when :wiki
# Or at least two ' characters in a row.
break if c == "'" && peek == "'"
- # Or a ] or <
+ # Or a -, ] or <
break if ']<'.include?(c)
+ when :href
+ # Also stop at '-' (the start of the end mark '->'), at ']' and at '<'.
+ break if c == '-' || c == ']' || c == '<'
else
# Make sure we find the </nowiki> tag even within a word.
break if c == '<'
end
str << c
@@ -370,11 +433,16 @@
end
# Deliver the next character. Keep track of the cursor position. In case we
# reach the end, nil is returned.
def nextChar
- return nil if @pos >= @textLength
+ if @pos >= @textLength
+ # Correct @pos so that returnChar works properly but multiple reads of
+ # EOT are ignored.
+ @pos = @textLength + 1
+ return nil
+ end
c = @text[@pos]
@pos += 1
if c == ?\n
@lineNo += 1
# Save the position of the line start for later use during error
@@ -428,13 +496,13 @@
end
# Read a sequence of characters that are all contained in the _chars_ String.
# If a character is found that is not in _chars_, the method returns the
# sequence found so far as a String.
- def readSequence(*chars)
+ def readSequence(chars)
sequence = ''
- while chars.include?(c = nextChar)
+ while (c = nextChar) && chars.index(c)
sequence << c
end
# Push back the character that did no longer match.
returnChar
sequence
@@ -483,19 +551,23 @@
end
c = nextChar
end
end
- def readId(c)
+ # Return true if _c_ may start an identifier, i.e. it matches the range
+ # 'a'..'z' or 'A'..'Z' or is an underscore. Otherwise return false.
+ def isIdStart(c)
+   case c
+   when 'a'..'z', 'A'..'Z', '_'
+     true
+   else
+     false
+   end
+ end
+
# Read an identifier from the input and return it as a [ type, text ] token.
# _c_ is a String that seeds the identifier; it is appended to the token
# text before any further characters are read. _tokenType_ is the value put
# into the first slot of the returned pair; it defaults to 'ID'.
+ def readId(c, tokenType = 'ID')
token = ""
token << c
# Keep collecting while the next character matches 'a'..'z', 'A'..'Z',
# '0'..'9' or is an underscore. The loop also ends when nextChar returns nil.
while (c = nextChar) &&
(('a'..'z') === c || ('A'..'Z') === c || ('0'..'9') === c ||
c == '_')
token << c
end
# Push back the character that no longer matched.
returnChar
- return [ 'ID', token ]
+ return [ tokenType, token ]
end
def readString(terminator)
token = ""
while (c = nextChar) && c != terminator