# encoding: utf-8
# text/formatted/parser.rb : Implements a bi-directional parser between a subset
# of html and formatted text arrays
#
# Copyright February 2010, Daniel Nelson. All Rights Reserved.
#
# This is free software. Please see the LICENSE and COPYING files for details.
#
module Prawn
  module Text
    module Formatted
      # Converts between strings carrying a small, HTML-like inline markup and
      # arrays of "formatted text" fragment hashes.
      #
      # Recognised markup: <b>/<strong>, <i>/<em>, <u>, <strikethrough>, <sub>,
      # <sup>, <font name= size= character_spacing=>, <color rgb=> or
      # <color c= m= y= k=>, <link href= anchor=> (alias <a>), and <br> as a
      # line break. Literal "<", ">" and "&" in text are written as the
      # entities &lt;, &gt; and &amp;.
      #
      # Each fragment hash has the keys :text, :styles, :color, :link,
      # :anchor, :font, :size and :character_spacing.
      class Parser
        # Tokenizer: a token is a newline, one of the recognised opening or
        # closing tags, or a run of characters containing neither "<" nor a
        # newline. A "<" that does not start a recognised tag matches no
        # alternative and is therefore skipped by String#scan.
        PARSER_REGEX = begin
          regex_string = "\n|" +
                         "<b>|</b>|" +
                         "<i>|</i>|" +
                         "<u>|</u>|" +
                         "<strikethrough>|</strikethrough>|" +
                         "<sub>|</sub>|" +
                         "<sup>|</sup>|" +
                         "<link[^>]*>|</link>|" +
                         "<color[^>]*>|</color>|" +
                         "<font[^>]*>|</font>|" +
                         "<strong>|</strong>|" +
                         "<em>|</em>|" +
                         "<a[^>]*>|</a>|" +
                         "[^<\n]+"
          Regexp.new(regex_string, Regexp::MULTILINE)
        end

        # Parses +string+ into an array of fragment hashes.
        # <br>, <br/> and <br /> are first normalised to "\n".
        def self.to_array(string)
          tokens = string.gsub(/<br\s*\/?>/, "\n").scan(PARSER_REGEX)
          array_from_tokens(tokens)
        end

        # Serialises an array of fragment hashes back into a markup string.
        #
        # Tags are opened in the order styles, font, link, color and closed in
        # the reverse order, so the output is always properly nested. Text is
        # entity-escaped; attribute values are emitted verbatim inside single
        # quotes.
        def self.to_string(array)
          prefixes = { :bold => "<b>",
                       :italic => "<i>",
                       :underline => "<u>",
                       :strikethrough => "<strikethrough>",
                       :subscript => "<sub>",
                       :superscript => "<sup>" }
          suffixes = { :bold => "</b>",
                       :italic => "</i>",
                       :underline => "</u>",
                       :strikethrough => "</strikethrough>",
                       :subscript => "</sub>",
                       :superscript => "</sup>" }

          array.map do |hash|
            prefix = ""
            suffix = ""

            (hash[:styles] || []).each do |style|
              prefix += prefixes[style]
              suffix = suffixes[style] + suffix
            end

            font = hash[:font] ? " name='#{hash[:font]}'" : nil
            size = hash[:size] ? " size='#{hash[:size]}'" : nil
            character_spacing =
              if hash[:character_spacing]
                " character_spacing='#{hash[:character_spacing]}'"
              end
            if font || size || character_spacing
              prefix += "<font#{font}#{size}#{character_spacing}>"
              # Prepend rather than overwrite, so the closing tags of the
              # enclosing styles are kept.
              suffix = "</font>" + suffix
            end

            link = hash[:link] ? " href='#{hash[:link]}'" : nil
            anchor = hash[:anchor] ? " anchor='#{hash[:anchor]}'" : nil
            if link || anchor
              prefix += "<link#{link}#{anchor}>"
              suffix = "</link>" + suffix
            end

            if hash[:color]
              # An Array colour is CMYK; anything else is an RGB hex string.
              if hash[:color].kind_of?(Array)
                prefix += "<color c='#{hash[:color][0]}'" +
                          " m='#{hash[:color][1]}'" +
                          " y='#{hash[:color][2]}'" +
                          " k='#{hash[:color][3]}'>"
              else
                prefix += "<color rgb='#{hash[:color]}'>"
              end
              suffix = "</color>" + suffix
            end

            # "&" must be escaped first so the "&" of the other entities is
            # not escaped a second time.
            string = hash[:text].gsub("&", "&amp;").gsub(">", "&gt;").gsub("<", "&lt;")
            prefix + string + suffix
          end.join
        end

        # Splits an array of fragments into paragraphs (arrays of fragments),
        # breaking at each "\n" found in the fragment text. A newline that
        # directly follows another newline (or that starts the input) yields a
        # paragraph holding a single "\n" fragment, which preserves blank lines.
        def self.array_paragraphs(array) #:nodoc:
          paragraphs = []
          paragraph = []
          previous_string = "\n"
          scan_pattern = /[^\n]+|\n/

          array.each do |hash|
            hash[:text].scan(scan_pattern).each do |string|
              if string == "\n"
                paragraph << hash.merge(:text => "\n") if previous_string == "\n"
                paragraphs << paragraph unless paragraph.empty?
                paragraph = []
              else
                paragraph << hash.merge(:text => string)
              end
              previous_string = string
            end
          end

          paragraphs << paragraph unless paragraph.empty?
          paragraphs
        end

        # Builds the fragment array from the tokens produced by PARSER_REGEX.
        #
        # Left public for backward compatibility: a bare +private+ keyword does
        # not apply to methods defined with +def self.+. The +tokens+ array is
        # not modified.
        def self.array_from_tokens(tokens) #:nodoc:
          array = []
          styles = []
          colors = []
          link = nil
          anchor = nil
          fonts = []
          sizes = []
          character_spacings = []

          tokens.each do |token|
            case token
            when "<b>", "<strong>" then styles << :bold
            when "<i>", "<em>" then styles << :italic
            when "<u>" then styles << :underline
            when "<strikethrough>" then styles << :strikethrough
            when "<sub>" then styles << :subscript
            when "<sup>" then styles << :superscript
            when "</b>", "</strong>" then styles.delete(:bold)
            when "</i>", "</em>" then styles.delete(:italic)
            when "</u>" then styles.delete(:underline)
            when "</strikethrough>" then styles.delete(:strikethrough)
            when "</sub>" then styles.delete(:subscript)
            when "</sup>" then styles.delete(:superscript)
            when "</link>", "</a>"
              link = nil
              anchor = nil
            when "</color>"
              colors.pop
            when "</font>"
              fonts.pop
              sizes.pop
              character_spacings.pop
            when /\A<link[^>]*>\z/, /\A<a[^>]*>\z/
              # An attribute that is absent leaves the current value untouched.
              link = attribute_value(token, "href") || link
              anchor = attribute_value(token, "anchor") || anchor
            when /\A<color[^>]*>\z/
              # Push exactly one entry per opening tag so that the single pop
              # performed by </color> always restores the enclosing colour.
              colors << color_from_tag(token, colors.last)
            when /\A<font[^>]*>\z/
              # Push onto all three stacks (inheriting the current value for
              # absent attributes) because </font> pops all three.
              name = attribute_value(token, "name")
              size = attribute_value(token, "size")
              spacing = attribute_value(token, "character_spacing")
              fonts << (name || fonts.last)
              sizes << (size ? size.to_f : sizes.last)
              character_spacings << (spacing ? spacing.to_f : character_spacings.last)
            else
              # Plain text: "&amp;" is decoded last so that "&amp;lt;" becomes
              # the literal text "&lt;" rather than "<".
              string = token.gsub("&lt;", "<").gsub("&gt;", ">").gsub("&amp;", "&")
              array << { :text => string,
                         :styles => styles.dup,
                         :color => colors.last,
                         :link => link,
                         :anchor => anchor,
                         :font => fonts.last,
                         :size => sizes.last,
                         :character_spacing => character_spacings.last }
            end
          end

          array
        end

        # Returns the value of attribute +name+ inside the tag +token+
        # (double-quoted form preferred, then single-quoted), or nil when the
        # attribute is absent.
        def self.attribute_value(token, name)
          matches = /#{name}="([^"]*)"/.match(token) ||
                    /#{name}='([^']*)'/.match(token)
          matches && matches[1]
        end
        private_class_method :attribute_value

        # Extracts the colour described by a <color ...> tag.
        #
        # Supported forms are c/m/y/k attributes (returned as an array of four
        # integers) and rgb="ffffff" / rgb="#ffffff" (returned as the hex
        # string without the leading "#"). CMYK wins when both are present.
        # Returns +current+ when the tag carries neither form.
        def self.color_from_tag(token, current)
          cmyk = /c="#?([^"]*)" +m="#?([^"]*)" +y="#?([^"]*)" +k="#?([^"]*)"/.match(token) ||
                 /c='#?([^']*)' +m='#?([^']*)' +y='#?([^']*)' +k='#?([^']*)'/.match(token)
          return cmyk.captures.map { |component| component.to_i } if cmyk

          rgb = /rgb="#?([^"]*)"/.match(token) || /rgb='#?([^']*)'/.match(token)
          rgb ? rgb[1] : current
        end
        private_class_method :color_from_tag
      end
    end
  end
end