# encoding: utf-8

# text/formatted/parser.rb : Implements a bi-directional parser between a subset
# of html and formatted text arrays
#
# Copyright February 2010, Daniel Nelson. All Rights Reserved.
#
# This is free software. Please see the LICENSE and COPYING files for details.
#

module Prawn
  module Text
    module Formatted
      # Converts between a small HTML-like markup and "formatted text arrays":
      # arrays of hashes with the keys :text, :styles, :color, :link, :anchor,
      # :font, :size and :character_spacing.
      #
      # Recognised markup:
      #   <b>/<strong>, <i>/<em>, <u>, <strikethrough>, <sub>, <sup>
      #   <link href="" anchor=""> (alias <a>)
      #   <color rgb=""> or <color c="" m="" y="" k="">
      #   <font name="" size="" character_spacing="">
      #   <br>, <br/> (converted to a newline before tokenizing)
      # Attribute values may be quoted with either single or double quotes.
      class Parser
        # Tokenizer pattern: a newline, any recognised opening/closing tag, or
        # a run of text containing neither "<" nor a newline. Alternatives are
        # tried in the order listed.
        PARSER_REGEX = begin
          alternatives = [
            "\n",
            "<b>", "</b>",
            "<i>", "</i>",
            "<u>", "</u>",
            "<strikethrough>", "</strikethrough>",
            "<sub>", "</sub>",
            "<sup>", "</sup>",
            "<link[^>]*>", "</link>",
            "<color[^>]*>", "</color>",
            "<font[^>]*>", "</font>",
            "<strong>", "</strong>",
            "<em>", "</em>",
            "<a[^>]*>", "</a>",
            "[^<\n]+"
          ]
          Regexp.new(alternatives.join("|"), Regexp::MULTILINE)
        end

        # Parses a markup string into a formatted text array.
        #
        # string:: markup text; <br> / <br/> are treated as newlines.
        #
        # Returns an Array of fragment hashes (see array_from_tokens).
        def self.to_array(string)
          tokens = string.gsub(/<br\s*\/?>/, "\n").scan(PARSER_REGEX)
          self.array_from_tokens(tokens)
        end

        # Serialises a formatted text array back into a markup string.
        #
        # array:: Array of fragment hashes. Each must have :text; :styles,
        #         :font, :size, :character_spacing, :link, :anchor and :color
        #         are optional. An Array :color is written as c/m/y/k
        #         attributes, anything else as rgb.
        #
        # Tags are opened in the order styles, font, link, color and closed in
        # the reverse order, so the generated markup is properly nested.
        def self.to_string(array)
          prefixes = { :bold          => "<b>",
                       :italic        => "<i>",
                       :underline     => "<u>",
                       :strikethrough => "<strikethrough>",
                       :subscript     => "<sub>",
                       :superscript   => "<sup>" }
          suffixes = { :bold          => "</b>",
                       :italic        => "</i>",
                       :underline     => "</u>",
                       :strikethrough => "</strikethrough>",
                       :subscript     => "</sub>",
                       :superscript   => "</sup>" }

          array.collect do |hash|
            prefix = ""
            suffix = ""

            if hash[:styles]
              hash[:styles].each do |style|
                prefix = prefix + prefixes[style]
                suffix = suffixes[style] + suffix
              end
            end

            font = hash[:font] ? " name='#{hash[:font]}'" : nil
            size = hash[:size] ? " size='#{hash[:size]}'" : nil
            if hash[:character_spacing]
              character_spacing =
                " character_spacing='#{hash[:character_spacing]}'"
            else
              character_spacing = nil
            end
            if font || size || character_spacing
              prefix = prefix + "<font#{font}#{size}#{character_spacing}>"
              # Prepend rather than overwrite, so the style closing tags
              # collected above are not lost.
              suffix = "</font>" + suffix
            end

            link = hash[:link] ? " href='#{hash[:link]}'" : nil
            anchor = hash[:anchor] ? " anchor='#{hash[:anchor]}'" : nil
            if link || anchor
              prefix = prefix + "<link#{link}#{anchor}>"
              suffix = "</link>" + suffix
            end

            if hash[:color]
              if hash[:color].kind_of?(Array)
                prefix = prefix + "<color c='#{hash[:color][0]}'" +
                                  " m='#{hash[:color][1]}'" +
                                  " y='#{hash[:color][2]}'" +
                                  " k='#{hash[:color][3]}'>"
              else
                prefix = prefix + "<color rgb='#{hash[:color]}'>"
              end
              suffix = "</color>" + suffix
            end

            # "&" must be escaped first so the ampersands introduced by the
            # other two replacements are not escaped a second time.
            string = hash[:text].gsub("&", "&amp;").
                                 gsub(">", "&gt;").
                                 gsub("<", "&lt;")
            prefix + string + suffix
          end.join
        end

        # Splits a formatted text array into paragraphs at newlines.
        #
        # Returns an Array of Arrays of fragment hashes. Each fragment is a
        # copy of its source hash with :text replaced by a newline-free piece.
        # A newline that ends a non-empty line only terminates the paragraph;
        # a newline that directly follows another newline (or starts the
        # input) yields a paragraph holding a single "\n" fragment.
        def self.array_paragraphs(array) #:nodoc:
          paragraphs = []
          paragraph = []
          previous_string = "\n"
          scan_pattern = /[^\n]+|\n/

          array.each do |hash|
            hash[:text].scan(scan_pattern).each do |string|
              if string == "\n"
                if previous_string == "\n"
                  paragraph << hash.dup.merge(:text => "\n")
                end
                paragraphs << paragraph unless paragraph.empty?
                paragraph = []
              else
                paragraph << hash.dup.merge(:text => string)
              end
              previous_string = string
            end
          end

          paragraphs << paragraph unless paragraph.empty?
          paragraphs
        end

        # NOTE: a bare `private` does not apply to methods defined with
        # `def self.`, so array_from_tokens remains publicly callable. It is
        # kept as a marker of intent only.
        private

        # Builds a formatted text array from a token list produced by
        # scanning with PARSER_REGEX.
        #
        # tokens:: Array of String tokens. It is consumed destructively (via
        #          shift) and is empty when this method returns.
        #
        # Every token that is not a recognised tag becomes one fragment hash
        # carrying the formatting state in effect at that point.
        def self.array_from_tokens(tokens)
          array = []
          styles = []
          colors = []
          link = nil
          anchor = nil
          fonts = []
          sizes = []
          character_spacings = []

          while (token = tokens.shift)
            case token
            when "<b>", "<strong>"
              styles << :bold
            when "<i>", "<em>"
              styles << :italic
            when "<u>"
              styles << :underline
            when "<strikethrough>"
              styles << :strikethrough
            when "<sub>"
              styles << :subscript
            when "<sup>"
              styles << :superscript
            when "</b>", "</strong>"
              styles.delete(:bold)
            when "</i>", "</em>"
              styles.delete(:italic)
            when "</u>"
              styles.delete(:underline)
            when "</strikethrough>"
              styles.delete(:strikethrough)
            when "</sub>"
              styles.delete(:subscript)
            when "</sup>"
              styles.delete(:superscript)
            when "</link>", "</a>"
              link = nil
              anchor = nil
            when "</color>"
              colors.pop
            when "</font>"
              fonts.pop
              sizes.pop
              character_spacings.pop
            else
              if token =~ /^<link[^>]*>$/ || token =~ /^<a[^>]*>$/
                matches = /href="([^"]*)"/.match(token) ||
                          /href='([^']*)'/.match(token)
                link = matches[1] unless matches.nil?

                matches = /anchor="([^"]*)"/.match(token) ||
                          /anchor='([^']*)'/.match(token)
                anchor = matches[1] unless matches.nil?
              elsif token =~ /^<color[^>]*>$/
                # A leading "#" on the rgb value is optional and is dropped.
                matches = /rgb="#?([^"]*)"/.match(token) ||
                          /rgb='#?([^']*)'/.match(token)
                colors << matches[1] if matches

                # CMYK is only recognised with the attributes in c, m, y, k
                # order, all quoted the same way.
                matches = /c="#?([^"]*)" +m="#?([^"]*)" +y="#?([^"]*)" +k="#?([^"]*)"/.match(token) ||
                          /c='#?([^']*)' +m='#?([^']*)' +y='#?([^']*)' +k='#?([^']*)'/.match(token)
                if matches
                  colors << [matches[1].to_i, matches[2].to_i,
                             matches[3].to_i, matches[4].to_i]
                end

                # intend to support rgb="#ffffff" or rgb='#ffffff',
                # r="255" g="255" b="255" or r='255' g='255' b='255',
                # and c="100" m="100" y="100" k="100" or
                # c='100' m='100' y='100' k='100'
                # color = { :rgb => "#ffffff" }
                # color = { :r => 255, :g => 255, :b => 255 }
                # color = { :c => 100, :m => 100, :y => 100, :k => 100 }
              elsif token =~ /^<font[^>]*>$/
                matches = /name="([^"]*)"/.match(token) ||
                          /name='([^']*)'/.match(token)
                fonts << matches[1] unless matches.nil?

                matches = /size="([^"]*)"/.match(token) ||
                          /size='([^']*)'/.match(token)
                sizes << matches[1].to_f unless matches.nil?

                matches = /character_spacing="([^"]*)"/.match(token) ||
                          /character_spacing='([^']*)'/.match(token)
                character_spacings << matches[1].to_f unless matches.nil?
              else
                # Plain text. "&amp;" is unescaped last so that an escaped
                # entity such as "&amp;lt;" decodes to "&lt;", not "<".
                string = token.gsub("&lt;", "<").
                               gsub("&gt;", ">").
                               gsub("&amp;", "&")
                array << { :text => string,
                           :styles => styles.dup,
                           :color => colors.last,
                           :link => link,
                           :anchor => anchor,
                           :font => fonts.last,
                           :size => sizes.last,
                           :character_spacing => character_spacings.last }
              end
            end
          end

          array
        end
      end
    end
  end
end