# frozen_string_literal: true
module Prawn
module Text
module Formatted
# Implements a bi-directional parser between a subset of html and
# formatted text arrays.
class Parser
  # @group Extension API

  # Parser regular expression.
  #
  # Alternatives, in order: a newline, one of the supported opening or
  # closing inline tags, or a run of text containing neither `<` nor a
  # newline. Scanning a string with it yields the token stream consumed by
  # {array_from_tokens}.
  #
  # @private
  PARSER_REGEX =
    begin
      regex_string =
        "\n|" \
        '<b>|</b>|' \
        '<i>|</i>|' \
        '<u>|</u>|' \
        '<strikethrough>|</strikethrough>|' \
        '<sub>|</sub>|' \
        '<sup>|</sup>|' \
        '<link[^>]*>|</link>|' \
        '<color[^>]*>|</color>|' \
        '<font[^>]*>|</font>|' \
        '<strong>|</strong>|' \
        '<em>|</em>|' \
        '<a[^>]*>|</a>|' \
        "[^<\n]+"
      Regexp.new(regex_string, Regexp::MULTILINE)
    end

  # Escaped characters.
  # @private
  ESCAPE_CHARS = {
    '&' => '&amp;',
    '>' => '&gt;',
    '<' => '&lt;',
  }.freeze

  # @private
  UNESCAPE_CHARS = ESCAPE_CHARS.invert.freeze

  # Matches any character that has to be escaped. Built once instead of on
  # every {escape} call.
  # @private
  ESCAPE_REGEX = Regexp.union(ESCAPE_CHARS.keys)

  # Matches any entity that has to be unescaped. Built once instead of on
  # every {unescape} call.
  # @private
  UNESCAPE_REGEX = Regexp.union(UNESCAPE_CHARS.keys)

  # Parse formatted string.
  #
  # `<br>` / `<br/>` tags are turned into newlines before tokenizing.
  #
  # @param string [String]
  # @return [Array<Hash>] Text fragments.
  def self.format(string, *_args)
    tokens = string.gsub(%r{<br\s*/?>}, "\n").scan(PARSER_REGEX)
    array_from_tokens(tokens)
  end

  # Serialize text fragments to an inline format string.
  #
  # Tags are opened in the order styles, font, link, color and closed in
  # the reverse order so the output is always properly nested.
  #
  # @param array [Array<Hash>]
  # @return [String]
  def self.to_string(array)
    prefixes = {
      bold: '<b>',
      italic: '<i>',
      underline: '<u>',
      strikethrough: '<strikethrough>',
      subscript: '<sub>',
      superscript: '<sup>',
    }
    suffixes = {
      bold: '</b>',
      italic: '</i>',
      underline: '</u>',
      strikethrough: '</strikethrough>',
      subscript: '</sub>',
      superscript: '</sup>',
    }

    array
      .map { |hash|
        prefix = ''
        suffix = ''
        hash[:styles]&.each do |style|
          prefix += prefixes[style]
          suffix = suffixes[style] + suffix
        end

        font = hash[:font] ? " name='#{hash[:font]}'" : nil
        size = hash[:size] ? " size='#{hash[:size]}'" : nil
        character_spacing =
          if hash[:character_spacing]
            " character_spacing='#{hash[:character_spacing]}'"
          end
        if font || size || character_spacing
          prefix += "<font#{font}#{size}#{character_spacing}>"
          # Prepend: closers accumulated so far must follow this one.
          suffix = "</font>#{suffix}"
        end

        link = hash[:link] ? " href='#{hash[:link]}'" : nil
        anchor = hash[:anchor] ? " anchor='#{hash[:anchor]}'" : nil
        if link || anchor
          prefix += "<link#{link}#{anchor}>"
          suffix = "</link>#{suffix}"
        end

        if hash[:color]
          color = hash[:color]
          prefix +=
            if color.is_a?(Array)
              # CMYK colors are stored as a four-element array.
              "<color c='#{color[0]}' m='#{color[1]}' " \
                "y='#{color[2]}' k='#{color[3]}'>"
            else
              "<color rgb='#{color}'>"
            end
          suffix = "</color>#{suffix}"
        end

        prefix + escape(hash[:text]) + suffix
      }
      .join
  end

  # Break text into paragraphs.
  #
  # Each fragment's text is split on newlines. A newline ends the current
  # paragraph; a newline directly following another newline (or at the very
  # start) yields a paragraph holding a single "\n" fragment.
  #
  # @private
  # @param array [Array<Hash>] Text fragments.
  # @return [Array<Array<Hash>>] Paragraphs of text fragments.
  def self.array_paragraphs(array)
    paragraphs = []
    paragraph = []
    previous_string = "\n"
    scan_pattern = /[^\n]+|\n/

    array.each do |hash|
      hash[:text].scan(scan_pattern).each do |string|
        if string == "\n"
          # Hash#merge already returns a new hash, the source is untouched.
          paragraph << hash.merge(text: "\n") if previous_string == "\n"
          paragraphs << paragraph unless paragraph.empty?
          paragraph = []
        else
          paragraph << hash.merge(text: string)
        end
        previous_string = string
      end
    end

    paragraphs << paragraph unless paragraph.empty?
    paragraphs
  end

  # Build text fragments from a token stream produced by {PARSER_REGEX}.
  #
  # Tag tokens update the current formatting state; every other token is
  # unescaped and emitted as a fragment carrying a snapshot of that state.
  #
  # @private
  # @param tokens [Array<String>]
  # @return [Array<Hash>]
  # @raise [ArgumentError] if a numeric attribute (CMYK channel, font size,
  #   character spacing) cannot be parsed as a number.
  def self.array_from_tokens(tokens)
    array = []
    styles = []
    colors = []
    link = nil
    anchor = nil
    local = nil
    fonts = []
    sizes = []
    character_spacings = []

    tokens.each do |token|
      case token
      when '<b>', '<strong>'
        styles << :bold
      when '<i>', '<em>'
        styles << :italic
      when '<u>'
        styles << :underline
      when '<strikethrough>'
        styles << :strikethrough
      when '<sub>'
        styles << :subscript
      when '<sup>'
        styles << :superscript
      when '</b>', '</strong>'
        styles.delete(:bold)
      when '</i>', '</em>'
        styles.delete(:italic)
      when '</u>'
        styles.delete(:underline)
      when '</strikethrough>'
        styles.delete(:strikethrough)
      when '</sub>'
        styles.delete(:subscript)
      when '</sup>'
        styles.delete(:superscript)
      when '</link>', '</a>'
        link = nil
        anchor = nil
        local = nil
      when '</color>'
        colors.pop
      when '</font>'
        fonts.pop
        sizes.pop
        character_spacings.pop
      when /\A<link[^>]*>\z/, /\A<a[^>]*>\z/
        # An attribute missing from the tag keeps its current value.
        link = attribute_value(token, 'href') || link
        anchor = attribute_value(token, 'anchor') || anchor
        local = attribute_value(token, 'local') || local
      when /\A<color[^>]*>\z/
        # Supported forms (either quote style, optional leading '#'):
        #   rgb="ffffff"
        #   c="100" m="100" y="100" k="100"
        rgb = attribute_value(token, 'rgb')
        rgb &&= rgb.sub(/\A#/, '')
        cmyk =
          %w[c m y k].map do |channel|
            value = attribute_value(token, channel)
            value && Integer(value.sub(/\A#/, ''), 10)
          end
        # Push exactly one entry per opening tag so that the single pop on
        # </color> always restores the enclosing color. A complete CMYK
        # quadruple wins over rgb; a tag without a usable color inherits.
        colors << (cmyk.all? ? cmyk : (rgb || colors.last))
      when /\A<font[^>]*>\z/
        name = attribute_value(token, 'name')
        size = attribute_value(token, 'size')
        spacing = attribute_value(token, 'character_spacing')
        # </font> pops all three stacks, so push on all three and inherit
        # whatever the tag does not override.
        fonts << (name || fonts.last)
        sizes << (size ? Float(size) : sizes.last)
        character_spacings <<
          (spacing ? Float(spacing) : character_spacings.last)
      else
        array << {
          text: unescape(token),
          styles: styles.dup,
          color: colors.last,
          local: local,
          link: link,
          anchor: anchor,
          font: fonts.last,
          size: sizes.last,
          character_spacing: character_spacings.last,
        }
      end
    end

    array
  end

  # Extract the value of a tag attribute, accepting double or single quotes
  # (double-quoted form is tried first).
  #
  # @private
  # @param token [String] Opening tag token.
  # @param name [String] Attribute name.
  # @return [String, nil] Attribute value or `nil` if not present.
  def self.attribute_value(token, name)
    match = /#{name}="([^"]*)"/.match(token) ||
      /#{name}='([^']*)'/.match(token)
    match && match[1]
  end
  private_class_method :attribute_value

  # Escape characters that can interfere with inline format parsing.
  #
  # @param text [String]
  # @return [String]
  def self.escape(text)
    text.gsub(ESCAPE_REGEX, ESCAPE_CHARS)
  end

  # Unescape characters that can interfere with inline format parsing.
  #
  # @param text [String]
  # @return [String]
  def self.unescape(text)
    text.gsub(UNESCAPE_REGEX, UNESCAPE_CHARS)
  end
end
end
end
end