'
)
end
end
# cleans up double and single quotes in textual objects
# "hello world" => “hello world”
# replaces straight quotes and apostrophes with their typographic forms in
# every text object yielded by traverse. the rules run in a fixed order:
# apostrophes first, then double-quoted spans, then single-quoted spans.
# a quoted span has to start and end with a non-whitespace character.
def filter_beautify_quotes
  traverse do |_tag, txt|
    text = txt.content

    # apostrophe directly followed by "s" after a non-space character (it's)
    text = text.gsub(/(\S)'(s)/i, "\\1#{SINGLE_QUOTE_CLOSE}\\2")
    # apostrophe after a trailing "s" and before whitespace (the boss' car)
    text = text.gsub(/(\Ss)'(\s)/i, "\\1#{SINGLE_QUOTE_CLOSE}\\2")

    # double quotes: the single-character alternative is tried first so that
    # "a" is converted on its own instead of being skipped or paired with a
    # later quote (as in: "a" and "b")
    text = text.gsub(/"([^\s"]|\S.*?\S)"/, "#{DOUBLE_QUOTE_OPEN}\\1#{DOUBLE_QUOTE_CLOSE}")

    # single quotes: same shape as the double-quote rule
    txt.content = text.gsub(/'([^\s']|\S.*?\S)'/, "#{SINGLE_QUOTE_OPEN}\\1#{SINGLE_QUOTE_CLOSE}")
  end
end
# cleans up the various dash forms:
# 12--13 => 12–13
# the car---it was red---was destroyed => ...—it was red—...
# rewrites ascii dash sequences in every text object yielded by traverse:
# "--" between digits becomes an en dash, a run of four or more dashes (in
# pairs) after a word character becomes one en dash per pair, and "---"
# flanked by whitespace or word boundaries becomes an em dash.
def filter_beautify_dashes
  traverse do |_tag, txt|
    text = txt.content

    # between two numbers we have an en dash. the right-hand digit is only
    # looked at, not consumed, so it can also open the next range and
    # chained ranges such as 1--2--3 are converted completely.
    text = text.gsub(/(\d)--(?=\d)/, "\\1#{EN_DASH}")

    # we can also have multiple en dashes: one per pair of dashes. the
    # terminator (word boundary, end of string or whitespace) is only
    # checked, so it stays in the text untouched.
    text = text.gsub(/\b(?:--){2,}(?=\b|\z|\s)/) do |dashes|
      EN_DASH * (dashes.length / 2)
    end

    # three dashes in general are an em dash
    txt.content = text.gsub(/(\s|\b)---(\s|\b)/, "\\1#{EM_DASH}\\2")
  end
end
# convert basic arrow forms to unicode characters
# swaps ascii arrows for the arrow constants in every text object yielded by
# traverse. an arrow is only replaced when each side of it is whitespace or
# a word boundary.
def filter_beautify_arrows
  # ordered [pattern, replacement] pairs, applied one after another
  substitutions = [
    [/(\s|\b)-->(\s|\b)/, "\\1#{ARROW_RIGHTWARD}\\2"],
    [/(\s|\b)<--(\s|\b)/, "\\1#{ARROW_LEFTWARD}\\2"],
    [/(\s|\b)<->(\s|\b)/, "\\1#{ARROW_LEFTRIGHT}\\2"],
    [/(\s|\b)==>(\s|\b)/, "\\1#{ARROW_DOUBLE_RIGHTWARD}\\2"],
    [/(\s|\b)<==(\s|\b)/, "\\1#{ARROW_DOUBLE_LEFTWARD}\\2"],
    [/(\s|\b)<=>(\s|\b)/, "\\1#{ARROW_DOUBLE_LEFTRIGHT}\\2"]
  ]

  traverse do |_tag, txt|
    converted = substitutions.inject(txt.content) do |text, (pattern, replacement)|
      text.gsub(pattern, replacement)
    end
    txt.content = converted
  end
end
# converts 'x' signs between numbers into the unicode symbol
def filter_beautify_math
  # NOTE: not implemented yet. the body is empty, so calling this filter
  # performs no substitution at all and simply returns nil.
end
# convert a few shorthands like (c), (tm) to their unicode symbols
# replaces the shorthands (tm), (c) and (r), matched case-insensitively, and
# three-dot ellipses with the corresponding symbol constants in every text
# object yielded by traverse.
def filter_beautify_symbols
  traverse do |_tag, txt|
    text = txt.content
    text = text.gsub(/\(tm\)/i, TRADEMARK)
    text = text.gsub(/\(c\)/i, COPYRIGHT)
    text = text.gsub(/\(r\)/i, REGISTERED)
    # three dots after a word boundary or a space become an ellipsis; an
    # optional fourth dot is captured and kept after it
    txt.content = text.gsub(/(\b| )\.\.\.(\.)?/, "\\1#{ELLIPSIS}\\2")
  end
end
end
end
end