module Orthotypo
class Composer
# string: the source text held by the composer (set by the constructor).
# ortho: the processed text assembled by #parse.
attr_reader :string, :ortho
SPACE = ' '.freeze
NBSP = ' '.freeze
NNBSP = ' '.freeze
# Prefix of the placeholder tokens ("<prefix>-<uid>") that stand in for
# precious fragments while the text is processed; see store_precious_thing
# and restore_precious_things.
PRECIOUS_TOKEN = 'orthotypopreciousthing'
# Builds a composer and immediately processes the text.
#
# @param string [String] text to process; it is copied, so the caller's
#   object is never modified and frozen strings are accepted
# @param html [Boolean, nil] explicit HTML flag, read by #is_html?
def initialize(string, html: nil)
  # prepare_linebreaks rewrites @string in place with gsub!, so work on a
  # private copy rather than on the caller's object.
  @string = string.dup
  @html = html
  parse
end
protected
# Characters handled by parse_chars_with_space_before (an NNBSP is placed
# before each of them). Empty in this class.
# NOTE(review): presumably overridden by locale-specific subclasses - confirm.
def chars_with_space_before
  %w[]
end
# Characters that get an NNBSP inserted before them when they directly
# follow a digit (see parse_chars_with_space_before_after_digit).
def chars_with_space_before_after_digit
  %w[%]
end
# Characters that lose a preceding space and gain a following one when a
# letter comes right after them (see parse_chars_with_space_after).
def chars_with_space_after
  %w[, .]
end
# Characters handled by parse_chars_with_space_around. Empty in this class.
# NOTE(review): presumably overridden by locale-specific subclasses - confirm.
def chars_with_space_around
  %w[]
end
# Characters whose preceding space is removed by
# parse_chars_with_no_space_before. Empty in this class.
def chars_with_no_space_before
  %w[]
end
# Characters whose surrounding spaces are removed by
# parse_chars_with_no_space_around. Empty in this class.
def chars_with_no_space_around
  %w[]
end
# Two-character strings (opening mark, closing mark) handled by
# parse_pairs_with_space_around. Empty in this class.
def pairs_with_space_around
  %w[]
end
# Two-character strings (opening mark, closing mark) handled by
# parse_pairs_with_no_space_around. Empty in this class.
def pairs_with_no_space_around
  %w[]
end
# Separator characters meant to stay unspaced between digits.
# NOTE(review): no method in the code visible here reads this list;
# parse_chars_in_numbers uses chars_in_numbers instead - confirm intent.
def chars_with_no_space_around_between_digits
  %w[/ :]
end
# Separator characters that parse_chars_in_numbers keeps glued to the
# digits on both sides.
def chars_in_numbers
  %w[. , / :]
end
# Tells whether the text counts as HTML: a truthy flag given to the
# constructor wins; otherwise the answer comes from contains_html_entities?.
def is_html?
  # TODO: also detect markup tags?
  return @html if @html

  contains_html_entities?
end
# Tells whether decoding HTML entities changes the text, i.e. whether the
# string contains at least one entity the decoder recognises.
#
# @return [Boolean] memoized result
def contains_html_entities?
  # `||=` never caches a false result and would decode the whole string
  # again on every call, so memoize on definedness instead.
  return @contains_html_entities if defined?(@contains_html_entities)

  @contains_html_entities = html_entities.decode(string) != string
end
# Rewrites line-break sequences in @string, handling "\r\n" first, then
# "\r", then "\n". gsub! edits the string object in place.
# NOTE(review): each replacement literal spans two source lines, i.e. it
# contains a raw line-break character; confirm the exact bytes intended.
def prepare_linebreaks
@string.gsub! "\r\n", "
"
@string.gsub! "\r", "
"
@string.gsub! "\n", "
"
end
# Starts the working state: @ortho becomes a copy of the source string and
# @nokogiri the HTML fragment parsed from that copy.
def prepare_ortho
  working_copy = string.dup
  @ortho = working_copy
  @nokogiri = Nokogiri::HTML.fragment(working_copy)
end
# Serializes the parsed fragment back into @ortho.
def clean_ortho
  serialized = @nokogiri.to_s
  @ortho = serialized
end
# Runs the whole pipeline: prepares the text and the parsed fragment, hides
# precious fragments behind placeholders, applies every rule, serializes the
# fragment into @ortho and finally puts the precious fragments back.
def parse
  prepare_linebreaks
  prepare_ortho
  preserve_precious_things
  # Rules run in this fixed order: single characters, pairs, then numbers.
  %i[
    parse_chars_with_space_before
    parse_chars_with_space_before_after_digit
    parse_chars_with_space_after
    parse_chars_with_space_around
    parse_chars_with_no_space_before
    parse_chars_with_no_space_around
    parse_pairs_with_space_around
    parse_pairs_with_no_space_around
    parse_chars_in_numbers
  ].each { |rule| send(rule) }
  clean_ortho
  restore_precious_things
end
# Walks the parsed fragment and swaps precious content for placeholders:
# - text nodes are split on SPACE, each piece goes through store_if_precious
#   and the pieces are re-joined with single SPACEs; a leading / trailing
#   SPACE of the original text is put back afterwards;
# - every href attribute of an <a> element is stored unconditionally.
# Resets @precious_things before walking.
def preserve_precious_things
  @precious_things = {}
  @nokogiri.traverse do |node|
    if node.text?
      original_text = node.content
      leading = original_text.start_with?(SPACE)
      trailing = original_text.end_with?(SPACE)
      pieces = original_text.split(SPACE).map { |piece| store_if_precious(piece) }
      node.content = pieces.join(SPACE)
      # split drops the outer spaces, so restore them when they were present.
      node.content = SPACE + node.content if leading
      node.content = node.content + SPACE if trailing
    elsif node.element? && node.name == 'a'
      node.attributes.each_value do |attribute|
        next unless attribute.name == 'href'

        attribute.value = store_precious_thing(attribute.value)
      end
    end
  end
end
# Returns a placeholder token when Analyzer flags the fragment as precious,
# and the fragment itself otherwise.
def store_if_precious(string)
  return string unless Analyzer.precious?(string)

  store_precious_thing(string)
end
# Registers the value under a fresh random hex id in @precious_things and
# returns the placeholder token "<PRECIOUS_TOKEN>-<id>" that replaces it.
def store_precious_thing(string)
  identifier = SecureRandom.hex
  @precious_things[identifier] = string
  [PRECIOUS_TOKEN, identifier].join('-')
end
# Replaces every placeholder token in @ortho with the value stored for it.
def restore_precious_things
  @precious_things.each do |uid, value|
    # Block form inserts the stored value verbatim. With a plain replacement
    # string, gsub! would interpret backslash sequences such as "\0" or "\1"
    # inside the value and corrupt it.
    @ortho.gsub!("#{PRECIOUS_TOKEN}-#{uid}") { value }
  end
end
# For each character of chars_with_space_before, makes sure it is preceded
# by a narrow no-break space (NNBSP).
def parse_chars_with_space_before
  chars_with_space_before.each do |mark|
    escaped = Regexp.escape(mark)
    # Regular space before the mark -> narrow no-break space
    fix("#{SPACE}#{mark}", "#{NNBSP}#{mark}")
    # Letter immediately followed by the mark -> insert a narrow no-break space
    fix(/([[:alpha:]])#{escaped}/, "\\1#{NNBSP}#{mark}")
  end
end
# For each character of chars_with_space_before_after_digit, inserts a
# narrow no-break space (NNBSP) when the character directly follows a digit.
def parse_chars_with_space_before_after_digit
  chars_with_space_before_after_digit.each do |mark|
    escaped = Regexp.escape(mark)
    fix(/([[:digit:]])#{escaped}/, "\\1#{NNBSP}#{mark}")
  end
end
# For each character of chars_with_space_after, drops a regular space in
# front of it and adds one after it when a letter follows directly.
def parse_chars_with_space_after
  chars_with_space_after.each do |mark|
    escaped = Regexp.escape(mark)
    # Space before the mark -> no space before
    fix("#{SPACE}#{mark}", mark)
    # No space after the mark -> space after
    # FIXME (carried over from the original author, no details given)
    fix(/#{escaped}([[:alpha:]])/, "#{mark}#{SPACE}\\1")
  end
end
# For each character of chars_with_space_around, makes sure it is preceded
# by a narrow no-break space (NNBSP).
# NOTE(review): despite the name, only the space BEFORE the character is
# handled here; nothing touches the space after it - confirm intent.
def parse_chars_with_space_around
  chars_with_space_around.each do |mark|
    escaped = Regexp.escape(mark)
    # Regular space before the mark -> narrow no-break space
    fix("#{SPACE}#{mark}", "#{NNBSP}#{mark}")
    # Letter immediately followed by the mark -> insert a narrow no-break space
    fix(/([[:alpha:]])#{escaped}/, "\\1#{NNBSP}#{mark}")
  end
end
# For each character of chars_with_no_space_before, removes a regular space
# placed right before it.
def parse_chars_with_no_space_before
  # Space before the mark -> no space before
  chars_with_no_space_before.each { |mark| fix("#{SPACE}#{mark}", mark) }
end
# For each character of chars_with_no_space_around, removes a regular space
# placed right before it, then one placed right after it.
def parse_chars_with_no_space_around
  chars_with_no_space_around.each do |mark|
    # Space before the mark -> no space before
    fix("#{SPACE}#{mark}", mark)
    # Space after the mark -> no space after
    fix("#{mark}#{SPACE}", mark)
  end
end
# For each pair of pairs_with_space_around (first character = opening mark,
# last character = closing mark), puts a narrow no-break space (NNBSP) just
# inside both marks.
def parse_pairs_with_space_around
  pairs_with_space_around.each do |pair|
    opener = pair[0]
    closer = pair[-1]
    # Regular space inside the marks -> narrow no-break space
    fix("#{opener}#{SPACE}", "#{opener}#{NNBSP}")
    fix("#{SPACE}#{closer}", "#{NNBSP}#{closer}")
    # No whitespace inside the marks -> insert a narrow no-break space
    fix(/#{Regexp.escape(opener)}([^[:space:]])/, "#{opener}#{NNBSP}\\1")
    fix(/([^[:space:]])#{Regexp.escape(closer)}/, "\\1#{NNBSP}#{closer}")
  end
end
# For each pair of pairs_with_no_space_around (first character = opening
# mark, last character = closing mark), removes the whitespace just inside
# the marks when both sides are padded: "( text )" -> "(text)".
def parse_pairs_with_no_space_around
  pairs_with_no_space_around.each do |marks|
    opening = marks.chars.first
    closing = marks.chars.last
    # Lazy quantifier: stop at the nearest closing mark. A greedy ".+" would
    # run from the first opening mark to the LAST closing mark of the text
    # node and leave every pair in between untouched.
    fix(
      /#{Regexp.quote(opening)}[[:space:]](.+?)[[:space:]]#{Regexp.quote(closing)}/,
      opening + "\\1" + closing
    )
  end
end
# For each character of chars_in_numbers, removes a single whitespace
# character placed before and/or after it when it sits between two digits:
# "3 ,5", "3 , 5" and "3, 5" all become "3,5".
def parse_chars_in_numbers
  chars_in_numbers.each do |char|
    # The digits are matched with lookarounds so they are not consumed: a
    # digit can then close one match and open the next, which lets chained
    # separators such as "1 / 2 / 2023" be fixed in full.
    fix(
      /(?<=[[:digit:]])[[:space:]]?#{Regexp.quote(char)}[[:space:]]?(?=[[:digit:]])/,
      char
    )
  end
end
# Lazily builds and caches the HTMLEntities coder (":expanded" flavor).
def html_entities
  return @html_entities if @html_entities

  @html_entities = HTMLEntities.new(:expanded)
end
# Applies one substitution to every text node of the parsed fragment;
# non-text nodes are left alone.
#
# @param bad [String, Regexp] pattern handed to String#gsub
# @param good [String] replacement handed to String#gsub
def fix(bad, good)
  @nokogiri.traverse do |node|
    node.content = node.content.gsub(bad, good) if node.text?
  end
end
end
end