class HTMLEntities
  # Raised when an Encoder is given an unknown instruction or a
  # contradictory combination of instructions.
  InstructionError = Class.new(RuntimeError)

  # Replaces characters of a string with HTML/XML character references.
  #
  # Which characters are replaced, and whether the replacement is a named
  # (&amp;lt;), decimal (&amp;#60;) or hexadecimal (&amp;#x3c;) reference,
  # is decided once at construction time from the flavor and instructions.
  class Encoder #:nodoc:
    # Every instruction symbol accepted by #initialize.
    INSTRUCTIONS = [:basic, :named, :decimal, :hexadecimal].freeze

    # flavor       - key used to look up HTMLEntities::MAPPINGS and
    #                HTMLEntities::SKIP_DUP_ENCODINGS; also matched against
    #                /^html/ and compared to 'html4' to pick the regexps.
    # instructions - Array of symbols taken from INSTRUCTIONS. An empty
    #                array is treated as [:basic].
    #
    # Raises InstructionError for unknown instructions, or when both
    # :decimal and :hexadecimal are requested.
    def initialize(flavor, instructions)
      @flavor = flavor
      instructions = [:basic] if instructions.empty?
      validate_instructions(instructions)
      build_basic_entity_encoder(instructions)
      build_extended_entity_encoder(instructions)
    end

    # Returns a new string (source.to_s is duplicated, never mutated) in
    # which the basic characters, and then the extended characters, have
    # been replaced by whatever encode_basic / encode_extended return.
    def encode(source)
      string = source.to_s.dup
      string.gsub!(basic_entity_regexp) { encode_basic($&) }
      string.gsub!(extended_entity_regexp) { encode_extended($&) }
      string
    end

    private

    # Regexp for the characters that are always encoded. Flavors whose
    # name starts with "html" leave the apostrophe out of this set.
    def basic_entity_regexp
      @basic_entity_regexp ||=
        case @flavor
        when /^html/
          /[<>"&]/
        else
          /[<>'"&]/
        end
    end

    # Regexp for everything outside the range 0x20-0x7E. For the 'html4'
    # flavor the apostrophe is matched here as well, since it is not part
    # of that flavor's basic set.
    def extended_entity_regexp
      @extended_entity_regexp ||=
        if encoding_aware?
          pattern = '[^\u{20}-\u{7E}]'
          pattern += "|'" if @flavor == 'html4'
          Regexp.new(pattern)
        else
          # Strings without encoding support: the third argument "U" is
          # passed so the pattern is treated as UTF-8.
          pattern = '[^\x20-\x7E]'
          pattern += "|'" if @flavor == 'html4'
          Regexp.new(pattern, nil, "U")
        end
    end

    # Raises InstructionError unless every instruction is known and
    # :decimal / :hexadecimal are not both present.
    def validate_instructions(instructions)
      unknown_instructions = instructions - INSTRUCTIONS
      if unknown_instructions.any?
        raise InstructionError, "unknown encode_entities command(s): #{unknown_instructions.inspect}"
      end
      if instructions.include?(:decimal) && instructions.include?(:hexadecimal)
        raise InstructionError, "hexadecimal and decimal encoding are mutually exclusive"
      end
    end

    # Defines encode_basic(char) on this instance as a direct call to one
    # of the encode_* methods. :basic or :named selects named references;
    # otherwise :decimal or :hexadecimal is used.
    def build_basic_entity_encoder(instructions)
      method =
        if instructions.include?(:basic) || instructions.include?(:named)
          :encode_named
        elsif instructions.include?(:decimal)
          :encode_decimal
        elsif instructions.include?(:hexadecimal)
          :encode_hexadecimal
        end
      instance_eval "def encode_basic(char)\n#{method}(char)\nend"
    end

    # Defines encode_extended(char) on this instance. The generated method
    # tries each requested encoder in the fixed order named, decimal,
    # hexadecimal, returns the first truthy result, and falls back to the
    # unmodified character (which is also the result when only :basic was
    # requested).
    def build_extended_entity_encoder(instructions)
      lines = ["def encode_extended(char)"]
      ([:named, :decimal, :hexadecimal] & instructions).each do |encoder|
        lines << "encoded = encode_#{encoder}(char)"
        lines << "return encoded if encoded"
      end
      lines << "char"
      lines << "end"
      instance_eval lines.join("\n")
    end

    # Returns "&name;" for the first codepoint of char, or a falsy value
    # when reverse_map has no name for that codepoint.
    def encode_named(char)
      codepoint = char.unpack('U')[0]
      (entity = reverse_map[codepoint]) && "&#{entity};"
    end

    # Returns the decimal numeric reference ("&#NNN;") for the first
    # codepoint of char.
    def encode_decimal(char)
      "&##{char.unpack('U')[0]};"
    end

    # Returns the hexadecimal numeric reference ("&#xHH;", lowercase
    # digits) for the first codepoint of char.
    def encode_hexadecimal(char)
      "&#x#{char.unpack('U')[0].to_s(16)};"
    end

    # Memoized inversion of HTMLEntities::MAPPINGS[@flavor], after
    # removing any entity names listed in
    # HTMLEntities::SKIP_DUP_ENCODINGS[@flavor] (when that entry exists).
    def reverse_map
      @reverse_map ||= begin
        skips = HTMLEntities::SKIP_DUP_ENCODINGS[@flavor]
        map = HTMLEntities::MAPPINGS[@flavor]
        uniqmap = skips ? map.reject { |ent, _hx| skips.include?(ent) } : map
        uniqmap.invert
      end
    end

    # True when String responds to #encoding.
    def encoding_aware?
      "1.9".respond_to?(:encoding)
    end
  end
end