# encoding: UTF-8
require 'htmlentities/flavors'
require 'htmlentities/encoder'
require 'htmlentities/decoder'
require 'htmlentities/version'

#
# HTML entity encoding and decoding for Ruby
#
class HTMLEntities

  # Raised by #initialize when the requested flavor is not listed in FLAVORS.
  UnknownFlavor = Class.new(RuntimeError)

  #
  # Build a coder for the given flavor. The flavor may be anything that
  # responds to +to_s+ (e.g. a String or Symbol) and is matched
  # case-insensitively.
  #
  # Available flavors are 'html4', 'expanded' and 'xhtml1' (the default).
  #
  # html4 and xhtml1 differ in functionality only in how they treat the
  # apos (apostrophe) named entity, which HTML4 does not define.
  #
  # 'expanded' adds a large number of SGML entities taken from
  #   ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/SGML.TXT
  # which "maps SGML character entities from various public sets (namely,
  # ISOamsa, ISOamsb, ISOamsc, ISOamsn, ISOamso, ISOamsr, ISObox, ISOcyr1,
  # ISOcyr2, ISOdia, ISOgrk1, ISOgrk2, ISOgrk3, ISOgrk4, ISOlat1, ISOlat2,
  # ISOnum, ISOpub, ISOtech, HTMLspecial, HTMLsymbol) to corresponding
  # Unicode characters." (sgml.txt).
  #
  # 'expanded' is a strict superset of the XHTML entities: every xhtml named
  # entity encodes and decodes the same under :expanded as under :xhtml1
  #
  # Raises UnknownFlavor if the normalised flavor name is not in FLAVORS.
  #
  def initialize(flavor = 'xhtml1')
    # Normalise first so that symbols and mixed-case names are accepted.
    @flavor = flavor.to_s.downcase
    return if FLAVORS.include?(@flavor)

    # The message reports the argument exactly as the caller supplied it.
    raise UnknownFlavor, "Unknown flavor #{flavor}"
  end

  #
  # Decode the entities in +source+ into their UTF-8 equivalents.
  # The string should already be in UTF-8 encoding.
  #
  # Unknown named entities will not be converted
  #
  def decode(source)
    # The decoder depends only on the flavor, so it is created on first use
    # and then reused for subsequent calls.
    @decoder ||= Decoder.new(@flavor)
    @decoder.decode(source)
  end

  #
  # Encode codepoints in +source+ into their corresponding entities. Several
  # operations are available and may be combined:
  #
  # :basic :: Convert the five XML entities ('"<>&)
  # :named :: Convert non-ASCII characters to their named HTML 4.01 equivalent
  # :decimal :: Convert non-ASCII characters to decimal entities (e.g. &#1234;)
  # :hexadecimal :: Convert non-ASCII characters to hexadecimal entities (e.g. &#x12ab;)
  #
  # You can specify the commands in any order, but they will be executed in
  # the order listed above to ensure that entity ampersands are not
  # clobbered and that named entities are replaced before numeric ones.
  #
  # If no instructions are specified, :basic will be used.
  #
  # Examples:
  #   encode(str) - XML-safe
  #   encode(str, :basic, :decimal) - XML-safe and 7-bit clean
  #   encode(str, :basic, :named, :decimal) - 7-bit clean, with all
  #   non-ASCII characters replaced with their named entity where possible, and
  #   decimal equivalents otherwise.
  #
  # Note: It is the program's responsibility to ensure that the source
  # contains valid UTF-8 before calling this method.
  #
  def encode(source, *instructions)
    # A fresh encoder is built on every call because the instruction list
    # can differ from one call to the next.
    encoder = Encoder.new(@flavor, instructions)
    encoder.encode(source)
  end
end