module Nokogiri
module HTML
###
# Nokogiri lets you write a SAX parser to process HTML while still
# getting HTML error-correction features.
#
# See Nokogiri::HTML::SAX::Parser for a basic example of using a
# SAX parser with HTML.
#
# For more information on SAX parsers, see Nokogiri::XML::SAX
module SAX
###
# This class lets you perform SAX style parsing on HTML with HTML
# error correction.
#
# Here is a basic usage example:
#
# class MyDoc < Nokogiri::XML::SAX::Document
# def start_element name, attributes = []
# puts "found a #{name}"
# end
# end
#
# parser = Nokogiri::HTML::SAX::Parser.new(MyDoc.new)
# parser.parse(File.read(ARGV[0], mode: 'rb'))
#
# For more information on SAX parsers, see Nokogiri::XML::SAX
class Parser < Nokogiri::XML::SAX::Parser
###
# Parse html stored in +data+ using +encoding+.
#
# +data+ is the HTML to parse; +encoding+ (default 'UTF-8') is passed
# unchanged to ParserContext.memory. When a block is given, the newly
# created ParserContext is yielded to it before parsing starts.
#
# Returns nil, without creating a context or yielding, when +data+ is
# empty.
#
# Raises ArgumentError when +data+ is nil (or false).
def parse_memory(data, encoding = 'UTF-8')
  raise ArgumentError, 'data must not be nil' unless data
  # Nothing to parse: bail out before a parser context is created.
  return if data.empty?

  ctx = ParserContext.memory(data, encoding)
  yield ctx if block_given?
  ctx.parse_with(self)
end
###
# Parse a file with +filename+ using +encoding+.
#
# +filename+ is the path of the file to parse; +encoding+ (default
# 'UTF-8') is passed unchanged to ParserContext.file. When a block is
# given, the newly created ParserContext is yielded to it before parsing
# starts.
#
# Raises ArgumentError when +filename+ is nil (or false), Errno::ENOENT
# when nothing exists at +filename+, and Errno::EISDIR when +filename+
# is a directory. The Errno errors include the offending path in their
# message.
def parse_file(filename, encoding = 'UTF-8')
  raise ArgumentError, 'filename must not be nil' unless filename
  # Pass the path along so the error message says which file was the problem.
  # to_s keeps this working for path-like objects that are not Strings.
  raise Errno::ENOENT, filename.to_s unless File.exist?(filename)
  raise Errno::EISDIR, filename.to_s if File.directory?(filename)

  ctx = ParserContext.file(filename, encoding)
  yield ctx if block_given?
  ctx.parse_with(self)
end
end
end
end
end