# frozen_string_literal: true

module Nokogiri
  module XML
    ###
    # The Reader parser allows you to effectively pull parse an \XML document. Once instantiated,
    # call Nokogiri::XML::Reader#each to iterate over each node.
    #
    # Nokogiri::XML::Reader parses an \XML document similar to the way a cursor would move. The
    # Reader is given an \XML document, and yields nodes to an each block.
    #
    # The Reader parser might be good for when you need the speed and low memory usage of a \SAX
    # parser, but do not want to write a SAX::Document handler.
    #
    # Here is an example of usage:
    #
    #   reader = Nokogiri::XML::Reader.new <<~XML
    #     <root>
    #       <item>snuggles!</item>
    #     </root>
    #   XML
    #
    #   reader.each do |node|
    #     # node is an instance of Nokogiri::XML::Reader
    #     puts node.name
    #   end
    #
    # ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
    # document, you must parse the document again. It may be better to capture all information you
    # need during a single iteration.
    #
    # ⚠ libxml2 does not support error recovery in the Reader parser. The +RECOVER+ ParseOption is
    # ignored. If a syntax error is encountered during parsing, an exception will be raised.
    class Reader
      include Enumerable

      TYPE_NONE = 0
      # Element node type
      TYPE_ELEMENT = 1
      # Attribute node type
      TYPE_ATTRIBUTE = 2
      # Text node type
      TYPE_TEXT = 3
      # CDATA node type
      TYPE_CDATA = 4
      # Entity Reference node type
      TYPE_ENTITY_REFERENCE = 5
      # Entity node type
      TYPE_ENTITY = 6
      # PI node type
      TYPE_PROCESSING_INSTRUCTION = 7
      # Comment node type
      TYPE_COMMENT = 8
      # Document node type
      TYPE_DOCUMENT = 9
      # Document Type node type
      TYPE_DOCUMENT_TYPE = 10
      # Document Fragment node type
      TYPE_DOCUMENT_FRAGMENT = 11
      # Notation node type
      TYPE_NOTATION = 12
      # Whitespace node type
      TYPE_WHITESPACE = 13
      # Significant Whitespace node type
      TYPE_SIGNIFICANT_WHITESPACE = 14
      # Element end node type
      TYPE_END_ELEMENT = 15
      # Entity end node type
      TYPE_END_ENTITY = 16
      # \XML Declaration node type
      TYPE_XML_DECLARATION = 17

      # A list of errors encountered while parsing
      attr_accessor :errors

      # The \XML source
      attr_reader :source

      alias_method :self_closing?, :empty_element?

      # :call-seq:
      #   Reader.new(input) { |options| ... } → Reader
      #   Reader.new(input, url:, encoding:, options:) { |options| ... } → Reader
      #
      # Create a new Reader to parse an \XML document.
      #
      # [Required Parameters]
      # - +input+ (String | IO): The \XML document to parse.
      #
      # [Optional Parameters]
      # - +url:+ (String) The base URL of the document.
      # - +encoding:+ (String) The name of the encoding of the document.
      # - +options:+ (Integer | ParseOptions) Options to control the parser behavior.
      #   Defaults to +ParseOptions::STRICT+.
      #
      # The positional forms of +url+, +encoding+ and +options+ are still accepted; they supply
      # the defaults for the keyword arguments of the same name, so an explicit keyword wins.
      #
      # [Yields]
      # If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before
      # the document is parsed. See Nokogiri::XML::ParseOptions for more information.
      def self.new(
        string_or_io,
        url_ = nil, encoding_ = nil, options_ = ParseOptions::STRICT,
        url: url_, encoding: encoding_, options: options_
      )
        # Wrap a raw Integer so the block always receives a ParseOptions object.
        options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
        yield options if block_given?

        # Anything that responds to #read is treated as an IO; everything else is handed to
        # the in-memory constructor.
        if string_or_io.respond_to?(:read)
          return Reader.from_io(string_or_io, url, encoding, options.to_i)
        end

        Reader.from_memory(string_or_io, url, encoding, options.to_i)
      end

      # Stores the source and encoding and starts with an empty error list. +url+ is accepted
      # but is not stored by this method.
      private def initialize(source, url = nil, encoding = nil) # :nodoc:
        @source = source
        @errors = []
        @encoding = encoding
      end

      # Get the attributes and namespaces of the current node as a Hash.
      #
      # This is the union of Reader#attribute_hash and Reader#namespaces
      #
      # [Returns]
      #   (Hash) Attribute names and values, and namespace prefixes and hrefs.
      def attributes
        attribute_hash.merge(namespaces)
      end

      ###
      # Move the cursor through the document yielding the cursor to the block.
      #
      # Iteration stops as soon as #read returns a falsy value.
      #
      # [Yields] the value returned by each successful call to #read.
      #
      # [Returns]
      #   An Enumerator when called without a block; otherwise +nil+ once iteration finishes.
      def each
        # Without a block, hand back an Enumerator instead of advancing the cursor and then
        # failing with LocalJumpError on the first yield.
        return enum_for(:each) unless block_given?

        while (cursor = read)
          yield cursor
        end
      end
    end
  end
end