require 'uri'
require 'saxon/xdm'

module Saxon
  # Builds XDM objects from XML sources, for use in XSLT or for query and
  # access
  class DocumentBuilder
    # Provides a simple configuration DSL for DocumentBuilders.
    # @see DocumentBuilder.create
    class ConfigurationDSL
      # @api private
      #
      # Create a new instance and +instance_exec+ the passed-in block against it
      #
      # @param document_builder [Saxon::DocumentBuilder] the builder to configure
      # @param block [Proc] the configuration block to evaluate
      # @return [Object] whatever the block returns
      def self.define(document_builder, block)
        new(document_builder).instance_exec(&block)
      end

      # @api private
      # @param document_builder [Saxon::DocumentBuilder] the builder every DSL
      #   call is forwarded to
      def initialize(document_builder)
        @document_builder = document_builder
      end

      # Sets line numbering on or off
      #
      # @see DocumentBuilder#line_numbering=
      #
      # @param value [Boolean] on (true) or off (false)
      def line_numbering(value)
        @document_builder.line_numbering = value
      end

      # Sets the base URI of documents created using this instance.
      #
      # @see DocumentBuilder#base_uri=
      #
      # @param value [String, URI::File, URI::HTTP] The (absolute) base URI to use
      def base_uri(value)
        @document_builder.base_uri = value
      end

      # Sets the whitespace stripping policy for documents created using this
      # instance.
      #
      # @see DocumentBuilder#whitespace_stripping_policy=
      #
      # @param value [Symbol, Proc, Saxon::S9API::WhitespaceStrippingPolicy] the
      #   policy to use
      def whitespace_stripping_policy(value)
        @document_builder.whitespace_stripping_policy = value
      end

      # Sets DTD validation on or off
      #
      # @see DocumentBuilder#dtd_validation=
      #
      # @param value [Boolean] on (true) or off (false)
      def dtd_validation(value)
        @document_builder.dtd_validation = value
      end
    end

    # Create a new DocumentBuilder that can be used to build new XML documents
    # with the passed-in {Saxon::Processor}. If a block is passed in it's
    # executed as a DSL for configuring the builder instance.
    #
    # @param processor [Saxon::Processor] the Processor
    # @yield A DocumentBuilder configuration DSL block
    # @return [Saxon::DocumentBuilder] the new instance
    def self.create(processor, &block)
      new(processor.to_java.newDocumentBuilder, &block)
    end

    attr_reader :s9_document_builder
    private :s9_document_builder

    # @api private
    # @param [net.sf.saxon.s9api.DocumentBuilder] s9_document_builder The
    #   Saxon DocumentBuilder instance to wrap
    # @yield An optional configuration block, evaluated against a
    #   {ConfigurationDSL} wrapping the new instance
    def initialize(s9_document_builder, &block)
      @s9_document_builder = s9_document_builder
      ConfigurationDSL.define(self, block) if block
    end

    # Report whether documents created using this instance will keep track of
    # the line and column numbers of elements.
    #
    # @return [Boolean] whether line numbering will be tracked
    def line_numbering?
      s9_document_builder.isLineNumbering
    end

    # Switch tracking of line and column numbers for elements in documents
    # created by this instance on or off
    #
    # @see https://www.saxonica.com/documentation9.9/index.html#!javadoc/net.sf.saxon.s9api/DocumentBuilder@setLineNumbering
    #
    # @param on_or_not [Boolean] whether or not to track line numbering
    def line_numbering=(on_or_not)
      s9_document_builder.setLineNumbering(on_or_not)
    end

    # Return the default base URI to be used when building documents using this
    # instance. This value will be ignored if the source being parsed has an
    # intrinsic base URI (e.g. a File).
    #
    # Returns +nil+ if no URI is set (the default).
    #
    # @return [nil, URI::File, URI::HTTP] the default base URI (or nil)
    def base_uri
      java_uri = s9_document_builder.getBaseURI
      # Convert the Java URI into a Ruby URI; pass nil straight through
      java_uri && URI(java_uri.to_s)
    end

    # Set the base URI of documents created using this instance. This value will
    # be ignored if the source being parsed has an intrinsic base URI (e.g. a
    # File)
    #
    # @see https://www.saxonica.com/documentation9.9/index.html#!javadoc/net.sf.saxon.s9api/DocumentBuilder@setBaseURI
    #
    # @param uri [String, URI::File, URI::HTTP] The (absolute) base URI to use
    def base_uri=(uri)
      s9_document_builder.setBaseURI(java.net.URI.new(uri.to_s))
    end

    # Return the Whitespace stripping policy for this instance. Returns one of
    # the standard policy names as a symbol, or the custom Java
    # WhitespaceStrippingPolicy if one was defined using
    # +#whitespace_stripping_policy = ->(qname) { ... }+. (See
    # {#whitespace_stripping_policy=} for more.)
    #
    # +:all+: All whitespace-only nodes will be discarded
    #
    # +:none+: No whitespace-only nodes will be discarded (the default if DTD or
    # schema validation is not in effect)
    #
    # +:ignorable+: Whitespace-only nodes inside elements defined as
    # element-only in the DTD or schema being used will be discarded (the
    # default if DTD or schema validation is in effect)
    #
    # +:unspecified+: the default, which in practice means :ignorable if DTD or
    # schema validation is in effect, and :none otherwise.
    #
    # @return [:all, :none, :ignorable, :unspecified,
    #   Saxon::S9API::WhitespaceStrippingPolicy] the standard policy's name, or
    #   the underlying Java policy object when a custom policy is in use
    def whitespace_stripping_policy
      s9_policy = s9_document_builder.getWhitespaceStrippingPolicy
      case s9_policy
      when Saxon::S9API::WhitespaceStrippingPolicy::UNSPECIFIED then :unspecified
      when Saxon::S9API::WhitespaceStrippingPolicy::NONE then :none
      when Saxon::S9API::WhitespaceStrippingPolicy::IGNORABLE then :ignorable
      when Saxon::S9API::WhitespaceStrippingPolicy::ALL then :all
      else s9_policy # custom policy: hand back the Java object itself
      end
    end

    # Set the whitespace stripping policy to be used for documents built with
    # this instance.
    #
    # Possible values are:
    #
    # * One of the standard policies, as a symbol (+:all+, +:none+,
    #   +:ignorable+, +:unspecified+, see {#whitespace_stripping_policy}).
    # * A Java +net.sf.saxon.s9api.WhitespaceStrippingPolicy+ instance
    # * A Proc/lambda that is handed an element name as a {Saxon::QName}, and
    #   should return true (if whitespace should be stripped for this element)
    #   or false (it should not).
    # @example
    #   whitespace_stripping_policy = ->(element_qname) {
    #     element_qname == Saxon::QName.clark("{http://example.org/}element-name")
    #   }
    #
    # @see https://www.saxonica.com/documentation9.9/index.html#!javadoc/net.sf.saxon.s9api/DocumentBuilder@setWhitespaceStrippingPolicy
    # @see https://www.saxonica.com/documentation9.9/index.html#!javadoc/net.sf.saxon.s9api/WhitespaceStrippingPolicy
    # @param policy [Symbol, Proc, Saxon::S9API::WhitespaceStrippingPolicy] the
    #   policy to use
    # @raise [Saxon::InvalidWhitespaceStrippingPolicyError] if +policy+ is none
    #   of the accepted kinds of value
    def whitespace_stripping_policy=(policy)
      s9_policy =
        case policy
        when :unspecified, :none, :ignorable, :all
          # The standard policies are constants named after the upcased symbol
          Saxon::S9API::WhitespaceStrippingPolicy.const_get(policy.upcase)
        when Proc
          # Wrap the Java QName so the caller's Proc receives a Saxon::QName
          wrapped_policy = ->(s9_qname) { policy.call(Saxon::QName.new(s9_qname)) }
          Saxon::S9API::WhitespaceStrippingPolicy.makeCustomPolicy(wrapped_policy)
        when Saxon::S9API::WhitespaceStrippingPolicy
          policy
        else
          raise InvalidWhitespaceStrippingPolicyError, "#{policy.inspect} is not one of the allowed Symbols, or a custom policy"
        end
      s9_document_builder.setWhitespaceStrippingPolicy(s9_policy)
    end

    # @return [Boolean] whether DTD Validation is enabled
    def dtd_validation?
      s9_document_builder.isDTDValidation
    end

    # Switches DTD validation on or off.
    #
    # It's important to note that DTD validation only applies to documents that
    # contain a +<!DOCTYPE>+, but switching DTD validation off doesn't stop the
    # XML parser Saxon uses from trying to retrieve the DTD that's referenced,
    # which can mean network requests. By default, the SAX parser Saxon uses
    # (Xerces) doesn't make use of XML catalogs, which causes problems when
    # documents reference a DTD with a relative path as in:
    #
    #     <!DOCTYPE html SYSTEM "html.dtd">
    #
    # This can be controlled through a configuration option, however.
    #
    # @see https://www.saxonica.com/documentation9.9/index.html#!javadoc/net.sf.saxon.s9api/DocumentBuilder@setDTDValidation
    # @see https://www.saxonica.com/documentation9.9/index.html#!sourcedocs/controlling-parsing
    # @param on [Boolean] whether DTD Validation should be enabled
    def dtd_validation=(on)
      s9_document_builder.setDTDValidation(on)
    end

    # Build a document from the given source.
    #
    # @param [Saxon::Source] source The Saxon::Source containing the source
    #   IO/string
    # @return [Saxon::XDM::Node] The Saxon::XDM::Node representing the root of the
    #   document tree
    def build(source)
      XDM::Node.new(s9_document_builder.build(source.to_java))
    end

    # @return [Java::NetSfSaxonS9api::DocumentBuilder] The underlying Java Saxon
    #   DocumentBuilder instance
    def to_java
      s9_document_builder
    end
  end

  # Error raised when someone tries to set an invalid whitespace stripping
  # policy on a {DocumentBuilder}
  class InvalidWhitespaceStrippingPolicyError < RuntimeError
  end
end