require 'nokogiri'  # FIXME: Implement using different modules as in RDF::TriX
require 'rdf/rdfxml/patches/graph_properties'

module RDF::RDFXML
  ##
  # An RDF/XML serialiser in Ruby
  #
  # Note that the natural interface is to write a whole graph at a time.
  # Writing statements or Triples will create a graph to add them to
  # and then serialize the graph.
  #
  # @example Obtaining a RDF/XML writer class
  #   RDF::Writer.for(:rdf)         #=> RDF::RDFXML::Writer
  #   RDF::Writer.for("etc/test.rdf")
  #   RDF::Writer.for(:file_name      => "etc/test.rdf")
  #   RDF::Writer.for(:file_extension => "rdf")
  #   RDF::Writer.for(:content_type   => "application/rdf+xml")
  #
  # @example Serializing RDF graph into an RDF/XML file
  #   RDF::RDFXML::Writer.open("etc/test.rdf") do |writer|
  #     writer << graph
  #   end
  #
  # @example Serializing RDF statements into an RDF/XML file
  #   RDF::RDFXML::Writer.open("etc/test.rdf") do |writer|
  #     graph.each_statement do |statement|
  #       writer << statement
  #     end
  #   end
  #
  # @example Serializing RDF statements into an RDF/XML string
  #   RDF::RDFXML::Writer.buffer do |writer|
  #     graph.each_statement do |statement|
  #       writer << statement
  #     end
  #   end
  #
  # @author [Gregg Kellogg](http://kellogg-assoc.com/)
  class Writer < RDF::Writer
    format RDF::RDFXML::Format

    # Accepted values for the :attributes option; frozen so the shared
    # constant cannot be mutated at runtime.
    VALID_ATTRIBUTES = [:none, :untyped, :typed].freeze

    attr_accessor :graph, :base_uri

    ##
    # Initializes the RDF/XML writer instance.
    #
    # Options:
    # max_depth:: Maximum depth for recursively defining resources, defaults to 3
    # base_uri:: Base URI of graph, used to shorten URI references
    # lang:: Output as root xml:lang attribute, and avoid generating _xml:lang_ where possible
    # attributes:: How to use XML attributes when serializing, one of :none, :untyped, :typed. The default is :none.
    # default_namespace:: URI to use as default namespace
    #
    # @param  [IO, File]               output
    # @param  [Hash{Symbol => Object}] options
    #   @option options [Integer]       :max_depth      (3)
    #   @option options [String, #to_s] :base_uri (nil)
    #   @option options [String, #to_s] :lang   (nil)
    #   @option options [Symbol]        :attributes   (:none)
    #   @option options [String]        :default_namespace
    # @yield  [writer]
    # @yieldparam [RDF::Writer] writer
    def initialize(output = $stdout, options = {}, &block)
      # The working graph is created before delegating to the superclass,
      # so it already exists by the time the superclass constructor runs.
      @graph = RDF::Graph.new
      super(output, options, &block)
    end

    ##
    # @param  [Graph] graph
    # @return [void]
    def insert_graph(graph)
      # The given graph replaces the writer's working graph by reference;
      # it is not copied, and whatever @graph held before is dropped.
      @graph = graph
    end

    ##
    # @param  [Statement] statement
    # @return [void]
    def insert_statement(statement)
      # Statements are only buffered here; the XML itself is produced later,
      # in #write_epilogue, from the accumulated graph.
      @graph << statement
    end

    ##
    # Stores the RDF/XML representation of a triple.
    #
    # @param  [RDF::Resource] subject
    # @param  [RDF::URI]      predicate
    # @param  [RDF::Value]    object
    # @return [void]
    # @see    #write_epilogue
    def insert_triple(subject, predicate, object)
      # Wrap the three terms in a statement and buffer it in the working graph.
      statement = RDF::Statement.new(subject, predicate, object)
      @graph << statement
    end

    ##
    # Outputs the RDF/XML representation of all stored triples.
    #
    # @return [void]
    # @see    #write_triple
    def write_epilogue
      # Reads the serialization options, resets per-run state, builds the
      # whole document in memory and writes it to @output in one go.
      @force_RDF_about = {}
      @max_depth = @options[:max_depth] || 3
      @base_uri = @options[:base_uri]
      @lang = @options[:lang]
      # Normalize to a Symbol: validation below accepts anything whose #to_sym
      # is valid, but #predicate compares @attributes against Symbols, so a
      # String such as "typed" would otherwise pass validation and then be
      # silently ignored.
      @attributes = (@options[:attributes] || :none).to_sym
      @debug = @options[:debug]
      @default_namespace = @options[:default_namespace]
      unless VALID_ATTRIBUTES.include?(@attributes)
        raise "Invalid attribute option '#{@attributes}', should be one of #{VALID_ATTRIBUTES.join(', ')}"
      end
      self.reset

      doc = Nokogiri::XML::Document.new

      add_debug "\nserialize: graph: #{@graph.size}"

      preprocess

      # Get QNames and necessary namespaces from predicates and objects
      @graph.predicates.each {|pred| add_debug("serialize pred: #{pred.inspect}"); get_qname(pred)}
      @graph.objects.each {|obj| add_debug("serialize obj: #{obj.inspect}"); get_qname(obj)}
      prefix(:rdf, RDF.to_uri)
      prefix(:xml, RDF::XML) if @base_uri || @lang

      if @default_namespace
        prefix(:__default__, @default_namespace.respond_to?(:to_uri) ? @default_namespace.to_uri : @default_namespace)
        # Look for a *named* prefix bound to the same namespace. The
        # :__default__ pseudo-prefix itself must never be chosen: #predicate
        # uses this value to build "prefix:name" attribute names, and
        # "__default__" is not a declared XML prefix. Comparison is by string
        # so that String and URI forms of the namespace match each other.
        named = prefixes.detect do |p, uri|
          !p.to_s.empty? && p != :__default__ && uri.to_s == @default_namespace.to_s
        end
        @default_namespace_prefix = named && named.first
        add_debug("def_namespace: #{@default_namespace}, prefix: #{@default_namespace_prefix}")
      end

      add_debug "\nserialize: graph namespaces: #{prefixes.inspect}"

      doc.root = Nokogiri::XML::Element.new("rdf:RDF", doc)
      prefixes.each_pair do |p, uri|
        if p == :__default__
          doc.root.default_namespace = uri.to_s
        else
          doc.root.add_namespace(p.to_s, uri.to_s)
        end
      end
      # Both options are documented as [String, #to_s]; stringify explicitly.
      doc.root["xml:lang"] = @lang.to_s if @lang
      doc.root["xml:base"] = @base_uri.to_s if @base_uri

      # Add statements for each subject
      order_subjects.each do |subject|
        #add_debug "subj: #{subject.inspect}"
        subject(subject, doc.root)
      end

      doc.write_xml_to(@output, :encoding => "UTF-8", :indent => 2)
    end
    
    protected
    # Serialize +subject+ as a child element of +parent_node+.
    #
    # A subject that has not been serialized yet is emitted with all of its
    # properties (typed node element when its first rdf:type is a URI,
    # rdf:Description otherwise). A subject that is already done is emitted
    # only if it was flagged in @force_RDF_about, as a bare
    # rdf:Description/rdf:about reference. Otherwise nothing is added.
    def subject(subject, parent_node)
      node = nil

      if !is_done?(subject)
        subject_done(subject)
        properties = @graph.properties(subject)
        prop_list = sort_properties(properties)
        add_debug "subject: #{subject.inspect}, props: #{properties.inspect}"

        rdf_type, *rest = properties.fetch(RDF.type.to_s, [])
        if rdf_type.is_a?(RDF::URI)
          # The first type becomes the element name, so only the remaining
          # types are left to be written as rdf:type properties.
          element = get_qname(rdf_type)
          properties[RDF.type.to_s] = rest

          # FIXME: different namespace logic
          type_ns = begin
            rdf_type.vocab
          rescue StandardError
            nil
          end
          if type_ns && @default_namespace && type_ns.to_s == @default_namespace.to_s
            # Types in the default namespace are written without a prefix.
            element = rdf_type.qname.last
          end
        end
        element ||= "rdf:Description"

        node = Nokogiri::XML::Element.new(element.to_s, parent_node.document)

        if subject.is_a?(RDF::Node)
          # Only need nodeID if it's referenced elsewhere.
          # Use the bare identifier (#id), exactly as the referencing side
          # does in #predicate and #xml_args, so definition and references
          # carry the same rdf:nodeID value.
          node["rdf:nodeID"] = subject.id if ref_count(subject) > (@depth == 0 ? 0 : 1)
        else
          node["rdf:about"] = relativize(subject)
        end

        prop_list.each do |prop|
          prop_ref = RDF::URI.intern(prop)
          values = properties[prop]

          values.each do |object|
            @depth += 1
            predicate(prop_ref, object, node, values.length == 1)
            @depth -= 1
          end
        end
      elsif @force_RDF_about.include?(subject)
        add_debug "subject: #{subject.inspect}, force about"
        node = Nokogiri::XML::Element.new("rdf:Description", parent_node.document)
        node["rdf:about"] = relativize(subject)
        @force_RDF_about.delete(subject)
      end

      parent_node.add_child(node) if node
    end
    
    # Output a predicate into the specified node.
    #
    # If _is_unique_ is true, this predicate may be able to be serialized as an attribute
    def predicate(prop, object, node, is_unique)
      qname = get_qname(prop)
      raise RDF::WriterError, "No qname generated for <#{prop}>" unless qname

      # See if we can serialize as attribute.
      # * rdf:type, when attributes are enabled
      # * untyped literals with no language, or whose language equals @lang
      # * typed literals, when @attributes is :typed
      # and in every case only when the property has a single value.
      as_attr = false
      as_attr ||= true if [:untyped, :typed].include?(@attributes) && prop == RDF.type

      # Untyped attribute with no lang, or whose lang is the same as the default
      add_debug("as_attr? #{@attributes}, plain? #{object.plain?}, lang #{@lang || 'nil'}:#{object.language || 'nil'}") if object.is_a?(RDF::Literal)
      as_attr ||= true if [:untyped, :typed].include?(@attributes) &&
        object.is_a?(RDF::Literal) && (object.plain? || (@lang && object.language.to_s == @lang.to_s))

      as_attr ||= true if [:typed].include?(@attributes) && object.is_a?(RDF::Literal) && object.typed?

      as_attr = false unless is_unique

      # A blank node has no attribute form (the attribute branch below only
      # knows how to write a URI or a literal value), so always use an element.
      as_attr = false if object.is_a?(RDF::Node)

      # Can't do as an attr if the qname has no prefix and there is no prefixed version
      if @default_namespace && prop.vocab.to_s == @default_namespace.to_s
        if as_attr
          if @default_namespace_prefix
            qname = "#{@default_namespace_prefix}:#{prop.qname.last}"
          else
            as_attr = false
          end
        else
          qname = prop.qname.last.to_s
        end
      end

      add_debug "predicate: #{qname}, as_attr: #{as_attr}, object: #{object.inspect}, done: #{is_done?(object)}, sub: #{@subjects.include?(object)}"

      # Container membership properties (rdf:_1, rdf:_2, ...) are written as
      # rdf:li *elements*. The match is anchored so that only exact rdf:_n
      # names are rewritten, and the attribute form keeps the original name:
      # rdf:li is not permitted as a property attribute in RDF/XML, and
      # several rdf:_n attributes would otherwise collide on one rdf:li.
      element_name = qname =~ /\Ardf:_\d+\z/ ? "rdf:li" : qname
      pred_node = Nokogiri::XML::Element.new(element_name, node.document)

      if object.is_a?(RDF::Literal) || is_done?(object) || !@subjects.include?(object)
        # Literals or references to objects that aren't subjects, or that have already been serialized

        args = xml_args(object)
        add_debug "predicate: args=#{args.inspect}"
        attrs = args.pop

        if as_attr
          # Serialize as attribute
          pred_node.unlink
          pred_node = nil
          node[qname] = object.is_a?(RDF::URI) ? relativize(object) : object.value
          add_debug("node[#{qname}]=#{node[qname]}, #{object.class}")
        else
          # Serialize as element
          add_debug("serialize as element: #{attrs.inspect}")
          attrs.each_pair do |a, av|
            # Lang already specified on the root, don't repeat. Compared as
            # strings, consistent with the attribute test above.
            next if a.to_s == "xml:lang" && @lang && av.to_s == @lang.to_s
            av = relativize(object) if a == "rdf:resource"
            add_debug "  elt attr #{a}=#{av}"
            pred_node[a] = av.to_s
          end
          add_debug "  elt #{'xmllit ' if object.is_a?(RDF::Literal) && object.datatype == RDF.XMLLiteral}content=#{args.first}" if !args.empty?
          if object.is_a?(RDF::Literal) && object.datatype == RDF.XMLLiteral
            pred_node.add_child(Nokogiri::XML::CharacterData.new(args.first, node.document))
          elsif args.first
            pred_node.content = args.first
          end
        end
      else
        # NOTE(review): respond_to? tests for a class-level method; the patch
        # presumably adds an instance method, in which case this guard never
        # short-circuits (harmless, since require is idempotent) - confirm.
        require 'rdf/rdfxml/patches/seq' unless RDF::Graph.respond_to?(:seq)

        # Check to see if it can be serialized as a collection
        col = @graph.seq(object)
        conformant_list = col.all? {|item| !item.is_a?(RDF::Literal)}
        o_props = @graph.properties(object)
        if conformant_list && o_props[RDF.first.to_s]
          # Serialize list as parseType="Collection"
          pred_node["rdf:parseType"] = "Collection"
          col.each do |item|
            # Mark the BNode subject of each item as being complete, so that it is not serialized
            @graph.query(:predicate => RDF.first, :object => item) do |statement|
              subject_done(statement.subject)
            end
            @force_RDF_about[item] = true
            subject(item, pred_node)
          end
        else
          if @depth < @max_depth
            # Nest the object's own description inside this property element.
            @depth += 1
            subject(object, pred_node)
            @depth -= 1
          elsif object.is_a?(RDF::Node)
            pred_node["rdf:nodeID"] = object.id
          else
            pred_node["rdf:resource"] = relativize(object)
          end
        end
      end
      node.add_child(pred_node) if pred_node
    end

    # Strip the writer's base URI from the front of +uri+.
    #
    # The base is matched as a literal string prefix. (Interpolating it into
    # a regular expression would treat ".", "?", "+" etc. in the base URI as
    # metacharacters and could strip text that merely resembles the base.)
    #
    # @param  [#to_s] uri
    # @return [String] +uri+ without the leading base URI, or the full URI
    #   string when no base URI is set or +uri+ does not start with it
    def relativize(uri)
      uri = uri.to_s
      base = base_uri
      return uri unless base

      base = base.to_s
      uri.start_with?(base) ? uri[base.length..-1] : uri
    end

    # Per-triple preprocessing: after the superclass hook, registers the
    # namespaces needed by the predicate (and by the object of rdf:type
    # triples) and bumps the reference count kept for the predicate.
    def preprocess_triple(triple)
      super

      pred = triple.predicate

      # Pre-fetch qnames, to fill namespaces
      get_qname(pred)
      get_qname(triple.object) if pred == RDF.type

      @references[pred] = ref_count(pred) + 1
    end

    # NOTE(review): MAX_DEPTH is not referenced in the visible code;
    # #write_epilogue takes its depth limit from the :max_depth option
    # (default 3) instead - confirm whether it is still needed.
    MAX_DEPTH = 10
    # Unit of indentation used by #indent; frozen so the shared String
    # constant cannot be mutated in place.
    INDENT_STRING = " ".freeze
    
    # Classes whose instances #order_subjects places first in the output.
    def top_classes
      [RDF::RDFS.Class]
    end
    # Predicates that #sort_properties tries to place ahead of all others,
    # in this order.
    def predicate_order
      [RDF.type, RDF::RDFS.label, RDF::DC.title]
    end
    
    # True once +subject+ has been recorded by #subject_done.
    def is_done?(subject)
      @serialized.key?(subject)
    end
    
    # Mark a subject as done.
    def subject_done(subject)
      # Record the subject so that #is_done? reports it from now on.
      @serialized.store(subject, true)
    end
    
    # Compute the order in which subjects are serialized.
    #
    # Subjects typed with one of #top_classes come first (sorted, without
    # duplicates) and are recorded in @top_levels. The remaining subjects
    # follow, sorted with resources before blank nodes, then by reference
    # count, then by the subject itself.
    #
    # @return [Array] ordered subjects
    def order_subjects
      visited = {}
      ordered = []

      top_classes.each do |class_uri|
        typed = graph.query(:predicate => RDF.type, :object => class_uri).map {|st| st.subject}
        typed.sort.uniq.each do |subject|
          #add_debug "order_subjects: #{subject.inspect}"
          ordered << subject
          @top_levels[subject] = true
          visited[subject] = true
        end
      end

      # Sort subjects by resources over bnodes, ref_counts and the subject URI itself
      remaining = @subjects.keys.reject {|s| visited.include?(s)}
      ranked = remaining.map {|r| [r.is_a?(RDF::Node) ? 1 : 0, ref_count(r), r]}.sort

      ordered + ranked.map {|entry| entry.last}
    end
    
    # Run #preprocess_statement over every statement of the working graph.
    def preprocess
      @graph.each do |st|
        preprocess_statement(st)
      end
    end
    
    # Count one more reference to the statement's object and remember the
    # statement's subject as a known subject.
    def preprocess_statement(statement)
      #add_debug "preprocess: #{statement.inspect}"
      target = statement.object
      @references[target] = ref_count(target) + 1
      @subjects[statement.subject] = true
    end
    
    # Return the number of times this node has been referenced in the object position
    def ref_count(node)
      # Nodes that were never recorded count as zero references.
      @references.fetch(node) { 0 }
    end

    # Return a QName for the URI, or nil. Adds namespace of QName to defined namespaces
    def get_qname(uri)
      # Anything that is not an RDF::URI falls through and yields nil.
      if uri.is_a?(RDF::URI)
        # Duplicate logic from URI#qname to remember namespace assigned
        if uri.qname
          prefix(uri.qname.first, uri.vocab.to_uri)
          add_debug "get_qname(uri.qname): #{uri.qname.join(':')}"
          return uri.qname.join(":") 
        end
        
        # No vocabulary assigned, find one from cache of created namespace URIs:
        # the first registered prefix whose namespace is a leading substring
        # of the URI wins.
        prefixes.each_pair do |prefix, vocab|
          if uri.to_s.index(vocab.to_s) == 0
            uri.vocab = vocab
            local_name = uri.to_s[(vocab.to_s.length)..-1]
            add_debug "get_qname(ns): #{prefix}:#{local_name}"
            return "#{prefix}:#{local_name}"
          end
        end
        
        # No vocabulary found, invent one
        # Add bindings for predicates not already having bindings
        # The local name is the fragment if there is one, otherwise the last
        # path segment, so that "#{base_uri}#{local_name}" == uri.
        local_name = uri.fragment
        local_name ||= begin
          path = uri.path.split("/")
          # No usable last path segment: give up. Note that this returns
          # false rather than nil.
          unless path &&
              path.length > 1 &&
              path.last.class == String &&
              path.last.length > 0 &&
              path.last.index("/") != 0
            return false
          end
          path.last
        end
        # Local variable: the namespace part of this URI (everything before
        # the local name); it does not touch the writer's base_uri attribute.
        base_uri = uri.to_s[0..-(local_name.length + 1)]
        # Generated prefixes are ns0, ns1, ... (String#succ on the last one used).
        @tmp_ns = @tmp_ns ? @tmp_ns.succ : "ns0"
        add_debug "create namespace definition for #{uri}"
        uri.vocab = RDF::Vocabulary(base_uri)
        prefix(@tmp_ns.to_sym, uri.vocab.to_uri)
        add_debug "get_qname(tmp_ns): #{@tmp_ns}:#{local_name}"
        return "#{@tmp_ns}:#{local_name}"
      end
    end
    
    # Reset the per-serialization bookkeeping state.
    #
    # Registered prefixes are deliberately left alone. The former
    # "prefixes = {}" line only assigned a method-local variable and never
    # changed the writer's prefixes, so it was dead code and has been
    # removed without changing behaviour.
    def reset
      @depth = 0
      @lists = {}
      @references = {}
      @serialized = {}
      @subjects = {}
      @top_levels = {}
    end

    # Take a hash from predicate uris to lists of values.
    # Sort the lists of values.  Return a sorted list of properties.
    def sort_properties(properties)
      # Sort key for a property value: container membership URIs (rdf:_1,
      # rdf:_2, ...) sort numerically and ahead of everything else, which
      # sorts by string form. Array keys keep numbers and strings from ever
      # being compared with each other.
      sort_key = lambda do |value|
        qn = value.is_a?(RDF::URI) ? value.qname : nil
        local = qn ? qn.last.to_s : nil
        if local && local =~ /\A_(\d+)\z/
          [0, $1.to_i, ""]
        else
          [1, 0, value.to_s]
        end
      end

      # Sort each list of values in place (the hash itself is updated).
      properties.keys.each do |k|
        properties[k] = properties[k].sort_by {|value| sort_key.call(value)}
      end

      # Make sorted list of properties
      prop_list = []

      # Preferred predicates first. The hash is keyed by predicate URI
      # strings, so the lookup must use the string form of each predicate.
      predicate_order.each do |prop|
        next unless properties[prop.to_s]
        prop_list << prop.to_s
      end

      # Then every remaining predicate, in sorted order.
      properties.keys.sort.each do |prop|
        next if prop_list.include?(prop.to_s)
        prop_list << prop.to_s
      end

      add_debug "sort_properties: #{prop_list.join(', ')}"
      prop_list
    end

    # XML content and attributes used to serialize an object.
    #  A literal "foo" with language "en-US" yields ["foo", {"xml:lang" => "en-US"}]
    def xml_args(object)
      # Literals yield [content, attributes]; nodes and URIs yield only
      # [attributes]; any other kind of object yields nil.
      if object.is_a?(RDF::Literal)
        attrs =
          if object.plain?
            {}
          elsif object.has_language?
            {"xml:lang" => object.language}
          elsif object.datatype == RDF.XMLLiteral
            {"rdf:parseType" => "Literal"}
          else
            {"rdf:datatype" => object.datatype.to_s}
          end
        [object.value, attrs]
      elsif object.is_a?(RDF::Node)
        [{"rdf:nodeID" => object.id}]
      elsif object.is_a?(RDF::URI)
        [{"rdf:resource" => object.to_s}]
      end
    end
    
    # Add debug event to debug array, if specified
    #
    # @param [String] message
    def add_debug(message)
      # Messages are collected only when the :debug option supplied an Array.
      return unless @debug.is_a?(Array)

      @debug << message
    end

    # Returns indent string multiplied by the depth
    def indent(modifier = 0)
      # One INDENT_STRING per level of the current depth, adjusted by +modifier+.
      levels = @depth + modifier
      INDENT_STRING * levels
    end
  end
end