# -*- encoding: utf-8 -*-
module RDF::NTriples
  ##
  # N-Triples serializer.
  #
  # Output is serialized as UTF-8. To serialize as ASCII (with Unicode
  # escapes), pass `:encoding => Encoding::ASCII` as an option to
  # {RDF::NTriples::Writer#initialize}.
  #
  # @example Obtaining an NTriples writer class
  #   RDF::Writer.for(:ntriples)     #=> RDF::NTriples::Writer
  #   RDF::Writer.for("etc/test.nt")
  #   RDF::Writer.for(:file_name      => "etc/test.nt")
  #   RDF::Writer.for(:file_extension => "nt")
  #   RDF::Writer.for(:content_type   => "text/plain")
  #
  # @example Serializing RDF statements into an NTriples file
  #   RDF::NTriples::Writer.open("etc/test.nt") do |writer|
  #     graph.each_statement do |statement|
  #       writer << statement
  #     end
  #   end
  #
  # @example Serializing RDF statements into an NTriples string
  #   RDF::NTriples::Writer.buffer do |writer|
  #     graph.each_statement do |statement|
  #       writer << statement
  #     end
  #   end
  #
  # @example Serializing RDF statements into an NTriples string with escaped UTF-8
  #   RDF::NTriples::Writer.buffer(:encoding => Encoding::ASCII) do |writer|
  #     graph.each_statement do |statement|
  #       writer << statement
  #     end
  #   end
  #
  # @see http://www.w3.org/TR/rdf-testcases/#ntriples
  # @see http://www.w3.org/TR/n-triples/
  class Writer < RDF::Writer
    # Associates this writer with the N-Triples format class.
    # NOTE(review): `format` is a class-level macro not defined in this file;
    # presumably inherited from RDF::Writer -- confirm its exact semantics there.
    format RDF::NTriples::Format

    # Matches a string (start to end) consisting solely of characters in the
    # ranges 0x20-0x21, 0x23-0x5B (`[`) and 0x5D (`]`)-0x7E, i.e. printable
    # ASCII without the double quote (0x22) and the backslash (0x5C).
    # Such strings can be emitted without any escaping.
    #
    # @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
    ESCAPE_PLAIN = /\A[\x20-\x21\x23-#{Regexp.escape '['}#{Regexp.escape ']'}-\x7E]*\z/m.freeze
    # Matches a string (start to end) consisting solely of 7-bit ASCII
    # characters (0x00-0x7F). Not referenced elsewhere in this file.
    ESCAPE_ASCII = /\A[\x00-\x7F]*\z/m.freeze

    ##
    # Escapes literal or URI content for N-Triples output.
    #
    # Strings that consist only of characters needing no escape are returned
    # as they are. ASCII characters that require it are always escaped.
    # Non-ASCII characters are passed through untouched only when an
    # `encoding` other than ASCII is given; with ASCII, or with no encoding
    # at all, they are written as `\uXXXX` / `\UXXXXXXXX` escapes.
    #
    # When `encoding` is given, the result is transcoded to it before being
    # returned.
    #
    # @param  [String] string
    #   the text to escape
    # @param  [Encoding] encoding
    #   the target encoding, or `nil`
    # @return [String]
    # @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
    def self.escape(string, encoding = nil)
      result =
        if string =~ ESCAPE_PLAIN
          # Fast path: nothing in the string needs escaping.
          string
        else
          out = StringIO.new
          if string.ascii_only?
            # Pure ASCII input: examine it byte by byte.
            out.set_encoding(Encoding::ASCII)
            string.each_byte { |byte| out << escape_ascii(byte, encoding) }
          elsif string.respond_to?(:each_char) && encoding && encoding != Encoding::ASCII
            # Non-ASCII target encoding: keep non-ASCII characters verbatim
            # and escape only the ASCII ones that need it.
            out.set_encoding(encoding)
            string.each_char do |char|
              out << (char.ord > 0x7F ? char : escape_ascii(char, encoding))
            end
          else
            # ASCII target (or none given): escape ASCII and non-ASCII alike.
            out.set_encoding(Encoding::ASCII)
            string.each_codepoint { |codepoint| out << escape_unicode(codepoint, encoding) }
          end
          out.string
        end

      encoding ? result.encode(encoding) : result
    end

    ##
    # Escapes a single codepoint.
    #
    # 7-bit ASCII codepoints are delegated to {escape_ascii}; codepoints in
    # the Basic Multilingual Plane are written as `\uXXXX`, and codepoints
    # above it as `\UXXXXXXXX`.
    #
    # @param  [Integer, #ord] u
    #   a codepoint, or an object whose `#ord` yields one
    # @param  [Encoding] encoding
    #   forwarded to {escape_ascii} for ASCII codepoints
    # @return [String]
    # @raise  [ArgumentError] if the codepoint is outside `0x00..0x10FFFF`
    # @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
    def self.escape_unicode(u, encoding)
      codepoint = u.ord

      # ASCII 7-bit
      return escape_ascii(codepoint, encoding) if codepoint.between?(0x00, 0x7F)
      # Unicode BMP
      return escape_utf16(codepoint) if codepoint.between?(0x80, 0xFFFF)
      # Remaining Unicode planes
      return escape_utf32(codepoint) if codepoint.between?(0x10000, 0x10FFFF)

      raise ArgumentError, "expected a Unicode codepoint in (0x00..0x10FFFF), but got 0x#{codepoint.to_s(16)}"
    end

    ##
    # Escapes a single 7-bit ASCII character.
    #
    # Tab, line feed, carriage return, double quote and backslash get their
    # short escape sequences. Backspace and form feed are written as `\b`
    # and `\f` unless `encoding` is ASCII, in which case they are written as
    # `\uXXXX` like the remaining control characters and DEL (0x7F). Every
    # other character is returned unchanged.
    #
    # @param  [Integer, #ord] u
    #   a character code, or an object whose `#ord` yields one
    # @param  [Encoding] encoding
    #   the target encoding, or `nil`
    # @return [String]
    # @raise  [ArgumentError] if the character code is outside `0x00..0x7F`
    # @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
    # @see    http://www.w3.org/TR/n-triples/
    def self.escape_ascii(u, encoding)
      code = u.ord
      ascii_output = (encoding == Encoding::ASCII)

      case code
      when 0x09 then "\\t"
      when 0x0A then "\\n"
      when 0x0D then "\\r"
      when 0x22 then "\\\""
      when 0x5C then "\\\\"
      when 0x08 then ascii_output ? escape_utf16(code) : "\\b"
      when 0x0C then ascii_output ? escape_utf16(code) : "\\f"
      when 0x00..0x1F, 0x7F
        # Control characters without a short form, and DEL.
        escape_utf16(code)
      when 0x20..0x7E
        code.chr
      else
        raise ArgumentError, "expected an ASCII character in (0x00..0x7F), but got 0x#{code.to_s(16)}"
      end
    end

    ##
    # Renders a codepoint as a `\u` escape followed by its value in
    # upper-case hexadecimal, zero-padded to at least four digits.
    #
    # @param  [Integer, #ord] u
    #   a codepoint, or an object whose `#ord` yields one
    # @return [String]
    # @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
    def self.escape_utf16(u)
      codepoint = u.ord
      "\\u%04X" % codepoint
    end

    ##
    # Renders a codepoint as a `\U` escape followed by its value in
    # upper-case hexadecimal, zero-padded to at least eight digits.
    #
    # @param  [Integer, #ord] u
    #   a codepoint, or an object whose `#ord` yields one
    # @return [String]
    # @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
    def self.escape_utf32(u)
      codepoint = u.ord
      "\\U%08X" % codepoint
    end

    ##
    # Serializes a single RDF value to its N-Triples form using a freshly
    # created writer.
    #
    # A statement is rendered as one line terminated by a newline; a term is
    # rendered without a terminator. `nil` yields `nil`, and `false` yields
    # its string form.
    #
    # @param  [RDF::Value] value
    # @return [String]
    # @raise  [ArgumentError] if `value` is not an `RDF::Statement` or `RDF::Term`
    def self.serialize(value)
      formatter = new

      # Only nil and false are falsy: nil passes through, false is stringified.
      unless value
        return value.nil? ? nil : value.to_s
      end

      case value
      when RDF::Statement
        formatter.format_statement(value) + "\n"
      when RDF::Term
        formatter.format_term(value)
      else
        raise ArgumentError, "expected an RDF::Statement or RDF::Term, but got #{value.inspect}"
      end
    end

    ##
    # Creates the writer, enabling term validation unless the caller
    # explicitly overrides `:validate`.
    #
    # @param  [IO, File] output
    #   the output stream
    # @param  [Hash{Symbol => Object}] options = ({})
    #   any additional options. See {RDF::Writer#initialize}
    # @option options [Boolean]  :validate (true)
    #   whether to validate terms when serializing
    # @yield  [writer] `self`
    # @yieldparam  [RDF::Writer] writer
    # @yieldreturn [void]
    def initialize(output = $stdout, options = {}, &block)
      # Caller-supplied options take precedence over the :validate default.
      super(output, {:validate => true}.merge(options), &block)
    end

    ##
    # Outputs an N-Triples comment.
    #
    # A single trailing line terminator is dropped from `text`. If the
    # remaining text spans several lines (separated by LF, CR or CRLF), each
    # line is written as its own `#`-prefixed comment line, so that no part
    # of the comment can end up in the output as un-commented content.
    # Empty text produces a single empty comment line.
    #
    # @param  [String] text
    #   the comment text, possibly containing line breaks
    # @return [void]
    def write_comment(text)
      # A limit of -1 keeps blank lines inside the comment instead of dropping them.
      lines = text.chomp.split(/\r\n|\r|\n/, -1)
      lines = [""] if lines.empty?
      lines.each { |line| puts "# #{line}" }
    end

    ##
    # Formats the given triple with this writer's options and writes the
    # resulting line to the output.
    #
    # @param  [RDF::Resource] subject
    # @param  [RDF::URI]      predicate
    # @param  [RDF::Term]     object
    # @return [void]
    def write_triple(subject, predicate, object)
      line = format_triple(subject, predicate, object, @options)
      puts(line)
    end

    ##
    # Formats a statement in N-Triples syntax by breaking it into its triple
    # components and handing them to {#format_triple}.
    #
    # @param  [RDF::Statement] statement
    # @param  [Hash{Symbol => Object}] options = ({})
    # @return [String]
    def format_statement(statement, options = {})
      terms = statement.to_triple
      format_triple(*terms, options)
    end

    ##
    # Formats a triple in N-Triples syntax: the three formatted terms
    # separated by single spaces and followed by ` .`.
    #
    # @param  [RDF::Resource] subject
    # @param  [RDF::URI]      predicate
    # @param  [RDF::Term]     object
    # @param  [Hash{Symbol => Object}] options = ({})
    #   passed on to `format_term` for every term
    # @return [String]
    def format_triple(subject, predicate, object, options = {})
      terms = [subject, predicate, object].map do |term|
        format_term(term, options)
      end
      "%s %s %s ." % terms
    end

    ##
    # Formats a blank node in N-Triples syntax.
    #
    # @param  [RDF::Node] node
    # @param  [Hash{Symbol => Object}] options = ({})
    # @option options [Boolean] :unique_bnodes (false)
    #   Serialize node using unique identifier, rather than any used to create the node.
    # @return [String]
    def format_node(node, options = {})
      return node.to_unique_base if options[:unique_bnodes]

      node.to_base
    end

    ##
    # Returns the N-Triples representation of a URI reference, as produced
    # by the URI's own `#to_base` method.
    #
    # @param  [RDF::URI] uri
    # @param  [Hash{Symbol => Object}] options = ({})
    #   accepted but not consulted by this method
    # @return [String]
    def format_uri(uri, options = {})
      uri.to_base
    end

    ##
    # Formats a literal in N-Triples syntax.
    #
    # An `RDF::Literal` is written as its escaped, quoted value followed by
    # `@language` when it has a language and by `^^<datatype>` when it has a
    # datatype. Anything else is converted with `#to_s`, escaped and quoted.
    #
    # @param  [RDF::Literal, String, #to_s] literal
    # @param  [Hash{Symbol => Object}] options = ({})
    # @return [String]
    def format_literal(literal, options = {})
      unless literal.is_a?(RDF::Literal)
        return quoted(escaped(literal.to_s))
      end

      # The value is escaped here rather than relying on the term's own escaping.
      serialized = quoted(escaped(literal.value))
      serialized << "@#{literal.language}" if literal.has_language?
      serialized << "^^<#{uri_for(literal.datatype)}>" if literal.has_datatype?
      serialized
    end

    ##
    # Escapes `string` by calling the class-level `escape` method of this
    # writer's class, passing along the value of `encoding`.
    #
    # NOTE(review): `encoding` is not defined in this file; presumably it is
    # an accessor provided by RDF::Writer -- confirm.
    #
    # @private
    # @param  [String] string
    # @return [String]
    def escaped(string)
      self.class.escape(string, encoding)
    end
  end
end