require 'rdf'
require 'rdf/ntriples'
require 'rdf/nquads'
require 'rdf/vocab/writer'
require 'logger'
require 'optparse'
begin
  require 'linkeddata'
rescue LoadError
  # Silently load without linkeddata, but try some others
  %w(reasoner rdfa rdfxml turtle vocab json/ld ld/patch).each do |ser|
    begin
      require ser.include?('/') ? ser : "rdf/#{ser}"
    rescue LoadError
    end
  end
end

class OptionParser
  def options; @options || {}; end
  def options=(value); @options = value; end
end

module RDF
  # Individual formats can modify options by updating {Reader.options} or {Writer.options}. Format-specific commands are taken from {Format.cli_commands} for each loaded format, which returns an array of lambdas taking arguments and options.
  #
  # Other than `help`, all commands parse an input file.
  #
  # Multiple commands may be added in sequence to execute a pipeline.
  #
  # @example Creating Reader-specific options:
  #   class Reader
  #     def self.options
  #       [
  #         RDF::CLI::Option.new(
  #           symbol: :canonicalize,
  #           datatype: TrueClass,
  #           on: ["--canonicalize"],
  #           description: "Canonicalize input/output.") {true},
  #         RDF::CLI::Option.new(
  #           symbol: :uri,
  #           datatype: RDF::URI,
  #           on: ["--uri STRING"],
  #           description: "URI.") {|v| RDF::URI(v)},
  #       ]
  #     end
  #
  # @example Creating Format-specific commands:
  #   class Format
  #     def self.cli_commands
  #       {
  #         count: {
  #           description: "",
  #           parse: true,
  #           lambda: ->(argv, opts) {}
  #         },
  #       }
  #     end
  #
  # @example Adding a command manually
  #   class MyCommand
  #     RDF::CLI.add_command(:count, description: "Count statements") do |argv, opts|
  #       count = 0
  #       RDF::CLI.parse(argv, opts) do |reader|
  #         reader.each_statement do |statement|
  #           count += 1
  #         end
  #       end
  #       $stdout.puts "Parsed #{count} statements"
  #     end
  #   end
  #     
  # Format-specific commands should verify that the reader and/or output format are appropriate for the command.
  class CLI

    # Option description for use within Readers/Writers. See {RDF::Reader.options} and {RDF::Writer.options} for example usage.
    class Option
      # Symbol used for this option when calling `Reader.new`
      # @return [Symbol]
      attr_reader :symbol

      # Arguments passed to OptionParser#on
      # @return [Array<String>]
      attr_reader :on

      # Description of this option (optional)
      # @return [String]
      attr_reader :description

      # Argument datatype, which may be enumerated string values
      # @return [Class, Array<String>]
      attr_reader :datatype

      # Allows multiple comma-spearated values.
      # @return [Boolean]
      attr_reader :multiple

      ##
      # Create a new option with optional callback.
      #
      # @param [Symbol] symbol
      # @param [Array<String>] on
      # @param [String] description
      # @param [Class, Array<String>] datatype of value
      # @param [Boolean] multiple can have multiple comma-separated values
      # @yield value which may be used within `OptionParser#on`
      # @yieldparam [Object] value The option value as parsed using `on` argument
      # @yieldreturn [Object] a possibly modified input value
      def initialize(symbol: nil, on: nil, description: nil, datatype: String, multiple: false, &block)
        raise ArgumentError, "symbol is a required argument" unless symbol
        raise ArgumentError, "on is a required argument" unless on
        @symbol, @on, @description, @datatype, @multiple, @callback = symbol.to_sym, Array(on), description, datatype, multiple, block
      end

      def call(arg)
        @callback ? @callback.call(arg) : arg
      end
    end

    # @private
    COMMANDS = {
      count: {
        description: "Count statements in parsed input",
        parse: false,
        help: "count [options] [args...]\nreturns number of parsed statements",
        lambda: ->(argv, opts) do
          unless repository.count > 0
            start = Time.new
            count = 0
            self.parse(argv, opts) do |reader|
              reader.each_statement do |statement|
                count += 1
              end
            end
            secs = Time.new - start
            $stdout.puts "Parsed #{count} statements with #{@readers.join(', ')} in #{secs} seconds @ #{count/secs} statements/second."
          end
        end
      },
      help: {
        description: "This message",
        parse: false,
        lambda: ->(argv, opts) {self.usage(self.options)}
      },
      lengths: {
        description: "Lengths of each parsed statement",
        parse: true,
        help: "lengths [options] [args...]\nreturns statement lengths",
        lambda: ->(argv, opts) do
          repository.each_statement do |statement|
            $stdout.puts statement.to_s.size
          end
        end
      },
      objects: {
        description: "Serialize each parsed object to N-Triples",
        parse: true,
        help: "objects [options] [args...]\nreturns unique objects",
        lambda: ->(argv, opts) do
          $stdout.puts "Objects"
          repository.each_object do |object|
            $stdout.puts object.to_ntriples
          end
        end
      },
      predicates: {
        description: "Serialize each parsed predicate to N-Triples",
        parse: true,
        help: "predicates [options] [args...]\nreturns unique predicates",
        lambda: ->(argv, opts) do
          $stdout.puts "Predicates"
          repository.each_predicate do |predicate|
            $stdout.puts predicate.to_ntriples
          end
        end
      },
      serialize: {
        description: "Serialize each parsed statement to N-Triples, or the specified output format",
        parse: true,
        help: "serialize [options] [args...]\nserialize output using specified format (or n-triples if not specified)",
        lambda: ->(argv, opts) do
          writer_class = RDF::Writer.for(opts[:output_format]) || RDF::NTriples::Writer
          out = opts[:output] || $stdout
          opts = opts.merge(prefixes: {})
          writer_opts = opts.merge(standard_prefixes: true)
          writer_class.new(out, writer_opts) do |writer|
            writer << repository
          end
        end
      },
      subjects: {
        description: "Serialize each parsed subject to N-Triples",
        parse: true,
        help: "subjects [options] [args...]\nreturns unique subjects",
        lambda: ->(argv, opts) do
          $stdout.puts "Subjects"
          repository.each_subject do |subject|
            $stdout.puts subject.to_ntriples
          end
        end
      },
      validate: {
        description: "Validate parsed input",
        parse: true,
        help: "validate [options] [args...]\nvalidates parsed input (may also be used with --validate)",
        lambda: ->(argv, opts) do
          $stdout.puts "Input is " + (repository.valid? ? "" : "in") + "valid"
        end
      }
    }

    class << self
      # Repository containing parsed statements
      # @return [RDF::Repository]
      attr_accessor :repository
    end

    ##
    # @return [String]
    def self.basename() File.basename($0) end

    ##
    # @yield  [options]
    # @yieldparam [OptionParser]
    # @return [OptionParser]
    def self.options(&block)
      options = OptionParser.new
      logger = Logger.new($stderr)
      logger.level = Logger::ERROR
      logger.formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
      opts = options.options = {
        debug:          false,
        evaluate:       nil,
        format:         nil,
        output:         $stdout,
        output_format:  :ntriples,
        logger:         logger
      }

      # Add default Reader and Writer options
      RDF::Reader.options.each do |cli_opt|
        next if opts.has_key?(cli_opt.symbol)
        on_args = cli_opt.on || []
        on_args << cli_opt.description if cli_opt.description
        options.on(*on_args) do |arg|
          opts[cli_opt.symbol] = cli_opt.call(arg)
        end
      end
      RDF::Writer.options.each do |cli_opt|
        next if opts.has_key?(cli_opt.symbol)
        on_args = cli_opt.on || []
        on_args << cli_opt.description if cli_opt.description
        options.on(*on_args) do |arg|
          opts[cli_opt.symbol] = cli_opt.call(arg)
        end
      end

      # Command-specific options
      if block_given?
        case block.arity
          when 1 then block.call(options)
          else options.instance_eval(&block)
        end
      end
      options.banner = "Usage: #{self.basename} command+ [options] [args...]"

      options.on('-d', '--debug',   'Enable debug output for troubleshooting.') do
        opts[:logger].level = Logger::DEBUG
      end

      options.on("-e", "--evaluate STRING", "Evaluate argument as RDF input, if no files are specified") do |arg|
        opts[:evaluate] = arg
      end

      options.on("--input-format FORMAT", "--format FORMAT", "Format of input file, uses heuristic if not specified") do |arg|
        unless reader = RDF::Reader.for(arg.downcase.to_sym)
          self.abort "No reader found for #{arg.downcase.to_sym}. Available readers:\n  #{self.formats(reader: true).join("\n  ")}"
        end

        # Add format-specific reader options
        reader.options.each do |cli_opt|
          next if opts.has_key?(cli_opt.symbol)
          on_args = cli_opt.on || []
          on_args << cli_opt.description if cli_opt.description
          options.on(*on_args) do |arg|
            opts[cli_opt.symbol] = cli_opt.call(arg)
          end
        end
        opts[:format] = arg.downcase.to_sym
      end

      options.on("-o", "--output FILE", "File to write output, defaults to STDOUT") do |arg|
        opts[:output] = File.open(arg, "w")
      end

      options.on("--output-format FORMAT", "Format of output file, defaults to NTriples") do |arg|
        unless writer = RDF::Writer.for(arg.downcase.to_sym)
          self.abort "No writer found for #{arg.downcase.to_sym}. Available writers:\n  #{self.formats(writer: true).join("\n  ")}"
        end

        # Add format-specific writer options
        writer.options.each do |cli_opt|
          next if opts.has_key?(cli_opt.symbol)
          on_args = cli_opt.on || []
          on_args << cli_opt.description if cli_opt.description
          options.on(*on_args) do |arg|
            opts[cli_opt.symbol] = cli_opt.call(arg)
          end
        end
        opts[:output_format] = arg.downcase.to_sym
      end

      options.on_tail("-h", "--help", "Show this message") do
        self.usage(options)
        exit(0)
      end

      begin
        options.parse!
      rescue OptionParser::InvalidOption => e
        abort e
      end

      options
    end

    ##
    # Output usage message
    def self.usage(options, banner: nil)
      options.banner = banner if banner
      $stdout.puts options
      $stdout.puts "Note: available commands and options may be different depending on selected --input-format and/or --output-format."
      $stdout.puts "Available commands:\n\t#{self.commands.join("\n\t")}"
      $stdout.puts "Available formats:\n\t#{(self.formats).join("\n\t")}"
    end

    ##
    # Execute one or more commands, parsing input as necessary
    #
    # @param  [Array<String>] args
    # @param  [IO] output
    # @param  [Hash{Symbol => Object}] options
    # @return [Boolean]
    def self.exec(args, output: $stdout, option_parser: self.options, **options)
      output.set_encoding(Encoding::UTF_8) if output.respond_to?(:set_encoding) && RUBY_PLATFORM == "java"
      cmds, args = args.partition {|e| commands.include?(e.to_s)}

      if cmds.empty?
        usage(option_parser)
        abort "No command given"
      end

      if cmds.first == 'help'
        on_cmd = cmds[1]
        if on_cmd && COMMANDS.fetch(on_cmd.to_sym, {})[:help]
          usage(option_parser, banner: "Usage: #{self.basename.split('/').last} #{COMMANDS[on_cmd.to_sym][:help]}")
        else
          usage(option_parser)
        end
        return
      end

      @repository = RDF::Repository.new

      # Parse input files if any command requires it
      if cmds.any? {|c| COMMANDS[c.to_sym][:parse]}
        start = Time.new
        count = 0
        self.parse(args, options) do |reader|
          @repository << reader
        end
        secs = Time.new - start
        $stdout.puts "Parsed #{repository.count} statements with #{@readers.join(', ')} in #{secs} seconds @ #{count/secs} statements/second."
      end

      # Run each command in sequence
      cmds.each do |command|
        COMMANDS[command.to_sym][:lambda].call(args, output: output, **options)
      end
    rescue ArgumentError => e
      abort e.message
    end

    ##
    # @return [Array<String>] list of executable commands
    def self.commands
      # First, load commands from other formats
      unless @commands_loaded
        RDF::Format.each do |format|
          format.cli_commands.each do |command, options|
            options = {lambda: options} unless options.is_a?(Hash)
            add_command(command, options)
          end
        end
        @commands_loaded = true
      end
      COMMANDS.keys.map(&:to_s).sort
    end

    ##
    # Add a command.
    #
    # @param [#to_sym] command
    # @param [Hash{Symbol => String}] options
    # @option options [String] description
    # @option options [String] help string to display for help
    # @option options [Boolean] parse parse input files in to Repository, or not.
    # @option options [Array<RDF::CLI::Option>] options specific to this command
    # @yield argv, opts
    # @yieldparam [Array<String>] argv
    # @yieldparam [Hash] opts
    # @yieldreturn [void]
    def self.add_command(command, **options, &block)
      options[:lambda] = block if block_given?
      COMMANDS[command.to_sym] ||= options
    end

    ##
    # @return [Array<String>] list of available formats
    def self.formats(reader: false, writer: false)
      f = RDF::Format.sort_by(&:to_sym).each.
        select {|f| (reader ? f.reader : (writer ? f.writer : (f.reader || f.writer)))}.
        inject({}) do |memo, reader|
          memo.merge(reader.to_sym => reader.name)
      end
      sym_len = f.keys.map {|k| k.to_s.length}.max
      f.map {|s, t| "%*s: %s" % [sym_len, s, t]}
    end

    ##
    # Parse each file, $stdin or specified string in `options[:evaluate]`
    # yielding a reader
    #
    # @param  [Array<String>] files
    # @param  [String] evaluate from command-line, rather than referenced file
    # @param  [Symbol] format (:ntriples) Reader symbol for finding reader
    # @param  [Encoding] encoding set on the input
    # @param  [Hash{Symbol => Object}] options sent to reader
    # @yield  [reader]
    # @yieldparam [RDF::Reader]
    # @return [nil]
    def self.parse(files, evaluate: nil, format: :ntriples, encoding: Encoding::UTF_8, **options, &block)
      if files.empty?
        # If files are empty, either use options[:execute]
        input = evaluate ? StringIO.new(evaluate) : $stdin
        input.set_encoding(encoding)
        r = RDF::Reader.for(format)
        (@readers ||= []) << r
        r.new(input, options) do |reader|
          yield(reader)
        end
      else
        options[:format] = format if format
        files.each do |file|
          RDF::Reader.open(file, options) do |reader|
            (@readers ||= []) << reader.class.to_s
            yield(reader)
          end
        end
      end
    end

    ##
    # @param  [String] msg
    # @return [void]
    def self.abort(msg)
      Kernel.abort "#{basename}: #{msg}"
    end
  end
end