module Torch
  module NN
    class RNNBase < Module
      def initialize(mode, input_size, hidden_size, num_layers: 1, bias: true,
                     batch_first: false, dropout: 0.0, bidirectional: false)
        super()
        @mode = mode
        @input_size = input_size
        @hidden_size = hidden_size
        @num_layers = num_layers
        @bias = bias
        @batch_first = batch_first
        @dropout = dropout.to_f
        @bidirectional = bidirectional
        num_directions = bidirectional ? 2 : 1

        if !dropout.is_a?(Numeric) || !(dropout >= 0 && dropout <= 1)
          raise ArgumentError, "dropout should be a number in range [0, 1] " +
            "representing the probability of an element being zeroed"
        end

        if dropout > 0 && num_layers == 1
          warn "dropout option adds dropout after all but last recurrent layer, " +
            "so non-zero dropout expects num_layers greater than 1, but got " +
            "dropout=#{dropout} and num_layers=#{num_layers}"
        end

        # LSTM stacks 4 gates and GRU 3 gates into a single weight matrix per layer.
        gate_size =
          case mode
          when "LSTM"
            4 * hidden_size
          when "GRU"
            3 * hidden_size
          when "RNN_TANH"
            hidden_size
          when "RNN_RELU"
            hidden_size
          else
            raise ArgumentError, "Unrecognized RNN mode: #{mode}"
          end

        @all_weights = []
        num_layers.times do |layer|
          num_directions.times do |direction|
            layer_input_size = layer == 0 ? input_size : hidden_size * num_directions

            w_ih = Parameter.new(Torch::Tensor.new(gate_size, layer_input_size))
            w_hh = Parameter.new(Torch::Tensor.new(gate_size, hidden_size))
            b_ih = Parameter.new(Torch::Tensor.new(gate_size))
            # Second bias vector included for CuDNN compatibility. Only one
            # bias vector is needed in the standard definition.
            b_hh = Parameter.new(Torch::Tensor.new(gate_size))
            layer_params = [w_ih, w_hh, b_ih, b_hh]

            suffix = direction == 1 ? "_reverse" : ""
            param_names = ["weight_ih_l%s%s", "weight_hh_l%s%s"]
            if bias
              param_names += ["bias_ih_l%s%s", "bias_hh_l%s%s"]
            end
            param_names.map! { |x| x % [layer, suffix] }

            param_names.zip(layer_params) do |name, param|
              instance_variable_set("@#{name}", param)
            end
            @all_weights << param_names
          end
        end

        flatten_parameters
        reset_parameters
      end

      def flatten_parameters
        # no-op unless module is on the GPU and cuDNN is enabled
      end

      def _apply(fn)
        ret = super
        flatten_parameters
        ret
      end

      def reset_parameters
        stdv = 1.0 / Math.sqrt(@hidden_size)
        parameters.each do |weight|
          Init.uniform!(weight, a: -stdv, b: stdv)
        end
      end

      def permute_hidden(hx, permutation)
        raise NotImplementedYet
      end

      def forward(input, hx: nil)
        raise NotImplementedYet

        # The ported code below is unreachable while the raise above is in place.
        is_packed = false # TODO isinstance(input, PackedSequence)
        if is_packed
          input, batch_sizes, sorted_indices, unsorted_indices = input
          max_batch_size = batch_sizes[0].to_i
        else
          batch_sizes = nil
          max_batch_size = @batch_first ? input.size(0) : input.size(1)
          sorted_indices = nil
          unsorted_indices = nil
        end

        if hx.nil?
          num_directions = @bidirectional ? 2 : 1
          hx = Torch.zeros(@num_layers * num_directions, max_batch_size,
            @hidden_size, dtype: input.dtype, device: input.device)
        else
          # Each batch of the hidden state should match the input sequence that
          # the user believes they are passing in.
          hx = permute_hidden(hx, sorted_indices)
        end

        check_forward_args(input, hx, batch_sizes)
        _rnn_impls = {
          "RNN_TANH" => Torch.method(:_rnn_tanh),
          "RNN_RELU" => Torch.method(:_rnn_relu)
        }
        _impl = _rnn_impls[@mode]
        if batch_sizes.nil?
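          # Flat (non-packed) input: pass @batch_first through; the packed-sequence
          # call below takes batch_sizes in its place and has no batch_first argument.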
          result = _impl.call(input, hx, _get_flat_weights, @bias, @num_layers,
            @dropout, @training, @bidirectional, @batch_first)
        else
          result = _impl.call(input, batch_sizes, hx, _get_flat_weights, @bias,
            @num_layers, @dropout, @training, @bidirectional)
        end
        output = result[0]
        hidden = result[1]

        if is_packed
          raise NotImplementedYet
          # output = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
        end
        [output, permute_hidden(hidden, unsorted_indices)]
      end

      # TODO add more parameters
      def extra_inspect
        s = String.new("%{input_size}, %{hidden_size}")
        if @num_layers != 1
          s += ", num_layers: %{num_layers}"
        end
        format(s, input_size: @input_size, hidden_size: @hidden_size, num_layers: @num_layers)
      end
    end
  end
end
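
# Minimal usage sketch (illustrative assumption, not part of this file): RNNBase
# is normally driven through a concrete subclass, but the constructor alone
# registers per-layer parameters named weight_ih_l0, weight_hh_l0, bias_ih_l0,
# bias_hh_l0 (with a "_reverse" suffix for the second direction when
# bidirectional). Assuming the usual Tensor#shape helper:
#
#   rnn = Torch::NN::RNNBase.new("RNN_TANH", 10, 20, num_layers: 2)
#   rnn.instance_variable_get(:@weight_ih_l0).shape # => [20, 10]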