#
# Fluentd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

require 'fluent/plugin/base'
require 'fluent/plugin/owned_by_mixin'
require 'fluent/unique_id'

require 'monitor'

module Fluent
  module Plugin
    class Buffer < Base
      include OwnedByMixin
      include UniqueId::Mixin
      include MonitorMixin

      class BufferError < StandardError; end
      class BufferOverflowError < BufferError; end
      class BufferChunkOverflowError < BufferError; end # a record size is larger than the chunk size limit

      MINIMUM_APPEND_ATTEMPT_RECORDS = 10

      DEFAULT_CHUNK_LIMIT_SIZE = 8 * 1024 * 1024   # 8MB
      DEFAULT_TOTAL_LIMIT_SIZE = 512 * 1024 * 1024 # 512MB, same as v0.12 (BufferedOutput + buf_memory: 64 x 8MB)

      DEFAULT_CHUNK_FULL_THRESHOLD = 0.95

      configured_in :buffer

      # TODO: system total buffer limit size in bytes by SystemConfig

      config_param :chunk_limit_size, :size, default: DEFAULT_CHUNK_LIMIT_SIZE
      config_param :total_limit_size, :size, default: DEFAULT_TOTAL_LIMIT_SIZE

      # If the user specifies this value, total_limit_size is reconfigured to
      # (chunk_limit_size * queue_length_limit)
      config_param :queue_length_limit, :integer, default: nil

      # optional new limitations
      config_param :chunk_records_limit, :integer, default: nil

      # if chunk bytesize (or number of records) reaches 95% of its limit after #write, that chunk will be enqueued
      config_param :chunk_full_threshold, :float, default: DEFAULT_CHUNK_FULL_THRESHOLD

      Metadata = Struct.new(:timekey, :tag, :variables)

      # for tests
      attr_accessor :stage_size, :queue_size
      attr_reader :stage, :queue, :dequeued, :queued_num

      def initialize
        super

        @chunk_limit_size = nil
        @total_limit_size = nil
        @queue_length_limit = nil
        @chunk_records_limit = nil

        @stage = {}    #=> Hash (metadata -> chunk) : not flushed yet
        @queue = []    #=> Array (chunks)           : already flushed (not written)
        @dequeued = {} #=> Hash (unique_id -> chunk): already written (not purged)
        @queued_num = {} # metadata => int (number of queued chunks)

        @stage_size = @queue_size = 0
        @metadata_list = [] # keys of @stage
      end

      def persistent?
        false
      end

      def configure(conf)
        super

        unless @queue_length_limit.nil?
          @total_limit_size = @chunk_limit_size * @queue_length_limit
        end
      end

      def start
        super

        @stage, @queue = resume
        @stage.each_pair do |metadata, chunk|
          @metadata_list << metadata unless @metadata_list.include?(metadata)
          @stage_size += chunk.bytesize
        end
        @queue.each do |chunk|
          @metadata_list << chunk.metadata unless @metadata_list.include?(chunk.metadata)
          @queued_num[chunk.metadata] ||= 0
          @queued_num[chunk.metadata] += 1
          @queue_size += chunk.bytesize
        end
      end

      def close
        super
        synchronize do
          @dequeued.each_pair do |chunk_id, chunk|
            chunk.close
          end
          until @queue.empty?
            @queue.shift.close
          end
          @stage.each_pair do |metadata, chunk|
            chunk.close
          end
        end
      end

      def terminate
        super
        @dequeued = @stage = @queue = @queued_num = @metadata_list = nil
        @stage_size = @queue_size = 0
      end
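      # Whether this buffer can accept more data: true while the sum of staged
      # and queued bytes is below total_limit_size. #write raises
      # BufferOverflowError when this returns false.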
      def storable?
        @total_limit_size > @stage_size + @queue_size
      end

      ## TODO: for back pressure feature
      # def used?(ratio)
      #   @total_limit_size * ratio > @stage_size + @queue_size
      # end

      def resume
        # return {}, []
        raise NotImplementedError, "Implement this method in child class"
      end

      def generate_chunk(metadata)
        raise NotImplementedError, "Implement this method in child class"
      end

      def metadata_list
        synchronize do
          @metadata_list.dup
        end
      end

      def new_metadata(timekey: nil, tag: nil, variables: nil)
        Metadata.new(timekey, tag, variables)
      end

      def add_metadata(metadata)
        synchronize do
          if i = @metadata_list.index(metadata)
            @metadata_list[i]
          else
            @metadata_list << metadata
            metadata
          end
        end
      end

      def metadata(timekey: nil, tag: nil, variables: nil)
        meta = new_metadata(timekey: timekey, tag: tag, variables: variables)
        add_metadata(meta)
      end

      # metadata MUST have a consistent object_id for each variation
      # data MUST be an Array of serialized events
      # metadata_and_data MUST be a Hash of { metadata => data }
      def write(metadata_and_data, bulk: false, enqueue: false)
        return if metadata_and_data.size < 1
        raise BufferOverflowError, "buffer space has too much data" unless storable?

        staged_bytesize = 0
        operated_chunks = []

        begin
          metadata_and_data.each do |metadata, data|
            write_once(metadata, data, bulk: bulk) do |chunk, adding_bytesize|
              chunk.mon_enter # lock the chunk to prevent it from being committed/rolled back by other threads
              operated_chunks << chunk
              staged_bytesize += adding_bytesize
            end
          end

          return if operated_chunks.empty?

          first_chunk = operated_chunks.shift
          # If the first commit operation finishes without any exception, the following
          # commits for the other chunks are expected to succeed as well.
          # In most cases #commit requires very little disk space, so the major failure
          # reasons are permission errors, disk failures and other permanent (fatal) errors.
          begin
            first_chunk.commit
            enqueue_chunk(first_chunk.metadata) if enqueue || chunk_size_full?(first_chunk)
            first_chunk.mon_exit
          rescue
            operated_chunks.unshift(first_chunk)
            raise
          end

          errors = []
          # Buffer plugin assumes there's no serious error cause: try to commit all remaining chunks either way
          operated_chunks.each do |chunk|
            begin
              chunk.commit
              enqueue_chunk(chunk.metadata) if enqueue || chunk_size_full?(chunk)
              chunk.mon_exit
            rescue => e
              chunk.rollback
              chunk.mon_exit
              errors << e
            end
          end

          operated_chunks.clear if errors.empty?

          @stage_size += staged_bytesize

          if errors.size > 0
            log.warn "error occurred while committing chunks: only the first one raised", errors: errors.map(&:class)
            raise errors.first
          end
        ensure
          operated_chunks.each do |chunk|
            chunk.rollback rescue nil # nothing we can do for a #rollback failure
            chunk.mon_exit rescue nil # this may raise ThreadError for chunks already committed
          end
        end
      end

      def queued_records
        synchronize { @queue.reduce(0){|r, chunk| r + chunk.size } }
      end

      def queued?(metadata=nil)
        synchronize do
          if metadata
            n = @queued_num[metadata]
            n && n.nonzero?
          else
            !@queue.empty?
          end
        end
      end

      def enqueue_chunk(metadata)
        synchronize do
          chunk = @stage.delete(metadata)
          return nil unless chunk

          chunk.synchronize do
            if chunk.empty?
              chunk.close
            else
              @queue << chunk
              @queued_num[metadata] = @queued_num.fetch(metadata, 0) + 1
              chunk.enqueued! if chunk.respond_to?(:enqueued!)
            end
          end
          bytesize = chunk.bytesize
          @stage_size -= bytesize
          @queue_size += bytesize
        end
        nil
      end
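      # Enqueue all staged chunks. With a block, a chunk is enqueued only when
      # `yield metadata, chunk` returns a truthy value; without a block, every
      # staged chunk is enqueued unconditionally.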
      def enqueue_all
        synchronize do
          if block_given?
            @stage.keys.each do |metadata|
              chunk = @stage[metadata]
              v = yield metadata, chunk
              enqueue_chunk(metadata) if v
            end
          else
            @stage.keys.each do |metadata|
              enqueue_chunk(metadata)
            end
          end
        end
      end

      def dequeue_chunk
        return nil if @queue.empty?
        synchronize do
          chunk = @queue.shift

          # this chunk may have been dequeued by another thread just before "synchronize" in this thread
          return nil unless chunk

          @dequeued[chunk.unique_id] = chunk
          @queued_num[chunk.metadata] -= 1 # BUG if nil, 0 or subzero
          chunk
        end
      end

      def takeback_chunk(chunk_id)
        synchronize do
          chunk = @dequeued.delete(chunk_id)
          return false unless chunk # already purged by another thread
          @queue.unshift(chunk)
          @queued_num[chunk.metadata] += 1 # BUG if nil
        end
        true
      end

      def purge_chunk(chunk_id)
        synchronize do
          chunk = @dequeued.delete(chunk_id)
          return nil unless chunk # already purged by another thread

          metadata = chunk.metadata
          begin
            bytesize = chunk.bytesize
            chunk.purge
            @queue_size -= bytesize
          rescue => e
            log.error "failed to purge buffer chunk", chunk_id: dump_unique_id_hex(chunk_id), error_class: e.class, error: e
          end

          if metadata && !@stage[metadata] && (!@queued_num[metadata] || @queued_num[metadata] < 1)
            @metadata_list.delete(metadata)
          end
        end
        nil
      end

      def clear_queue!
        synchronize do
          until @queue.empty?
            begin
              q = @queue.shift
              log.debug("purging a chunk in queue"){ {id: dump_unique_id_hex(q.unique_id), bytesize: q.bytesize, size: q.size} }
              q.purge
            rescue => e
              log.error "unexpected error while clearing buffer queue", error_class: e.class, error: e
            end
          end
          @queue_size = 0
        end
      end

      def chunk_size_over?(chunk)
        chunk.bytesize > @chunk_limit_size || (@chunk_records_limit && chunk.size > @chunk_records_limit)
      end

      def chunk_size_full?(chunk)
        chunk.bytesize >= @chunk_limit_size * @chunk_full_threshold || (@chunk_records_limit && chunk.size >= @chunk_records_limit * @chunk_full_threshold)
      end

      class ShouldRetry < StandardError; end
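      # Append data (or concatenate a bulk payload) into the staged chunk for metadata,
      # yielding the chunk and the added bytesize to the block on success. ShouldRetry
      # is raised (and the whole operation retried) when the chunk leaves the stage
      # between fetching it and entering its critical section. In non-bulk mode, data
      # that overflows the current chunk falls back to #write_step_by_step.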
      def write_once(metadata, data, bulk: false, &block)
        return if !bulk && (data.nil? || data.empty?)
        return if bulk && (data.empty? || data.first.nil? || data.first.empty?)

        stored = false
        adding_bytesize = nil

        chunk = synchronize { @stage[metadata] ||= generate_chunk(metadata) }
        enqueue_list = []

        chunk.synchronize do
          # retry this method if the chunk is already queued (between getting the chunk and entering this critical section)
          raise ShouldRetry unless chunk.staged?

          empty_chunk = chunk.empty?
          original_bytesize = chunk.bytesize

          begin
            if bulk
              content, size = data
              chunk.concat(content, size)
            else
              chunk.append(data)
            end
            adding_bytesize = chunk.bytesize - original_bytesize

            if chunk_size_over?(chunk)
              if empty_chunk && bulk
                log.warn "chunk bytes limit exceeded for a bulk event stream: #{content.bytesize}bytes"
                stored = true
              else
                chunk.rollback
              end
            else
              stored = true
            end
          rescue
            chunk.rollback
            raise
          end

          if stored
            block.call(chunk, adding_bytesize)
          elsif bulk
            # this metadata might have been enqueued already by other threads,
            # but #enqueue_chunk does nothing in that case
            enqueue_list << metadata
            raise ShouldRetry
          end
        end

        unless stored
          # try step-by-step appending if data can't be stored into the existing chunk in non-bulk mode
          write_step_by_step(metadata, data, data.size / 3, &block)
        end
      rescue ShouldRetry
        enqueue_list.each do |m|
          enqueue_chunk(m)
        end
        retry
      end

      def write_step_by_step(metadata, data, attempt_records, &block)
        while data.size > 0
          if attempt_records < MINIMUM_APPEND_ATTEMPT_RECORDS
            attempt_records = MINIMUM_APPEND_ATTEMPT_RECORDS
          end

          chunk = synchronize{ @stage[metadata] ||= generate_chunk(metadata) }
          chunk.synchronize do # critical section for the chunk (chunk append/commit/rollback)
            raise ShouldRetry unless chunk.staged?
            begin
              empty_chunk = chunk.empty?
              original_bytesize = chunk.bytesize

              attempt = data.slice(0, attempt_records)
              chunk.append(attempt)
              adding_bytesize = (chunk.bytesize - original_bytesize)

              if chunk_size_over?(chunk)
                chunk.rollback

                if attempt_records <= MINIMUM_APPEND_ATTEMPT_RECORDS
                  if empty_chunk # a record is too large even for an empty chunk
                    raise BufferChunkOverflowError, "minimum append batch exceeds chunk bytes limit"
                  end
                  # no more space for records in this chunk -> enqueue -> to be flushed
                  enqueue_chunk(metadata) # `chunk` will be removed from the stage
                  attempt_records = data.size # a fresh chunk may have enough space
                else
                  # the whole remaining data can be processed in two more operations
                  # (with attempt_records /= 2, 3 operations would be required for odd record counts)
                  attempt_records = (attempt_records / 2) + 1
                end

                next
              end

              block.call(chunk, adding_bytesize)
              data.slice!(0, attempt_records) # consume the records just appended
              nil # discard return value of data.slice!() immediately
            rescue
              chunk.rollback
              raise
            end
          end
        end
      rescue ShouldRetry
        retry
      end # write_step_by_step
    end
  end
end
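
# Usage sketch (illustrative only, not shipped in this file): a concrete buffer
# implements #resume and #generate_chunk. This minimal in-memory variant is
# modeled on the bundled memory buffer plugin; the MemoryChunk class and its
# require path are assumptions taken from the surrounding source tree.
#
#   require 'fluent/plugin/buffer'
#   require 'fluent/plugin/buffer/memory_chunk'
#
#   module Fluent::Plugin
#     class TrivialMemoryBuffer < Fluent::Plugin::Buffer
#       def resume
#         return {}, [] # nothing persisted: empty stage, empty queue
#       end
#
#       def generate_chunk(metadata)
#         Fluent::Plugin::Buffer::MemoryChunk.new(metadata)
#       end
#     end
#   end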