# # Fluentd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # require 'fluent/plugin/base' require 'fluent/plugin/owned_by_mixin' require 'fluent/plugin_id' require 'fluent/plugin_helper' require 'fluent/unique_id' require 'fluent/ext_monitor_require' module Fluent module Plugin class Buffer < Base include OwnedByMixin include UniqueId::Mixin include PluginId include MonitorMixin include PluginHelper::Mixin # for metrics class BufferError < StandardError; end class BufferOverflowError < BufferError; end class BufferChunkOverflowError < BufferError; end # A record size is larger than chunk size limit MINIMUM_APPEND_ATTEMPT_RECORDS = 10 DEFAULT_CHUNK_LIMIT_SIZE = 8 * 1024 * 1024 # 8MB DEFAULT_TOTAL_LIMIT_SIZE = 512 * 1024 * 1024 # 512MB, same with v0.12 (BufferedOutput + buf_memory: 64 x 8MB) DEFAULT_CHUNK_FULL_THRESHOLD = 0.95 configured_in :buffer helpers_internal :metrics # TODO: system total buffer limit size in bytes by SystemConfig config_param :chunk_limit_size, :size, default: DEFAULT_CHUNK_LIMIT_SIZE config_param :total_limit_size, :size, default: DEFAULT_TOTAL_LIMIT_SIZE # If user specify this value and (chunk_size * queue_length) is smaller than total_size, # then total_size is automatically configured to that value config_param :queue_limit_length, :integer, default: nil # optional new limitations config_param :chunk_limit_records, :integer, default: nil # if chunk size (or records) is 95% or more after #write, then that chunk will be enqueued config_param :chunk_full_threshold, :float, default: DEFAULT_CHUNK_FULL_THRESHOLD desc 'The max number of queued chunks.' config_param :queued_chunks_limit_size, :integer, default: nil desc 'Compress buffered data.' config_param :compress, :enum, list: [:text, :gzip], default: :text desc 'If true, chunks are thrown away when unrecoverable error happens' config_param :disable_chunk_backup, :bool, default: false Metadata = Struct.new(:timekey, :tag, :variables, :seq) do def initialize(timekey, tag, variables) super(timekey, tag, variables, 0) end def dup_next m = dup m.seq = seq + 1 m end def empty? timekey.nil? && tag.nil? && variables.nil? end def cmp_variables(v1, v2) if v1.nil? && v2.nil? return 0 elsif v1.nil? # v2 is non-nil return -1 elsif v2.nil? # v1 is non-nil return 1 end # both of v1 and v2 are non-nil v1_sorted_keys = v1.keys.sort v2_sorted_keys = v2.keys.sort if v1_sorted_keys != v2_sorted_keys if v1_sorted_keys.size == v2_sorted_keys.size v1_sorted_keys <=> v2_sorted_keys else v1_sorted_keys.size <=> v2_sorted_keys.size end else v1_sorted_keys.each do |k| a = v1[k] b = v2[k] if a && b && a != b return a <=> b elsif a && b || (!a && !b) # same value (including both are nil) next elsif a # b is nil return 1 else # a is nil (but b is non-nil) return -1 end end 0 end end def <=>(o) timekey2 = o.timekey tag2 = o.tag variables2 = o.variables if (!!timekey ^ !!timekey2) || (!!tag ^ !!tag2) || (!!variables ^ !!variables2) # One has value in a field, but another doesn't have value in same field # This case occurs very rarely if timekey == timekey2 # including the case of nil == nil if tag == tag2 cmp_variables(variables, variables2) elsif tag.nil? -1 elsif tag2.nil? 1 else tag <=> tag2 end elsif timekey.nil? -1 elsif timekey2.nil? 1 else timekey <=> timekey2 end else # objects have values in same field pairs (comparison with non-nil and nil doesn't occur here) (timekey <=> timekey2 || 0).nonzero? || # if `a <=> b` is nil, then both are nil (tag <=> tag2 || 0).nonzero? || cmp_variables(variables, variables2) end end # This is an optimization code. Current Struct's implementation is comparing all data. # https://github.com/ruby/ruby/blob/0623e2b7cc621b1733a760b72af246b06c30cf96/struct.c#L1200-L1203 # Actually this overhead is very small but this class is generated *per chunk* (and used in hash object). # This means that this class is one of the most called object in Fluentd. # See https://github.com/fluent/fluentd/pull/2560 def hash timekey.hash end end # for metrics attr_reader :stage_size_metrics, :stage_length_metrics, :queue_size_metrics, :queue_length_metrics attr_reader :available_buffer_space_ratios_metrics, :total_queued_size_metrics attr_reader :newest_timekey_metrics, :oldest_timekey_metrics # for tests attr_reader :stage, :queue, :dequeued, :queued_num def initialize super @chunk_limit_size = nil @total_limit_size = nil @queue_limit_length = nil @chunk_limit_records = nil @stage = {} #=> Hash (metadata -> chunk) : not flushed yet @queue = [] #=> Array (chunks) : already flushed (not written) @dequeued = {} #=> Hash (unique_id -> chunk): already written (not purged) @queued_num = {} # metadata => int (number of queued chunks) @dequeued_num = {} # metadata => int (number of dequeued chunks) @stage_length_metrics = nil @stage_size_metrics = nil @queue_length_metrics = nil @queue_size_metrics = nil @available_buffer_space_ratios_metrics = nil @total_queued_size_metrics = nil @newest_timekey_metrics = nil @oldest_timekey_metrics = nil @timekeys = Hash.new(0) @enable_update_timekeys = false @mutex = Mutex.new end def stage_size @stage_size_metrics.get end def stage_size=(value) @stage_size_metrics.set(value) end def queue_size @queue_size_metrics.get end def queue_size=(value) @queue_size_metrics.set(value) end def persistent? false end def configure(conf) super unless @queue_limit_length.nil? @total_limit_size = @chunk_limit_size * @queue_limit_length end @stage_length_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "stage_length", help_text: 'Length of stage buffers', prefer_gauge: true) @stage_length_metrics.set(0) @stage_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "stage_byte_size", help_text: 'Total size of stage buffers', prefer_gauge: true) @stage_size_metrics.set(0) # Ensure zero. @queue_length_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "queue_length", help_text: 'Length of queue buffers', prefer_gauge: true) @queue_length_metrics.set(0) @queue_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "queue_byte_size", help_text: 'Total size of queue buffers', prefer_gauge: true) @queue_size_metrics.set(0) # Ensure zero. @available_buffer_space_ratios_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "available_buffer_space_ratios", help_text: 'Ratio of available space in buffer', prefer_gauge: true) @available_buffer_space_ratios_metrics.set(100) # Default is 100%. @total_queued_size_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "total_queued_size", help_text: 'Total size of stage and queue buffers', prefer_gauge: true) @total_queued_size_metrics.set(0) @newest_timekey_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "newest_timekey", help_text: 'Newest timekey in buffer', prefer_gauge: true) @oldest_timekey_metrics = metrics_create(namespace: "fluentd", subsystem: "buffer", name: "oldest_timekey", help_text: 'Oldest timekey in buffer', prefer_gauge: true) end def enable_update_timekeys @enable_update_timekeys = true end def start super @stage, @queue = resume @stage.each_pair do |metadata, chunk| @stage_size_metrics.add(chunk.bytesize) end @queue.each do |chunk| @queued_num[chunk.metadata] ||= 0 @queued_num[chunk.metadata] += 1 @queue_size_metrics.add(chunk.bytesize) end update_timekeys log.debug "buffer started", instance: self.object_id, stage_size: @stage_size_metrics.get, queue_size: @queue_size_metrics.get end def close super synchronize do log.debug "closing buffer", instance: self.object_id @dequeued.each_pair do |chunk_id, chunk| chunk.close end until @queue.empty? @queue.shift.close end @stage.each_pair do |metadata, chunk| chunk.close end end end def terminate super @dequeued = @stage = @queue = @queued_num = nil @stage_length_metrics = @stage_size_metrics = @queue_length_metrics = @queue_size_metrics = nil @available_buffer_space_ratios_metrics = @total_queued_size_metrics = nil @newest_timekey_metrics = @oldest_timekey_metrics = nil @timekeys.clear end def storable? @total_limit_size > @stage_size_metrics.get + @queue_size_metrics.get end ## TODO: for back pressure feature # def used?(ratio) # @total_limit_size * ratio > @stage_size_metrics.get + @queue_size_metrics.get # end def resume # return {}, [] raise NotImplementedError, "Implement this method in child class" end def generate_chunk(metadata) raise NotImplementedError, "Implement this method in child class" end def new_metadata(timekey: nil, tag: nil, variables: nil) Metadata.new(timekey, tag, variables) end # Keep this method for existing code def metadata(timekey: nil, tag: nil, variables: nil) Metadata.new(timekey, tag, variables) end def timekeys @timekeys.keys end # metadata MUST have consistent object_id for each variation # data MUST be Array of serialized events, or EventStream # metadata_and_data MUST be a hash of { metadata => data } def write(metadata_and_data, format: nil, size: nil, enqueue: false) return if metadata_and_data.size < 1 raise BufferOverflowError, "buffer space has too many data" unless storable? log.on_trace { log.trace "writing events into buffer", instance: self.object_id, metadata_size: metadata_and_data.size } operated_chunks = [] unstaged_chunks = {} # metadata => [chunk, chunk, ...] chunks_to_enqueue = [] staged_bytesizes_by_chunk = {} # track internal BufferChunkOverflowError in write_step_by_step buffer_chunk_overflow_errors = [] begin # sort metadata to get lock of chunks in same order with other threads metadata_and_data.keys.sort.each do |metadata| data = metadata_and_data[metadata] write_once(metadata, data, format: format, size: size) do |chunk, adding_bytesize, error| chunk.mon_enter # add lock to prevent to be committed/rollbacked from other threads operated_chunks << chunk if chunk.staged? # # https://github.com/fluent/fluentd/issues/2712 # write_once is supposed to write to a chunk only once # but this block **may** run multiple times from write_step_by_step and previous write may be rollbacked # So we should be counting the stage_size only for the last successful write # staged_bytesizes_by_chunk[chunk] = adding_bytesize elsif chunk.unstaged? unstaged_chunks[metadata] ||= [] unstaged_chunks[metadata] << chunk end if error && !error.empty? buffer_chunk_overflow_errors << error end end end return if operated_chunks.empty? # Now, this thread acquires many locks of chunks... getting buffer-global lock causes dead lock. # Any operations needs buffer-global lock (including enqueueing) should be done after releasing locks. first_chunk = operated_chunks.shift # Following commits for other chunks also can finish successfully if the first commit operation # finishes without any exceptions. # In most cases, #commit just requires very small disk spaces, so major failure reason are # permission errors, disk failures and other permanent(fatal) errors. begin first_chunk.commit if enqueue || first_chunk.unstaged? || chunk_size_full?(first_chunk) chunks_to_enqueue << first_chunk end first_chunk.mon_exit rescue operated_chunks.unshift(first_chunk) raise end errors = [] # Buffer plugin estimates there's no serious error cause: will commit for all chunks eigher way operated_chunks.each do |chunk| begin chunk.commit if enqueue || chunk.unstaged? || chunk_size_full?(chunk) chunks_to_enqueue << chunk end chunk.mon_exit rescue => e chunk.rollback chunk.mon_exit errors << e end end # All locks about chunks are released. # # Now update the stage, stage_size with proper locking # FIX FOR stage_size miscomputation - https://github.com/fluent/fluentd/issues/2712 # staged_bytesizes_by_chunk.each do |chunk, bytesize| chunk.synchronize do synchronize { @stage_size_metrics.add(bytesize) } log.on_trace { log.trace { "chunk #{chunk.path} size_added: #{bytesize} new_size: #{chunk.bytesize}" } } end end chunks_to_enqueue.each do |c| if c.staged? && (enqueue || chunk_size_full?(c)) m = c.metadata enqueue_chunk(m) if unstaged_chunks[m] && !unstaged_chunks[m].empty? u = unstaged_chunks[m].pop u.synchronize do if u.unstaged? && !chunk_size_full?(u) # `u.metadata.seq` and `m.seq` can be different but Buffer#enqueue_chunk expect them to be the same value u.metadata.seq = 0 synchronize { @stage[m] = u.staged! @stage_size_metrics.add(u.bytesize) } end end end elsif c.unstaged? enqueue_unstaged_chunk(c) else # previously staged chunk is already enqueued, closed or purged. # no problem. end end operated_chunks.clear if errors.empty? if errors.size > 0 log.warn "error occurs in committing chunks: only first one raised", errors: errors.map(&:class) raise errors.first end ensure operated_chunks.each do |chunk| chunk.rollback rescue nil # nothing possible to do for #rollback failure if chunk.unstaged? chunk.purge rescue nil # to prevent leakage of unstaged chunks end chunk.mon_exit rescue nil # this may raise ThreadError for chunks already committed end unless buffer_chunk_overflow_errors.empty? # Notify delayed BufferChunkOverflowError here raise BufferChunkOverflowError, buffer_chunk_overflow_errors.join(", ") end end end def queue_full? synchronize { @queue.size } >= @queued_chunks_limit_size end def queued_records synchronize { @queue.reduce(0){|r, chunk| r + chunk.size } } end def queued?(metadata = nil, optimistic: false) if optimistic optimistic_queued?(metadata) else synchronize do optimistic_queued?(metadata) end end end def enqueue_chunk(metadata) log.on_trace { log.trace "enqueueing chunk", instance: self.object_id, metadata: metadata } chunk = synchronize do @stage.delete(metadata) end return nil unless chunk chunk.synchronize do synchronize do if chunk.empty? chunk.close else chunk.metadata.seq = 0 # metadata.seq should be 0 for counting @queued_num @queue << chunk @queued_num[metadata] = @queued_num.fetch(metadata, 0) + 1 chunk.enqueued! end bytesize = chunk.bytesize @stage_size_metrics.sub(bytesize) @queue_size_metrics.add(bytesize) end end nil end def enqueue_unstaged_chunk(chunk) log.on_trace { log.trace "enqueueing unstaged chunk", instance: self.object_id, metadata: chunk.metadata } synchronize do chunk.synchronize do metadata = chunk.metadata metadata.seq = 0 # metadata.seq should be 0 for counting @queued_num @queue << chunk @queued_num[metadata] = @queued_num.fetch(metadata, 0) + 1 chunk.enqueued! end @queue_size_metrics.add(chunk.bytesize) end end def update_timekeys synchronize do chunks = @stage.values chunks.concat(@queue) @timekeys = chunks.each_with_object({}) do |chunk, keys| if chunk.metadata && chunk.metadata.timekey t = chunk.metadata.timekey keys[t] = keys.fetch(t, 0) + 1 end end end end # At flush_at_shutdown, all staged chunks should be enqueued for buffer flush. Set true to force_enqueue for it. def enqueue_all(force_enqueue = false) log.on_trace { log.trace "enqueueing all chunks in buffer", instance: self.object_id } update_timekeys if @enable_update_timekeys if block_given? synchronize{ @stage.keys }.each do |metadata| return if !force_enqueue && queue_full? # NOTE: The following line might cause data race depending on Ruby implementations except CRuby # cf. https://github.com/fluent/fluentd/pull/1721#discussion_r146170251 chunk = @stage[metadata] next unless chunk v = yield metadata, chunk enqueue_chunk(metadata) if v end else synchronize{ @stage.keys }.each do |metadata| return if !force_enqueue && queue_full? enqueue_chunk(metadata) end end end def dequeue_chunk return nil if @queue.empty? log.on_trace { log.trace "dequeueing a chunk", instance: self.object_id } synchronize do chunk = @queue.shift # this buffer is dequeued by other thread just before "synchronize" in this thread return nil unless chunk @dequeued[chunk.unique_id] = chunk @queued_num[chunk.metadata] -= 1 # BUG if nil, 0 or subzero @dequeued_num[chunk.metadata] ||= 0 @dequeued_num[chunk.metadata] += 1 log.trace "chunk dequeued", instance: self.object_id, metadata: chunk.metadata chunk end end def takeback_chunk(chunk_id) log.on_trace { log.trace "taking back a chunk", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id) } synchronize do chunk = @dequeued.delete(chunk_id) return false unless chunk # already purged by other thread @queue.unshift(chunk) log.on_trace { log.trace "chunk taken back", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: chunk.metadata } @queued_num[chunk.metadata] += 1 # BUG if nil @dequeued_num[chunk.metadata] -= 1 end true end def purge_chunk(chunk_id) metadata = nil synchronize do chunk = @dequeued.delete(chunk_id) return nil unless chunk # purged by other threads metadata = chunk.metadata log.on_trace { log.trace "purging a chunk", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: metadata } begin bytesize = chunk.bytesize chunk.purge @queue_size_metrics.sub(bytesize) rescue => e log.error "failed to purge buffer chunk", chunk_id: dump_unique_id_hex(chunk_id), error_class: e.class, error: e log.error_backtrace end @dequeued_num[chunk.metadata] -= 1 if metadata && !@stage[metadata] && (!@queued_num[metadata] || @queued_num[metadata] < 1) && @dequeued_num[metadata].zero? @queued_num.delete(metadata) @dequeued_num.delete(metadata) end log.on_trace { log.trace "chunk purged", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: metadata } end nil end def clear_queue! log.on_trace { log.trace "clearing queue", instance: self.object_id } synchronize do until @queue.empty? begin q = @queue.shift log.trace("purging a chunk in queue"){ {id: dump_unique_id_hex(chunk.unique_id), bytesize: chunk.bytesize, size: chunk.size} } q.purge rescue => e log.error "unexpected error while clearing buffer queue", error_class: e.class, error: e log.error_backtrace end end @queue_size_metrics.set(0) end end def chunk_size_over?(chunk) chunk.bytesize > @chunk_limit_size || (@chunk_limit_records && chunk.size > @chunk_limit_records) end def chunk_size_full?(chunk) chunk.bytesize >= @chunk_limit_size * @chunk_full_threshold || (@chunk_limit_records && chunk.size >= @chunk_limit_records * @chunk_full_threshold) end class ShouldRetry < StandardError; end # write once into a chunk # 1. append whole data into existing chunk # 2. commit it & return unless chunk_size_over? # 3. enqueue existing chunk & retry whole method if chunk was not empty # 4. go to step_by_step writing def write_once(metadata, data, format: nil, size: nil, &block) return if data.empty? stored = false adding_bytesize = nil chunk = synchronize { @stage[metadata] ||= generate_chunk(metadata).staged! } enqueue_chunk_before_retry = false chunk.synchronize do # retry this method if chunk is already queued (between getting chunk and entering critical section) raise ShouldRetry unless chunk.staged? empty_chunk = chunk.empty? original_bytesize = chunk.bytesize begin if format serialized = format.call(data) chunk.concat(serialized, size ? size.call : data.size) else chunk.append(data, compress: @compress) end adding_bytesize = chunk.bytesize - original_bytesize if chunk_size_over?(chunk) if format && empty_chunk if chunk.bytesize > @chunk_limit_size log.warn "chunk bytes limit exceeds for an emitted event stream: #{adding_bytesize}bytes" else log.warn "chunk size limit exceeds for an emitted event stream: #{chunk.size}records" end end chunk.rollback if format && !empty_chunk # Event streams should be appended into a chunk at once # as far as possible, to improve performance of formatting. # Event stream may be a MessagePackEventStream. We don't want to split it into # 2 or more chunks (except for a case that the event stream is larger than chunk limit). enqueue_chunk_before_retry = true raise ShouldRetry end else stored = true end rescue chunk.rollback raise end if stored block.call(chunk, adding_bytesize) end end unless stored # try step-by-step appending if data can't be stored into existing a chunk in non-bulk mode # # 1/10 size of original event stream (splits_count == 10) seems enough small # to try emitting events into existing chunk. # it does not matter to split event stream into very small splits, because chunks have less # overhead to write data many times (even about file buffer chunks). write_step_by_step(metadata, data, format, 10, &block) end rescue ShouldRetry enqueue_chunk(metadata) if enqueue_chunk_before_retry retry end # EventStream can be split into many streams # because (es1 + es2).to_msgpack_stream == es1.to_msgpack_stream + es2.to_msgpack_stream # 1. split event streams into many (10 -> 100 -> 1000 -> ...) chunks # 2. append splits into the staged chunks as much as possible # 3. create unstaged chunk and append rest splits -> repeat it for all splits def write_step_by_step(metadata, data, format, splits_count, &block) splits = [] if splits_count > data.size splits_count = data.size end slice_size = if data.size % splits_count == 0 data.size / splits_count else data.size / (splits_count - 1) end slice_origin = 0 while slice_origin < data.size splits << data.slice(slice_origin, slice_size) slice_origin += slice_size end # This method will append events into the staged chunk at first. # Then, will generate chunks not staged (not queued) to append rest data. staged_chunk_used = false modified_chunks = [] modified_metadata = metadata get_next_chunk = ->(){ if staged_chunk_used # Staging new chunk here is bad idea: # Recovering whole state including newly staged chunks is much harder than current implementation. modified_metadata = modified_metadata.dup_next generate_chunk(modified_metadata) else synchronize { @stage[modified_metadata] ||= generate_chunk(modified_metadata).staged! } end } writing_splits_index = 0 enqueue_chunk_before_retry = false while writing_splits_index < splits.size chunk = get_next_chunk.call errors = [] modified_chunks << {chunk: chunk, adding_bytesize: 0, errors: errors} chunk.synchronize do raise ShouldRetry unless chunk.writable? staged_chunk_used = true if chunk.staged? original_bytesize = committed_bytesize = chunk.bytesize begin while writing_splits_index < splits.size split = splits[writing_splits_index] formatted_split = format ? format.call(split) : nil if split.size == 1 # Check BufferChunkOverflowError determined_bytesize = nil if @compress != :text determined_bytesize = nil elsif formatted_split determined_bytesize = formatted_split.bytesize elsif split.first.respond_to?(:bytesize) determined_bytesize = split.first.bytesize end if determined_bytesize && determined_bytesize > @chunk_limit_size # It is a obvious case that BufferChunkOverflowError should be raised here. # But if it raises here, already processed 'split' or # the proceeding 'split' will be lost completely. # So it is a last resort to delay raising such a exception errors << "a #{determined_bytesize} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})" writing_splits_index += 1 next end if determined_bytesize.nil? || chunk.bytesize + determined_bytesize > @chunk_limit_size # The split will (might) cause size over so keep already processed # 'split' content here (allow performance regression a bit). chunk.commit committed_bytesize = chunk.bytesize end end if format chunk.concat(formatted_split, split.size) else chunk.append(split, compress: @compress) end adding_bytes = chunk.bytesize - committed_bytesize if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over? chunk.rollback committed_bytesize = chunk.bytesize if split.size == 1 # Check BufferChunkOverflowError again if adding_bytes > @chunk_limit_size errors << "concatenated/appended a #{adding_bytes} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})" writing_splits_index += 1 next else # As already processed content is kept after rollback, then unstaged chunk should be queued. # After that, re-process current split again. # New chunk should be allocated, to do it, modify @stage and so on. synchronize { @stage.delete(modified_metadata) } staged_chunk_used = false chunk.unstaged! break end end if chunk_size_full?(chunk) || split.size == 1 enqueue_chunk_before_retry = true else splits_count *= 10 end raise ShouldRetry end writing_splits_index += 1 if chunk_size_full?(chunk) break end end rescue chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it raise end modified_chunks.last[:adding_bytesize] = chunk.bytesize - original_bytesize end end modified_chunks.each do |data| block.call(data[:chunk], data[:adding_bytesize], data[:errors]) end rescue ShouldRetry modified_chunks.each do |data| chunk = data[:chunk] chunk.rollback rescue nil if chunk.unstaged? chunk.purge rescue nil end end enqueue_chunk(metadata) if enqueue_chunk_before_retry retry end STATS_KEYS = [ 'stage_length', 'stage_byte_size', 'queue_length', 'queue_byte_size', 'available_buffer_space_ratios', 'total_queued_size', 'oldest_timekey', 'newest_timekey' ] def statistics stage_size, queue_size = @stage_size_metrics.get, @queue_size_metrics.get buffer_space = 1.0 - ((stage_size + queue_size * 1.0) / @total_limit_size) @stage_length_metrics.set(@stage.size) @queue_length_metrics.set(@queue.size) @available_buffer_space_ratios_metrics.set(buffer_space * 100) @total_queued_size_metrics.set(stage_size + queue_size) stats = { 'stage_length' => @stage_length_metrics.get, 'stage_byte_size' => stage_size, 'queue_length' => @queue_length_metrics.get, 'queue_byte_size' => queue_size, 'available_buffer_space_ratios' => @available_buffer_space_ratios_metrics.get.round(1), 'total_queued_size' => @total_queued_size_metrics.get, } tkeys = timekeys if (m = tkeys.min) @oldest_timekey_metrics.set(m) stats['oldest_timekey'] = @oldest_timekey_metrics.get end if (m = tkeys.max) @newest_timekey_metrics.set(m) stats['newest_timekey'] = @newest_timekey_metrics.get end { 'buffer' => stats } end def backup(chunk_unique_id) unique_id = dump_unique_id_hex(chunk_unique_id) if @disable_chunk_backup log.warn "disable_chunk_backup is true. #{unique_id} chunk is not backed up." return end safe_owner_id = owner.plugin_id.gsub(/[ "\/\\:;|*<>?]/, '_') backup_base_dir = system_config.root_dir || DEFAULT_BACKUP_DIR backup_file = File.join(backup_base_dir, 'backup', "worker#{fluentd_worker_id}", safe_owner_id, "#{unique_id}.log") backup_dir = File.dirname(backup_file) log.warn "bad chunk is moved to #{backup_file}" FileUtils.mkdir_p(backup_dir, mode: system_config.dir_permission || Fluent::DEFAULT_DIR_PERMISSION) unless Dir.exist?(backup_dir) File.open(backup_file, 'ab', system_config.file_permission || Fluent::DEFAULT_FILE_PERMISSION) { |f| yield f } end private def optimistic_queued?(metadata = nil) if metadata n = @queued_num[metadata] n && n.nonzero? else !@queue.empty? end end end end end