lib/red_amber/subframes.rb in red_amber-0.4.1 vs lib/red_amber/subframes.rb in red_amber-0.4.2

- old
+ new

@@ -8,10 +8,42 @@ include Helper using RefineArray using RefineArrayLike + # Entity to select sub-dataframes + class Selectors + attr_reader :selectors, :size, :sizes + + def initialize(selectors) + @selectors = selectors + @size = selectors.size + @sizes = [] + end + + def each + @selectors.each + end + end + + # Boolean selectors of sub-dataframes + class Filters < Selectors + def sizes + # count true + @sizes = @selectors.map { |s| s.to_a.count { _1 } } # rubocop:disable Performance/Size + end + end + + # Index selectors of sub-dataframes + class Indices < Selectors + def sizes + @sizes = @selectors.map(&:size) + end + end + + private_constant :Selectors, :Filters, :Indices + class << self # Create SubFrames from a Group. # # [Experimental feature] this method may be removed or be changed in the future. # @param group [Group] @@ -77,17 +109,12 @@ # @since 0.4.0 # def by_indices(dataframe, subset_indices) instance = allocate instance.instance_variable_set(:@baseframe, dataframe) - enum = - Enumerator.new(subset_indices.size) do |y| - subset_indices.each do |i| - y.yield DataFrame.new_dataframe_with_schema(dataframe, dataframe.take(i)) - end - end - instance.instance_variable_set(:@enum, enum) + instance.instance_variable_set(:@selectors, Indices.new(subset_indices)) + instance.instance_variable_set(:@frames, []) instance end # Create a new SubFrames object from a DataFrame and an array of filters. # @@ -103,17 +130,12 @@ # @since 0.4.0 # def by_filters(dataframe, subset_filters) instance = allocate instance.instance_variable_set(:@baseframe, dataframe) - enum = - Enumerator.new(subset_filters.size) do |y| - subset_filters.each do |i| - y.yield DataFrame.new_dataframe_with_schema(dataframe, dataframe.filter(i)) - end - end - instance.instance_variable_set(:@enum, enum) + instance.instance_variable_set(:@selectors, Filters.new(subset_filters)) + instance.instance_variable_set(:@frames, []) instance end # Create a new SubFrames from an Array of DataFrames. # @@ -128,22 +150,17 @@ def by_dataframes(dataframes) instance = allocate case Array(dataframes) when [] || [nil] instance.instance_variable_set(:@baseframe, DataFrame.new) + instance.instance_variable_set(:@selectors, []) instance.instance_variable_set(:@frames, []) - enum = [].each else - enum = - Enumerator.new(dataframes.size) do |y| - dataframes.each do |i| - y.yield i - end - end - instance.instance_variable_set(:@baseframe, enum.lazy) + instance.instance_variable_set(:@baseframe, nil) + instance.instance_variable_set(:@selectors, nil) + instance.instance_variable_set(:@frames, dataframes) end - instance.instance_variable_set(:@enum, enum) instance end private @@ -259,59 +276,49 @@ # 1 3 B false # 2 6 C false # # @since 0.4.0 # - def initialize(dataframe, subset_specifier = nil, &block) + def initialize(dataframe, selectors = nil, &block) unless dataframe.is_a?(DataFrame) raise SubFramesArgumentError, "not a DataFrame: #{dataframe}" end if block - unless subset_specifier.nil? + unless selectors.nil? raise SubFramesArgumentError, 'Must not specify both arguments and block.' end - subset_specifier = yield(dataframe) + selectors = yield(dataframe) end - if dataframe.empty? || subset_specifier.nil? || subset_specifier.empty? + if dataframe.empty? || selectors.nil? || selectors.empty? @baseframe = DataFrame.new - @frames = [] - @enum = @frames.each + @selectors = Selectors.new([]) else - @baseframe = nil - @enum = - Enumerator.new(subset_specifier.size) do |yielder| - subset_specifier.map do |i| - df = - if i.numeric? - dataframe.take(i) - elsif i.boolean? - dataframe.filter(i) - else - raise SubFramesArgumentError, "illegal type: #{i}" - end - yielder.yield DataFrame.new_dataframe_with_schema(dataframe, df) - end + @baseframe = dataframe + @selectors = + if selectors[0].boolean? + Filters.new(selectors) + elsif selectors[0].numeric? + Indices.new(selectors) + else + raise SubFramesArgumentError, "illegal type: #{selectors}" end end + @frames = [] end # Return concatenated SubFrames as a DataFrame. # # Once evaluated, memorize it as @baseframe. # @return [DataFrame] # a concatenated DataFrame. # @since 0.4.0 # def baseframe - if @baseframe.nil? || @baseframe.is_a?(Enumerator) - @baseframe = reduce(&:concatenate) - else - @baseframe - end + @baseframe ||= reduce(&:concatenate) end alias_method :concatenate, :baseframe alias_method :concat, :baseframe # Iterates over sub DataFrames or returns an Enumerator. @@ -382,11 +389,23 @@ # @since 0.4.0 # def each(&block) return enum_for(__method__) { size } unless block - frames.each(&block) + if @selectors + @selectors.each.with_index do |selector, i| + if i < @frames.size + yield @frames[i] + else + frame = get_subframe(selector) + @frames << frame + yield frame + end + end + else + @frames.each(&block) + end nil end # Aggregate SubFrames to create a DataFrame. # @@ -914,28 +933,58 @@ # # @since 0.4.0 # define_subframable_method :filter_map + # Return 0...num sub-dataframes in self. + # + # @param num [Integer, Float] + # num of sub-dataframes to pick up. `num`` must be positive or zero. + # @return [SubFrames] + # A new SubFrames. + # If n == 0, it returns empty SubFrames. + # If n >= size, it returns self. + # @since 0.4.2 + # + def take(num) + if num.zero? + SubFrames.new(DataFrame.new, []) + elsif num >= size + self + else + SubFrames.by_dataframes(frames(num)) + end + end + # Number of subsets. # # @return [Integer] # number of subsets in self. # @since 0.4.0 # def size - @size ||= @enum.size + @size ||= + if @selectors + @selectors.size + else + @frames.size + end end # Size list of subsets. # # @return [Array<Integer>] # sizes of sub DataFrames. # @since 0.4.0 # def sizes - @sizes ||= @enum.map(&:size) + @sizes ||= + if @selectors + @selectors.sizes + else + @frames.map(&:size) + end end # Indices at the top of each sub DataFrames. # # @return [Array<Integer>] @@ -943,14 +992,21 @@ # @example When `sizes` is [2, 3, 1]. # subframes.offset_indices # => [0, 2, 5] # @since 0.4.0 # def offset_indices - sum = 0 - sizes.map do |size| - sum += size - sum - size + case @selectors + when Filters + @selectors.selectors.map do |selector| + selector.each.with_index.find { |x, _| x }[1] + end + else # Indices, nil + sum = 0 + sizes.map do |size| + sum += size + sum - size + end end end # Test if subset is empty?. # @@ -963,15 +1019,15 @@ end # Test if self has only one subset and it is comprehensive. # # @return [true, false] - # true if only member of self is equal to universal DataFrame. + # true if the only member of self is equal to universal DataFrame. # @since 0.4.0 # def universal? - size == 1 && @enum.first == baseframe + size == 1 && first == @baseframe end # Return string representation of self. # # @param limit [Integer] @@ -1010,11 +1066,11 @@ # <uint8> <string> <boolean> # 0 6 C false # # @since 0.4.0 # - def to_s(limit: 16) + def to_s(limit: 5) _to_s(limit: limit) end # Return summary information of self. # @@ -1062,14 +1118,14 @@ # <uint8> <string> <boolean> # 0 6 C false # # @since 0.4.0 # - def inspect(limit: 16) + def inspect(limit: 5) shape = - if @baseframe.is_a?(Enumerator) - "Enumerator::Lazy:size=#{@baseframe.size}" + if @baseframe.nil? + '(Not prepared)' else baseframe.shape_str(with_id: true) end sizes_truncated = (size > limit ? sizes.take(limit) << '...' : sizes).join(', ') "#<#{self.class} : #{format('0x%016x', object_id)}>\n" \ @@ -1077,17 +1133,54 @@ "#{size} SubFrame#{pl(size)}: " \ "[#{sizes_truncated}] in size#{pl(size)}.\n" \ "---\n#{_to_s(limit: limit, with_id: true)}" end + # Return an Array of sub DataFrames + # + # @overload frames + # Returns all sub dataframes. + # + # @return [Array<DataFrame>] + # sub DataFrames. + # + # @overload frames(n_frames) + # Returns partial sub dataframes. + # + # @param n_frames [Integer] + # num of dataframes to retrieve. + # @return [Array<DataFrame>] + # sub DataFrames. + # + # @since 0.4.2 + # + def frames(n_frames = nil) + n_frames = size if n_frames.nil? + + if @frames.size < n_frames + @frames = each.take(n_frames) + else + @frames.take(n_frames) + end + end + private - def frames - @frames ||= @enum.to_a + # Get sub dataframe specified by 'selector' + def get_subframe(selector) + df = + case @selectors + when Filters + @baseframe.filter(selector) + when Indices + @baseframe.take(selector) + end + DataFrame.new_dataframe_with_schema(@baseframe, df) end - def _to_s(limit: 16, with_id: false) - a = take(limit).map do |df| + # Subcontractor of to_s + def _to_s(limit: 5, with_id: false) + a = each.take(limit).map do |df| if with_id "#<#{df.shape_str(with_id: with_id)}>\n" \ "#{df.to_s(head: 2, tail: 2)}" else df.to_s(head: 2, tail: 2)