lib/red_amber/subframes.rb in red_amber-0.4.1 vs lib/red_amber/subframes.rb in red_amber-0.4.2
- old
+ new
@@ -8,10 +8,42 @@
include Helper
using RefineArray
using RefineArrayLike
+ # Base class holding the selectors used to pick sub-dataframes; Filters and Indices specialize it.
+ class Selectors
+ attr_reader :selectors, :size, :sizes
+
+ def initialize(selectors)
+ @selectors = selectors
+ @size = selectors.size # number of selectors, used by SubFrames#size as the number of subsets
+ @sizes = [] # the base class has no per-subset sizes; subclasses override #sizes
+ end
+
+ def each # NOTE(review): accepts no block and forwards none; callers get whatever @selectors.each returns when called without a block
+ @selectors.each
+ end
+ end
+
+ # Boolean selectors of sub-dataframes (each one is passed to `filter` on the base DataFrame)
+ class Filters < Selectors
+ def sizes
+ # Size of each subset is the number of truthy elements in its filter; recomputed on every call.
+ @sizes = @selectors.map { |s| s.to_a.count { _1 } } # rubocop:disable Performance/Size
+ end
+ end
+
+ # Index selectors of sub-dataframes (each one is passed to `take` on the base DataFrame)
+ class Indices < Selectors
+ def sizes # size of each subset is the number of indices in its selector; recomputed on every call
+ @sizes = @selectors.map(&:size)
+ end
+ end
+
+ private_constant :Selectors, :Filters, :Indices
+
class << self
# Create SubFrames from a Group.
#
# [Experimental feature] this method may be removed or be changed in the future.
# @param group [Group]
@@ -77,17 +109,12 @@
# @since 0.4.0
#
def by_indices(dataframe, subset_indices)
instance = allocate # allocate skips #initialize; the instance state is set directly below
instance.instance_variable_set(:@baseframe, dataframe)
- enum =
- Enumerator.new(subset_indices.size) do |y|
- subset_indices.each do |i|
- y.yield DataFrame.new_dataframe_with_schema(dataframe, dataframe.take(i))
- end
- end
- instance.instance_variable_set(:@enum, enum)
+ instance.instance_variable_set(:@selectors, Indices.new(subset_indices)) # only the indices are stored; sub-dataframes are built later in #each
+ instance.instance_variable_set(:@frames, []) # cache of the sub-dataframes built so far
instance
end
# Create a new SubFrames object from a DataFrame and an array of filters.
#
@@ -103,17 +130,12 @@
# @since 0.4.0
#
def by_filters(dataframe, subset_filters)
instance = allocate # allocate skips #initialize; the instance state is set directly below
instance.instance_variable_set(:@baseframe, dataframe)
- enum =
- Enumerator.new(subset_filters.size) do |y|
- subset_filters.each do |i|
- y.yield DataFrame.new_dataframe_with_schema(dataframe, dataframe.filter(i))
- end
- end
- instance.instance_variable_set(:@enum, enum)
+ instance.instance_variable_set(:@selectors, Filters.new(subset_filters)) # only the filters are stored; sub-dataframes are built later in #each
+ instance.instance_variable_set(:@frames, []) # cache of the sub-dataframes built so far
instance
end
# Create a new SubFrames from an Array of DataFrames.
#
@@ -128,22 +150,17 @@
def by_dataframes(dataframes)
instance = allocate # allocate skips #initialize; the instance state is set directly below
case Array(dataframes)
when [] || [nil] # NOTE(review): `[] || [nil]` evaluates to `[]`, so `[nil]` is never matched here; `when [], [nil]` looks intended
instance.instance_variable_set(:@baseframe, DataFrame.new)
+ instance.instance_variable_set(:@selectors, []) # NOTE(review): plain Array, whereas #initialize uses Selectors.new([]); #sizes calls @selectors.sizes - confirm an Array responds to it
instance.instance_variable_set(:@frames, [])
- enum = [].each
else
- enum =
- Enumerator.new(dataframes.size) do |y|
- dataframes.each do |i|
- y.yield i
- end
- end
- instance.instance_variable_set(:@baseframe, enum.lazy)
+ instance.instance_variable_set(:@baseframe, nil) # left nil; #baseframe fills it on first use
+ instance.instance_variable_set(:@selectors, nil) # no selectors: #each, #size and #sizes fall back to @frames
+ instance.instance_variable_set(:@frames, dataframes)
end
- instance.instance_variable_set(:@enum, enum)
instance
end
private
@@ -259,59 +276,49 @@
# 1 3 B false
# 2 6 C false
#
# @since 0.4.0
#
- def initialize(dataframe, subset_specifier = nil, &block)
+ def initialize(dataframe, selectors = nil, &block)
unless dataframe.is_a?(DataFrame)
raise SubFramesArgumentError, "not a DataFrame: #{dataframe}"
end
if block # selectors come either from the argument or from the block, never both
- unless subset_specifier.nil?
+ unless selectors.nil?
raise SubFramesArgumentError, 'Must not specify both arguments and block.'
end
- subset_specifier = yield(dataframe)
+ selectors = yield(dataframe)
end
- if dataframe.empty? || subset_specifier.nil? || subset_specifier.empty?
+ if dataframe.empty? || selectors.nil? || selectors.empty?
@baseframe = DataFrame.new
- @frames = []
- @enum = @frames.each
+ @selectors = Selectors.new([]) # empty SubFrames: empty base DataFrame and no selectors
else
- @baseframe = nil
- @enum =
- Enumerator.new(subset_specifier.size) do |yielder|
- subset_specifier.map do |i|
- df =
- if i.numeric?
- dataframe.take(i)
- elsif i.boolean?
- dataframe.filter(i)
- else
- raise SubFramesArgumentError, "illegal type: #{i}"
- end
- yielder.yield DataFrame.new_dataframe_with_schema(dataframe, df)
- end
+ @baseframe = dataframe
+ @selectors =
+ if selectors[0].boolean? # only the first selector is inspected to choose between Filters and Indices
+ Filters.new(selectors)
+ elsif selectors[0].numeric?
+ Indices.new(selectors)
+ else
+ raise SubFramesArgumentError, "illegal type: #{selectors}"
end
end
+ @frames = [] # cache of sub-dataframes; filled as #each builds them
end
# Return concatenated SubFrames as a DataFrame.
#
# Once evaluated, the result is memoized in @baseframe.
# @return [DataFrame]
# a concatenated DataFrame.
# @since 0.4.0
#
def baseframe
- if @baseframe.nil? || @baseframe.is_a?(Enumerator)
- @baseframe = reduce(&:concatenate)
- else
- @baseframe
- end
+ @baseframe ||= reduce(&:concatenate) # concatenation runs only while @baseframe is still nil
end
alias_method :concatenate, :baseframe
alias_method :concat, :baseframe
# Iterates over sub DataFrames or returns an Enumerator.
@@ -382,11 +389,23 @@
# @since 0.4.0
#
def each(&block)
return enum_for(__method__) { size } unless block # without a block, return an Enumerator whose size is #size
- frames.each(&block)
+ if @selectors
+ @selectors.each.with_index do |selector, i|
+ if i < @frames.size # already built on an earlier pass: reuse the cached frame
+ yield @frames[i]
+ else
+ frame = get_subframe(selector)
+ @frames << frame # cache it so later iterations do not rebuild it
+ yield frame
+ end
+ end
+ else # no selectors (set to nil by by_dataframes): iterate the stored frames directly
+ @frames.each(&block)
+ end
nil
end
# Aggregate SubFrames to create a DataFrame.
#
@@ -914,28 +933,58 @@
#
# @since 0.4.0
#
define_subframable_method :filter_map
+ # Return the first `num` sub-dataframes of self as a SubFrames.
+ #
+ # @param num [Integer, Float]
+ # number of sub-dataframes to pick up. `num` must be positive or zero.
+ # @return [SubFrames]
+ # A new SubFrames.
+ # If num == 0, it returns empty SubFrames.
+ # If num >= size, it returns self.
+ # @since 0.4.2
+ #
+ def take(num)
+ if num.zero?
+ SubFrames.new(DataFrame.new, [])
+ elsif num >= size
+ self
+ else
+ SubFrames.by_dataframes(frames(num))
+ end
+ end
+
# Number of subsets.
#
# @return [Integer]
# number of subsets in self.
# @since 0.4.0
#
def size
- @size ||= @enum.size
+ @size ||= # memoized after the first call
+ if @selectors
+ @selectors.size
+ else # no selectors: count the stored frames
+ @frames.size
+ end
end
# Size list of subsets.
#
# @return [Array<Integer>]
# sizes of sub DataFrames.
# @since 0.4.0
#
def sizes
- @sizes ||= @enum.map(&:size)
+ @sizes ||= # memoized after the first call
+ if @selectors
+ @selectors.sizes # computed from the selectors, without building the sub-dataframes
+ else
+ @frames.map(&:size)
+ end
end
# Indices at the top of each sub DataFrame.
#
# @return [Array<Integer>]
@@ -943,14 +992,21 @@
# @example When `sizes` is [2, 3, 1].
# subframes.offset_indices # => [0, 2, 5]
# @since 0.4.0
#
def offset_indices
- sum = 0
- sizes.map do |size|
- sum += size
- sum - size
+ case @selectors
+ when Filters
+ @selectors.selectors.map do |selector|
+ selector.each.with_index.find { |x, _| x }[1] # index of the first truthy element; NOTE(review): raises NoMethodError if find returns nil (no truthy element) - confirm a filter is never all-false
+ end
+ else # Indices, nil: running total of the sizes of the preceding subsets
+ sum = 0
+ sizes.map do |size|
+ sum += size
+ sum - size
+ end
end
end
# Test if subset is empty.
#
@@ -963,15 +1019,15 @@
end
# Test if self has only one subset and it is comprehensive.
#
# @return [true, false]
- # true if only member of self is equal to universal DataFrame.
+ # true if the only member of self is equal to universal DataFrame.
# @since 0.4.0
#
def universal?
- size == 1 && @enum.first == baseframe
+ size == 1 && first == @baseframe # NOTE(review): compares with the @baseframe ivar, not #baseframe; for by_dataframes instances it is nil until #baseframe has been called - confirm intended
end
# Return string representation of self.
#
# @param limit [Integer]
@@ -1010,11 +1066,11 @@
# <uint8> <string> <boolean>
# 0 6 C false
#
# @since 0.4.0
#
- def to_s(limit: 16)
+ def to_s(limit: 5)
_to_s(limit: limit) # rendering is delegated to the private _to_s helper
end
# Return summary information of self.
#
@@ -1062,14 +1118,14 @@
# <uint8> <string> <boolean>
# 0 6 C false
#
# @since 0.4.0
#
- def inspect(limit: 16)
+ def inspect(limit: 5)
shape =
- if @baseframe.is_a?(Enumerator)
- "Enumerator::Lazy:size=#{@baseframe.size}"
+ if @baseframe.nil?
+ '(Not prepared)'
else
baseframe.shape_str(with_id: true)
end
sizes_truncated = (size > limit ? sizes.take(limit) << '...' : sizes).join(', ')
"#<#{self.class} : #{format('0x%016x', object_id)}>\n" \
@@ -1077,17 +1133,54 @@
"#{size} SubFrame#{pl(size)}: " \
"[#{sizes_truncated}] in size#{pl(size)}.\n" \
"---\n#{_to_s(limit: limit, with_id: true)}"
end
+ # Return an Array of sub DataFrames, building and caching any that are not built yet.
+ #
+ # @overload frames
+ # Returns all sub dataframes.
+ #
+ # @return [Array<DataFrame>]
+ # sub DataFrames.
+ #
+ # @overload frames(n_frames)
+ # Returns partial sub dataframes.
+ #
+ # @param n_frames [Integer]
+ # number of dataframes to retrieve, counted from the first one.
+ # @return [Array<DataFrame>]
+ # sub DataFrames.
+ #
+ # @since 0.4.2
+ #
+ def frames(n_frames = nil)
+ n_frames = size if n_frames.nil?
+
+ if @frames.size < n_frames # fewer frames cached than requested: let #each build the missing ones
+ @frames = each.take(n_frames)
+ else
+ @frames.take(n_frames)
+ end
+ end
+
private
- def frames
- @frames ||= @enum.to_a
+ # Build the sub dataframe picked by 'selector': `filter` for Filters, `take` for Indices.
+ def get_subframe(selector)
+ df =
+ case @selectors
+ when Filters
+ @baseframe.filter(selector)
+ when Indices
+ @baseframe.take(selector)
+ end # no else branch: df is nil when @selectors is neither Filters nor Indices
+ DataFrame.new_dataframe_with_schema(@baseframe, df)
end
- def _to_s(limit: 16, with_id: false)
- a = take(limit).map do |df|
+ # Helper that renders the sub DataFrames as a string for to_s and inspect
+ def _to_s(limit: 5, with_id: false)
+ a = each.take(limit).map do |df|
if with_id
"#<#{df.shape_str(with_id: with_id)}>\n" \
"#{df.to_s(head: 2, tail: 2)}"
else
df.to_s(head: 2, tail: 2)