lib/rumale/ensemble/extra_trees_regressor.rb in rumale-0.10.0 vs lib/rumale/ensemble/extra_trees_regressor.rb in rumale-0.11.0
- old
+ new
@@ -41,17 +41,21 @@
# @param max_leaf_nodes [Integer] The maximum number of leaves on an extra tree.
# If nil is given, the number of leaves is not limited.
# @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
# @param max_features [Integer] The number of features to consider when searching optimal split point.
# If nil is given, split process considers all features.
+ # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+ # If nil is given, the methods do not execute in parallel.
+ # If zero or less is given, it becomes equal to the number of processors.
+ # This parameter is ignored if the Parallel gem is not loaded.
# @param random_seed [Integer] The seed value used to initialize the random generator.
# It is used to randomly determine the order of features when deciding the splitting point.
def initialize(n_estimators: 10,
criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
- max_features: nil, random_seed: nil)
+ max_features: nil, n_jobs: nil, random_seed: nil)
check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
- max_features: max_features, random_seed: random_seed)
+ max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
check_params_string(criterion: criterion)
check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
max_features: max_features)
@@ -69,22 +73,23 @@
check_sample_tvalue_size(x, y)
# Initialize some variables.
n_features = x.shape[1]
@params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
@params[:max_features] = [[1, @params[:max_features]].max, n_features].min
- @feature_importances = Numo::DFloat.zeros(n_features)
# Construct forest.
- @estimators = Array.new(@params[:n_estimators]) do
- tree = Tree::ExtraTreeRegressor.new(
- criterion: @params[:criterion], max_depth: @params[:max_depth],
- max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
- max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
- )
- tree.fit(x, y)
- @feature_importances += tree.feature_importances
- tree
- end
+ rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+ @estimators = if enable_parallel?
+ parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+ else
+ Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+ end
+ @feature_importances =
+ if enable_parallel?
+ parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+ else
+ @estimators.map(&:feature_importances).reduce(&:+)
+ end
@feature_importances /= @feature_importances.sum
self
end
# Predict values for samples.
@@ -113,9 +118,19 @@
# Load marshal data.
# @return [nil]
def marshal_load(obj)
super
+ end
+
+ private
+
+ def plant_tree(rnd_seed)
+ Tree::ExtraTreeRegressor.new(
+ criterion: @params[:criterion], max_depth: @params[:max_depth],
+ max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+ max_features: @params[:max_features], random_seed:rnd_seed
+ )
end
end
end
end