lib/rumale/ensemble/extra_trees_regressor.rb in rumale-0.10.0 vs lib/rumale/ensemble/extra_trees_regressor.rb in rumale-0.11.0

- old
+ new

@@ -41,17 +41,21 @@ # @param max_leaf_nodes [Integer] The maximum number of leaves on extra tree. # If nil is given, number of leaves is not limited. # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node. # @param max_features [Integer] The number of features to consider when searching optimal split point. # If nil is given, split process considers all features. + # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel. + # If nil is given, the methods do not execute in parallel. + # If zero or less is given, it becomes equal to the number of processors. + # This parameter is ignored if the Parallel gem is not loaded. # @param random_seed [Integer] The seed value using to initialize the random generator. # It is used to randomly determine the order of features when deciding spliting point. def initialize(n_estimators: 10, criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, - max_features: nil, random_seed: nil) + max_features: nil, n_jobs: nil, random_seed: nil) check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes, - max_features: max_features, random_seed: random_seed) + max_features: max_features, n_jobs: n_jobs, random_seed: random_seed) check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf) check_params_string(criterion: criterion) check_params_positive(n_estimators: n_estimators, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf, max_features: max_features) @@ -69,22 +73,23 @@ check_sample_tvalue_size(x, y) # Initialize some variables. n_features = x.shape[1] @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer) @params[:max_features] = [[1, @params[:max_features]].max, n_features].min - @feature_importances = Numo::DFloat.zeros(n_features) # Construct forest. - @estimators = Array.new(@params[:n_estimators]) do - tree = Tree::ExtraTreeRegressor.new( - criterion: @params[:criterion], max_depth: @params[:max_depth], - max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf], - max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max) - ) - tree.fit(x, y) - @feature_importances += tree.feature_importances - tree - end + rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) } + @estimators = if enable_parallel? + parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) } + else + Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) } + end + @feature_importances = + if enable_parallel? + parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+) + else + @estimators.map(&:feature_importances).reduce(&:+) + end @feature_importances /= @feature_importances.sum self end # Predict values for samples. @@ -113,9 +118,19 @@ # Load marshal data. # @return [nil] def marshal_load(obj) super + end + + private + + def plant_tree(rnd_seed) + Tree::ExtraTreeRegressor.new( + criterion: @params[:criterion], max_depth: @params[:max_depth], + max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf], + max_features: @params[:max_features], random_seed:rnd_seed + ) end end end end