Random Forest

A Random Forest is an ensemble learning method that constructs multiple decision trees during training. It predicts by applying a combination rule to the outputs of the individual decision trees.

See the Random Forest cookbook page for using a random forest for classification. See [Bre01] for a detailed introduction.

Example

CDenseFeatures (here 64 bit floats aka RealFeatures) and CRegressionLabels are created from the training and test data files.

features_train = RealFeatures(f_feats_train)
features_test = RealFeatures(f_feats_test)
labels_train = RegressionLabels(f_labels_train)
labels_test = RegressionLabels(f_labels_test)
features_train = RealFeatures(f_feats_train);
features_test = RealFeatures(f_feats_test);
labels_train = RegressionLabels(f_labels_train);
labels_test = RegressionLabels(f_labels_test);
RealFeatures features_train = new RealFeatures(f_feats_train);
RealFeatures features_test = new RealFeatures(f_feats_test);
RegressionLabels labels_train = new RegressionLabels(f_labels_train);
RegressionLabels labels_test = new RegressionLabels(f_labels_test);
features_train = Modshogun::RealFeatures.new f_feats_train
features_test = Modshogun::RealFeatures.new f_feats_test
labels_train = Modshogun::RegressionLabels.new f_labels_train
labels_test = Modshogun::RegressionLabels.new f_labels_test
features_train <- RealFeatures(f_feats_train)
features_test <- RealFeatures(f_feats_test)
labels_train <- RegressionLabels(f_labels_train)
labels_test <- RegressionLabels(f_labels_test)
features_train = modshogun.RealFeatures(f_feats_train)
features_test = modshogun.RealFeatures(f_feats_test)
labels_train = modshogun.RegressionLabels(f_labels_train)
labels_test = modshogun.RegressionLabels(f_labels_test)
RealFeatures features_train = new RealFeatures(f_feats_train);
RealFeatures features_test = new RealFeatures(f_feats_test);
RegressionLabels labels_train = new RegressionLabels(f_labels_train);
RegressionLabels labels_test = new RegressionLabels(f_labels_test);
auto features_train = some<CDenseFeatures<float64_t>>(f_feats_train);
auto features_test = some<CDenseFeatures<float64_t>>(f_feats_test);
auto labels_train = some<CRegressionLabels>(f_labels_train);
auto labels_test = some<CRegressionLabels>(f_labels_test);

Combination rules to be used for prediction are derived from the CCombinationRule class. Here we create a CMeanRule instance to be used as the combination rule.

mean_rule = MeanRule()
mean_rule = MeanRule();
MeanRule mean_rule = new MeanRule();
mean_rule = Modshogun::MeanRule.new 
mean_rule <- MeanRule()
mean_rule = modshogun.MeanRule()
MeanRule mean_rule = new MeanRule();
auto mean_rule = some<CMeanRule>();

Next an instance of CRandomForest is created. The parameters provided are the number of attributes to be chosen randomly to select from and the number of trees.

rand_forest = RandomForest(features_train, labels_train, 5)
rand_forest.set_combination_rule(mean_rule)
rand_forest = RandomForest(features_train, labels_train, 5);
rand_forest.set_combination_rule(mean_rule);
RandomForest rand_forest = new RandomForest(features_train, labels_train, 5);
rand_forest.set_combination_rule(mean_rule);
rand_forest = Modshogun::RandomForest.new features_train, labels_train, 5
rand_forest.set_combination_rule mean_rule
rand_forest <- RandomForest(features_train, labels_train, 5)
rand_forest$set_combination_rule(mean_rule)
rand_forest = modshogun.RandomForest(features_train, labels_train, 5)
rand_forest:set_combination_rule(mean_rule)
RandomForest rand_forest = new RandomForest(features_train, labels_train, 5);
rand_forest.set_combination_rule(mean_rule);
auto rand_forest = some<CRandomForest>(features_train, labels_train, 5);
rand_forest->set_combination_rule(mean_rule);

Then we train the random forest and apply it to test data, which here gives CRegressionLabels.

rand_forest.train()
labels_predict = rand_forest.apply_regression(features_test)
rand_forest.train();
labels_predict = rand_forest.apply_regression(features_test);
rand_forest.train();
RegressionLabels labels_predict = rand_forest.apply_regression(features_test);
rand_forest.train 
labels_predict = rand_forest.apply_regression features_test
rand_forest$train()
labels_predict <- rand_forest$apply_regression(features_test)
rand_forest:train()
labels_predict = rand_forest:apply_regression(features_test)
rand_forest.train();
RegressionLabels labels_predict = rand_forest.apply_regression(features_test);
rand_forest->train();
auto labels_predict = rand_forest->apply_regression(features_test);

We can evaluate test performance via e.g. CMeanSquaredError as well as get the “out of bag error”.

mse = MeanSquaredError()
oob = rand_forest.get_oob_error(mse)
mserror = mse.evaluate(labels_predict, labels_test)
mse = MeanSquaredError();
oob = rand_forest.get_oob_error(mse);
mserror = mse.evaluate(labels_predict, labels_test);
MeanSquaredError mse = new MeanSquaredError();
double oob = rand_forest.get_oob_error(mse);
double mserror = mse.evaluate(labels_predict, labels_test);
mse = Modshogun::MeanSquaredError.new 
oob = rand_forest.get_oob_error mse
mserror = mse.evaluate labels_predict, labels_test
mse <- MeanSquaredError()
oob <- rand_forest$get_oob_error(mse)
mserror <- mse$evaluate(labels_predict, labels_test)
mse = modshogun.MeanSquaredError()
oob = rand_forest:get_oob_error(mse)
mserror = mse:evaluate(labels_predict, labels_test)
MeanSquaredError mse = new MeanSquaredError();
double oob = rand_forest.get_oob_error(mse);
double mserror = mse.evaluate(labels_predict, labels_test);
auto mse = some<CMeanSquaredError>();
auto oob = rand_forest->get_oob_error(mse);
auto mserror = mse->evaluate(labels_predict, labels_test);

References

Wikipedia: Random_forest

Wikipedia: Out-of-bag_error

[Bre01]Leo Breiman. Random forests. Machine Learning, 45:5–32, 2001.