library(nadir)
# dependencies ------------------------------------------------------------
library(pacman)
pacman::p_load('nadir', 'dplyr', 'sl3', 'SuperLearner')
# Pull the Boston housing data out of MASS without attaching the package.
data(Boston, package = "MASS")
# Dataset documentation is available via ?MASS::Boston
# Response: the `medv` column, taken from the data frame as a plain vector.
outcome <- Boston[["medv"]]
# Predictors: every column of Boston other than `medv`, kept as a data frame.
data <- Boston[setdiff(names(Boston), "medv")]
# Benchmark SuperLearner::SuperLearner on the Boston data using a library of
# three learners (mean, glmnet, ranger) and 10 cross-validation folds.
# Timestamps are taken immediately before and after the fitting call.
a <- Sys.time()
sl <- SuperLearner(
  Y = outcome,
  X = data,
  SL.library = c("SL.mean", "SL.glmnet", "SL.ranger"),
  cvControl = list(V = 10)
)
#> Loading required namespace: glmnet
b <- Sys.time()
# Wall-clock time spent in the call above.
print(difftime(b, a))
#> Time difference of 1.578969 secs
# Show the fitted ensemble: per-learner risk and weight.
sl
#>
#> Call:
#> SuperLearner(Y = outcome, X = data, SL.library = c("SL.mean", "SL.glmnet",
#> "SL.ranger"), cvControl = list(V = 10))
#>
#>
#> Risk Coef
#> SL.mean_All 84.77375 0
#> SL.glmnet_All 23.58582 0
#> SL.ranger_All 10.53051 1
# Timing stored on the fitted object itself, for comparison with the
# Sys.time() difference printed above.
sl$times$everything
#> user system elapsed
#> 1.590 0.086 1.578
# using nadir::super_learner ----------------------------------------------
# Benchmark nadir::super_learner on the same problem: the same three learner
# types (mean, glmnet, ranger) and 10 folds, specified through a formula on
# the full Boston data frame.
a <- Sys.time()
sl_output <- nadir::super_learner(
  data = Boston,
  formulas = medv ~ ., # regress medv on everything
  learners = list(
    mean = lnr_mean,
    glmnet = lnr_glmnet,
    ranger = lnr_ranger
  ),
  n_folds = 10,
  verbose_output = TRUE
)
b <- Sys.time()
# Wall-clock time spent in the call above.
print(b - a)
#> Time difference of 0.5208721 secs
# using sl3 ---------------------------------------------------------------
# Benchmark sl3 on the same problem: medv regressed on all other Boston
# columns, with glmnet, mean and ranger learners and V = 10 folds.
task <- sl3_Task$new(
  data = Boston,
  covariates = setdiff(colnames(Boston), "medv"),
  outcome = "medv"
)
# set up learners
glmnet_learner <- Lrnr_glmnet$new()
mean_learner <- Lrnr_mean$new()
ranger_learner <- Lrnr_ranger$new()
# create a stack
stack <- Stack$new(glmnet_learner, mean_learner, ranger_learner)
# construct the learner
sl <- Lrnr_sl$new(
  learners = stack,
  metalearner = Lrnr_nnls$new(),
  cv_control = list(V = 10)
)
# Time only the training call. The result is assigned instead of being left to
# auto-print: in the recorded run the print-out (including the cross-validated
# risk table) appeared before the end timestamp was taken, so it was counted in
# the elapsed time, unlike the SuperLearner and nadir sections, which assign
# their results inside the timed region.
a <- Sys.time()
sl_fit <- sl$train(task)
b <- Sys.time()
# report on time taken for sl3
# NOTE(review): the recorded figure below was measured with the print-out
# inside the timed region; re-run the script to refresh it.
print(b - a)
#> Time difference of 2.5346 secs
# Show the trained fit, outside the timed region.
print(sl_fit)
#> [1] "Cross-validated risk:"
#> Key: <learner>
#> learner coefficients MSE se
#> <fctr> <num> <num> <num>
#> 1: Lrnr_glmnet_NULL_deviance_10_1_100_TRUE 0 23.55781 2.860840
#> 2: Lrnr_mean 0 84.90519 7.020111
#> 3: Lrnr_ranger_500_TRUE_none_1 1 10.76457 1.758499
#> fold_sd fold_min_MSE fold_max_MSE
#> <num> <num> <num>
#> 1: 4.744515 17.976672 34.38179
#> 2: 12.700613 58.969258 100.15607
#> 3: 4.210497 6.024291 19.11189