# Skip to contents
library(nadir)

# dependencies ------------------------------------------------------------
library(pacman)
pacman::p_load('nadir', 'dplyr', 'sl3', 'SuperLearner')

# Bring the Boston housing data (shipped with MASS) into the workspace.
data(Boston, package = "MASS")

# See ?MASS::Boston for a description of the columns.

# The response: the medv column, pulled out as a plain vector.
outcome <- Boston[["medv"]]

# The predictors: every Boston column except medv, kept as a data frame.
data <- Boston[, setdiff(names(Boston), "medv"), drop = FALSE]

# Fit SuperLearner::SuperLearner on the Boston data with the mean, glmnet,
# and ranger learners under 10-fold cross-validation, timing the whole fit.
sl_started <- Sys.time()
sl <- SuperLearner(
  Y = outcome,
  X = data,
  SL.library = c("SL.mean", "SL.glmnet", "SL.ranger"),
  cvControl = list(V = 10)
)
#> Loading required namespace: glmnet
sl_finished <- Sys.time()
print(difftime(sl_finished, sl_started))
#> Time difference of 1.578969 secs

# Auto-print the fitted SuperLearner object. The recorded output below lists,
# for each candidate learner, a Risk value and the Coef (weight) it received
# in the ensemble; in this run ranger has the lowest risk and takes all of
# the weight.
sl
#> 
#> Call:  
#> SuperLearner(Y = outcome, X = data, SL.library = c("SL.mean", "SL.glmnet",  
#>     "SL.ranger"), cvControl = list(V = 10)) 
#> 
#> 
#>                   Risk Coef
#> SL.mean_All   84.77375    0
#> SL.glmnet_All 23.58582    0
#> SL.ranger_All 10.53051    1

# Timing stored on the fit object itself (user / system / elapsed), which can
# be compared with the wall-clock difference measured via Sys.time() above.
sl$times$everything
#>    user  system elapsed 
#>   1.590   0.086   1.578



# using nadir::super_learner ----------------------------------------------

# Time the equivalent fit with nadir::super_learner: same three learners,
# 10 folds, on the full Boston data frame.
nadir_started <- Sys.time()

sl_output <- nadir::super_learner(
  data = Boston,
  formulas = medv ~ ., # medv regressed on every other column
  learners = list(
    mean = lnr_mean,
    glmnet = lnr_glmnet,
    ranger = lnr_ranger
  ),
  n_folds = 10,
  verbose_output = TRUE
)

nadir_finished <- Sys.time()

print(difftime(nadir_finished, nadir_started))
#> Time difference of 0.5208721 secs


# using sl3 ---------------------------------------------------------------

# Set up the sl3 version of the benchmark.

# Describe the prediction problem: medv is the outcome and every other
# Boston column is a covariate.
task <- sl3_Task$new(
  data = Boston,
  covariates = setdiff(names(Boston), "medv"),
  outcome = "medv"
)

# Instantiate the three candidate learners with their default settings.
mean_learner <- Lrnr_mean$new()
glmnet_learner <- Lrnr_glmnet$new()
ranger_learner <- Lrnr_ranger$new()

# Bundle the candidates into a single stack.
stack <- Stack$new(glmnet_learner, mean_learner, ranger_learner)

# Define the super learner: the stack combined through an NNLS metalearner,
# with V = 10 passed as the cross-validation control.
# Note: this reuses the name `sl`, replacing the SuperLearner fit created
# earlier in the script.
sl <- Lrnr_sl$new(
  learners = stack,
  metalearner = Lrnr_nnls$new(),
  cv_control = list(V = 10)
)

# Fit the sl3 super learner on the task, recording wall-clock time around
# the call. The result is not assigned, so the fit is auto-printed.
a <- Sys.time()
sl$train(task)
#> [1] "Cross-validated risk:"
#> Key: <learner>
#>                                    learner coefficients      MSE       se
#>                                     <fctr>        <num>    <num>    <num>
#> 1: Lrnr_glmnet_NULL_deviance_10_1_100_TRUE            0 23.55781 2.860840
#> 2:                               Lrnr_mean            0 84.90519 7.020111
#> 3:             Lrnr_ranger_500_TRUE_none_1            1 10.76457 1.758499
#>      fold_sd fold_min_MSE fold_max_MSE
#>        <num>        <num>        <num>
#> 1:  4.744515    17.976672     34.38179
#> 2: 12.700613    58.969258    100.15607
#> 3:  4.210497     6.024291     19.11189
b <- Sys.time()

# Elapsed time for the sl3 fit.
print(difftime(b, a))
#> Time difference of 2.5346 secs