Skip to content

Commit

Permalink
Merge pull request #421 from tlverse/cv_sl-preds
Browse files Browse the repository at this point in the history
Cross-validated SL predictions
  • Loading branch information
rachaelvp authored Aug 18, 2023
2 parents 825019f + 1260063 commit fa669c7
Show file tree
Hide file tree
Showing 96 changed files with 1,755 additions and 367 deletions.
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ Suggests:
hal9001 (>= 0.4.4),
h2o,
keras,
kerasR,
nloptr,
nnls,
randomForest,
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@
differs from the detected one, a warning is now thrown. If `outcome_type` is
supplied but invalid, then an error is thrown upon `sl3_Task` instantiation,
opposed to learner training.
* Cross-validated super learner (`cv_sl`) returns the cross-validated
predictions for the super learner and its candidates.

# sl3 1.4.4
* Updates to `Lrnr_nnls` to support binary outcomes, including support for
Expand Down
10 changes: 5 additions & 5 deletions R/Lrnr_cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,22 @@ interpret_fold_number <- function(fold_number) {
#' This can then be accessed with predict_fold(task, fold_number="full")
#' }
#' }
#'
#' @examples
#'
#' @examples
#' library(origami)
#'
#'
#' # load example data
#' data(cpp_imputed)
#' covars <- c(
#' "apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn"
#' )
#' outcome <- "haz"
#'
#'
#' # create sl3 task
#' task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome)
#' glm_learner <- Lrnr_glm$new()
#' cv_glm <- Lrnr_cv$new(glm_learner, folds = make_folds(cpp_imputed, V = 10))
#'
#'
#' # train cv learner
#' cv_glm_fit <- cv_glm$train(task)
#' preds <- cv_glm_fit$predict()
Expand Down
10 changes: 5 additions & 5 deletions R/Lrnr_dbarts.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,18 +99,18 @@
#' }
#'
#' @template common_parameters
#'
#' @examples
#'
#' @examples
#' set.seed(123)
#'
#'
#' # load example data
#' data(cpp_imputed)
#' covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
#'
#'
#' # create sl3 task
#' task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
#' dbart_learner <- make_learner(Lrnr_dbarts, ndpost = 200)
#'
#'
#' # train dbart learner and make predictions
#' dbart_fit <- dbart_learner$train(task)
#' preds <- dbart_fit$predict()
Expand Down
15 changes: 8 additions & 7 deletions R/Lrnr_density_discretize.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,18 @@
#' }
#'
#' @template common_parameters
#'
#' @examples
#'
#' @examples
#' # load example data
#' data(cpp_imputed)
#'
#'
#' # create sl3 task
#' task <- sl3_Task$new(
#' cpp_imputed,
#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"),
#' outcome = "haz")
#'
#' cpp_imputed,
#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"),
#' outcome = "haz"
#' )
#'
#' # train density discretize learner and make predictions
#' lrnr_discretize <- Lrnr_density_discretize$new(
#' categorical_learner = Lrnr_glmnet$new()
Expand Down
13 changes: 7 additions & 6 deletions R/Lrnr_density_hse.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,17 @@
#'
#' @template common_parameters
#'
#' @examples
#' @examples
#' # load example data
#' data(cpp_imputed)
#'
#'
#' # create sl3 task
#' task <- sl3_Task$new(
#' cpp_imputed,
#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"),
#' outcome = "haz")
#'
#' cpp_imputed,
#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"),
#' outcome = "haz"
#' )
#'
#' # train density hse learner and make predictions
#' lrnr_density_hse <- Lrnr_density_hse$new(mean_learner = Lrnr_glm$new())
#' fit_density_hse <- lrnr_density_hse$train(task)
Expand Down
13 changes: 7 additions & 6 deletions R/Lrnr_density_semiparametric.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,17 @@
#'
#' @template common_parameters
#'
#' @examples
#' @examples
#' # load example data
#' data(cpp_imputed)
#'
#'
#' # create sl3 task
#' task <- sl3_Task$new(
#' cpp_imputed,
#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"),
#' outcome = "haz")
#'
#' cpp_imputed,
#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"),
#' outcome = "haz"
#' )
#'
#' # train density hse learner and make predictions
#' lrnr_density_semi <- Lrnr_density_semiparametric$new(
#' mean_learner = Lrnr_glm$new()
Expand Down
15 changes: 8 additions & 7 deletions R/Lrnr_grf.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,18 @@
#' }
#'
#' @template common_parameters
#'
#' @examples
#'
#' @examples
#' # load example data
#' data(cpp_imputed)
#'
#'
#' # create sl3 task
#' task <- sl3_Task$new(
#' cpp_imputed,
#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"),
#' outcome = "haz")
#'
#' cpp_imputed,
#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"),
#' outcome = "haz"
#' )
#'
#' # train grf learner and make predictions
#' lrnr_grf <- Lrnr_grf$new(seed = 123)
#' lrnr_grf_fit <- lrnr_grf$train(task)
Expand Down
8 changes: 4 additions & 4 deletions R/Lrnr_gts.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,27 +59,27 @@
#' are going to be used.}
#' }
#'
#' @examples
#' @examples
#' # Example adapted from hts package manual
#' # The hierarchical structure looks like 2 child nodes associated with level 1,
#' # which are followed by 3 and 2 sub-child nodes respectively at level 2.
#' library(hts)
#'
#'
#' set.seed(3274)
#' abc <- as.data.table(5 + matrix(sort(rnorm(200)), ncol = 4, nrow = 50))
#' setnames(abc, paste("Series", 1:ncol(abc), sep = "_"))
#' abc[, time := .I]
#' grps <- rbind(c(1, 1, 2, 2), c(1, 2, 1, 2))
#' horizon <- 12
#' suppressWarnings(abc_long <- melt(abc, id = "time", variable.name = "series"))
#'
#'
#' # create sl3 task (no outcome for hierarchical/grouped series)
#' node_list <- list(outcome = "value", time = "time", id = "series")
#' train_task <- sl3_Task$new(data = abc_long, nodes = node_list)
#' test_data <- expand.grid(time = 51:55, series = unique(abc_long$series))
#' test_data <- as.data.table(test_data)[, value := 0]
#' test_task <- sl3_Task$new(data = test_data, nodes = node_list)
#'
#'
#' gts_learner <- Lrnr_gts$new()
#' gts_learner_fit <- gts_learner$train(train_task)
#' gts_learner_preds <- gts_learner_fit$predict(test_task)
Expand Down
15 changes: 8 additions & 7 deletions R/Lrnr_h2o_glm.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,19 +69,20 @@ define_h2o_X <- function(task, outcome_type = NULL) {
#'
#' @template common_parameters
#'
#' @examples
#' @examples
#' library(h2o)
#' suppressWarnings(h2o.init())
#'
#'
#' # load example data
#' data(cpp_imputed)
#'
#'
#' # create sl3 task
#' task <- sl3_Task$new(
#' cpp_imputed,
#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"),
#' outcome = "haz")
#'
#' cpp_imputed,
#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"),
#' outcome = "haz"
#' )
#'
#' # train h2o glm learner and make predictions
#' lrnr_h2o <- Lrnr_h2o_glm$new()
#' lrnr_h2o_fit <- lrnr_h2o$train(task)
Expand Down
16 changes: 9 additions & 7 deletions R/Lrnr_h2o_grid.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@
#' }
#'
#' @template common_parameters
#'
#' @examples
#'
#' @examples
#' library(h2o)
#' suppressWarnings(h2o.init())
#' set.seed(1)
#'
#'
#' # load example data
#' data(cpp_imputed)
#' covars <- c(
Expand All @@ -57,13 +57,15 @@
#' )
#' outcome <- "haz"
#' cpp_imputed <- cpp_imputed[1:150, ]
#'
#'
#' # create sl3 task
#' task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome)
#'
#'
#' # h2o grid search hyperparameter alpha
#' h2o_glm_grid <- Lrnr_h2o_grid$new(algorithm = "glm",
#' hyper_params = list(alpha = c(0, 0.5)))
#' h2o_glm_grid <- Lrnr_h2o_grid$new(
#' algorithm = "glm",
#' hyper_params = list(alpha = c(0, 0.5))
#' )
#' h2o_glm_grid_fit <- h2o_glm_grid$train(task)
#' pred <- h2o_glm_grid_fit$predict()
Lrnr_h2o_grid <- R6Class(
Expand Down
15 changes: 8 additions & 7 deletions R/Lrnr_independent_binomial.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,22 @@
#'
#' @template common_parameters
#'
#' @examples
#' @examples
#' library(dplyr)
#'
#'
#' # load example data
#' data(cpp)
#' cpp <- cpp %>%
#' select(c(bmi, agedays, feeding)) %>%
#' mutate(feeding = as.factor(feeding)) %>%
#' na.omit()
#'
#'
#' # create sl3 task
#' task <- make_sl3_Task(cpp,
#' covariates = c("agedays", "bmi"),
#' outcome = "feeding")
#'
#' task <- make_sl3_Task(cpp,
#' covariates = c("agedays", "bmi"),
#' outcome = "feeding"
#' )
#'
#' # train independent binomial learner and make predictions
#' lrnr_indbinomial <- make_learner(Lrnr_independent_binomial)
#' fit <- lrnr_indbinomial$train(task)
Expand Down
38 changes: 19 additions & 19 deletions R/Lrnr_multiple_ts.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@
#' \code{learner$train}. See its documentation for details.
#' }
#' }
#'
#' @examples
#'
#' @examples
#' library(origami)
#' library(dplyr)
#'
#'
#' set.seed(123)
#'
#'
#' # Simulate simple AR(2) process
#' data <- matrix(arima.sim(model = list(ar = c(.9, -.2)), n = 200))
#' id <- c(rep("Series_1", 50), rep("Series_2", 50), rep("Series_3", 50), rep("Series_4", 50))
Expand All @@ -41,36 +41,36 @@
#' data <- data %>%
#' group_by(id) %>%
#' dplyr::mutate(time = 1:n())
#'
#'
#' data$W1 <- rbinom(200, 1, 0.6)
#' data$W2 <- rbinom(200, 1, 0.2)
#'
#'
#' data <- as.data.table(data)
#'
#'
#' folds <- origami::make_folds(data,
#' t = max(data$time),
#' id = data$id,
#' time = data$time,
#' fold_fun = folds_rolling_window_pooled,
#' window_size = 20,
#' validation_size = 15,
#' gap = 0,
#' batch = 10
#' t = max(data$time),
#' id = data$id,
#' time = data$time,
#' fold_fun = folds_rolling_window_pooled,
#' window_size = 20,
#' validation_size = 15,
#' gap = 0,
#' batch = 10
#' )
#'
#'
#' task <- sl3_Task$new(
#' data = data, outcome = "data",
#' time = "time", id = "id",
#' covariates = c("W1", "W2"),
#' folds = folds
#' )
#'
#'
#' train_task <- training(task, fold = task$folds[[1]])
#' valid_task <- validation(task, fold = task$folds[[1]])
#'
#'
#' lrnr_arima <- Lrnr_arima$new()
#' multiple_ts_arima <- Lrnr_multiple_ts$new(learner = lrnr_arima)
#'
#'
#' multiple_ts_arima_fit <- multiple_ts_arima$train(train_task)
#' multiple_ts_arima_preds <- multiple_ts_arima_fit$predict(valid_task)
Lrnr_multiple_ts <- R6Class(
Expand Down
15 changes: 8 additions & 7 deletions R/Lrnr_multivariate.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
#' }
#'
#' @template common_parameters
#'
#' @examples
#'
#' @examples
#' library(data.table)
#'
#'
#' # simulate data
#' set.seed(123)
#' n <- 1000
Expand All @@ -41,12 +41,13 @@
#' data <- data.table(W, Y)
#' covariates <- grep("W", names(data), value = TRUE)
#' outcomes <- grep("Y", names(data), value = TRUE)
#'
#'
#' # make sl3 task
#' task <- sl3_Task$new(data.table::copy(data),
#' covariates = covariates,
#' outcome = outcomes)
#'
#' covariates = covariates,
#' outcome = outcomes
#' )
#'
#' # train multivariate learner and make predictions
#' mv_learner <- make_learner(Lrnr_multivariate, make_learner(Lrnr_glm_fast))
#' mv_fit <- mv_learner$train(task)
Expand Down
Loading

0 comments on commit fa669c7

Please sign in to comment.