diff --git a/DESCRIPTION b/DESCRIPTION index ca45558..18b1a63 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: SelfControlledCaseSeries Type: Package Title: Self-Controlled Case Series Version: 3.3.0 -Date: 2022-04-04 +Date: 2022-05-25 Authors@R: c( person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut", "cre")), person("Patrick", "Ryan", role = c("aut")), diff --git a/R/Analyses.R b/R/Analyses.R index 42f921b..0a026aa 100644 --- a/R/Analyses.R +++ b/R/Analyses.R @@ -65,10 +65,12 @@ createSccsAnalysis <- function(analysisId = 1, checkmate::assertClass(createScriIntervalDataArgs, "args", null.ok = TRUE, add = errorMessages) checkmate::assertClass(fitSccsModelArgs, "args", add = errorMessages) checkmate::reportAssertions(collection = errorMessages) - if (toupper(design) == "SCCS" && is.null(createSccsIntervalDataArgs)) + if (toupper(design) == "SCCS" && is.null(createSccsIntervalDataArgs)) { stop("Must provide createSccsIntervalDataArgs argument when design = 'SCCS'") - if (toupper(design) == "SCRI" && is.null(createScriIntervalDataArgs)) + } + if (toupper(design) == "SCRI" && is.null(createScriIntervalDataArgs)) { stop("Must provide createScriIntervalDataArgs argument when design = 'SCRI'") + } analysis <- list() for (name in names(formals(createSccsAnalysis))) { analysis[[name]] <- get(name) @@ -132,10 +134,12 @@ loadSccsAnalysisList <- function(file) { #' @export createExposureOutcome <- function(exposureId, outcomeId, ...) { errorMessages <- checkmate::makeAssertCollection() - if (!is.list(exposureId)) + if (!is.list(exposureId)) { checkmate::assertInt(exposureId, add = errorMessages) - if (!is.list(outcomeId)) + } + if (!is.list(outcomeId)) { checkmate::assertInt(outcomeId, add = errorMessages) + } checkmate::reportAssertions(collection = errorMessages) exposureOutcome <- list(...) diff --git a/R/CovariateSettings.R b/R/CovariateSettings.R index 6745f47..0f55230 100644 --- a/R/CovariateSettings.R +++ b/R/CovariateSettings.R @@ -87,8 +87,9 @@ createEraCovariateSettings <- function(includeEraIds = NULL, checkmate::assertLogical(allowRegularization, len = 1, add = errorMessages) checkmate::assertLogical(profileLikelihood, len = 1, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) - if (allowRegularization && profileLikelihood) + if (allowRegularization && profileLikelihood) { stop("Cannot profile the likelihood of regularized covariates") + } if (!grepl("start$|end$", startAnchor, ignore.case = TRUE)) { stop("startAnchor should have value 'era start' or 'era end'") } @@ -98,8 +99,9 @@ createEraCovariateSettings <- function(includeEraIds = NULL, isEnd <- function(anchor) { return(grepl("end$", anchor, ignore.case = TRUE)) } - if (end < start && !isEnd(endAnchor)) + if (end < start && !isEnd(endAnchor)) { stop("End day always precedes start day. Either pick a later end day, or set endAnchor to 'era end'.") + } # Make sure string is exact: if (isEnd(startAnchor)) { @@ -315,8 +317,9 @@ createControlIntervalSettings <- function(includeEraIds = NULL, isEnd <- function(anchor) { return(grepl("end$", anchor, ignore.case = TRUE)) } - if (end < start && !isEnd(endAnchor)) + if (end < start && !isEnd(endAnchor)) { stop("End day always precedes start day. Either pick a later end day, or set endAnchor to 'era end'.") + } # Make sure string is exact: if (isEnd(startAnchor)) { @@ -329,17 +332,19 @@ createControlIntervalSettings <- function(includeEraIds = NULL, } else { endAnchor <- "era start" } - analysis <- createEraCovariateSettings(includeEraIds = includeEraIds, - excludeEraIds = excludeEraIds, - label = "Control interval", - stratifyById = FALSE, - start = start, - startAnchor = startAnchor, - end = end, - endAnchor = endAnchor, - firstOccurrenceOnly = firstOccurrenceOnly, - splitPoints = c(), - allowRegularization = FALSE) + analysis <- createEraCovariateSettings( + includeEraIds = includeEraIds, + excludeEraIds = excludeEraIds, + label = "Control interval", + stratifyById = FALSE, + start = start, + startAnchor = startAnchor, + end = end, + endAnchor = endAnchor, + firstOccurrenceOnly = firstOccurrenceOnly, + splitPoints = c(), + allowRegularization = FALSE + ) analysis$isControlInterval <- TRUE class(analysis) <- "ControlIntervalSettings" return(analysis) diff --git a/R/DataConversion.R b/R/DataConversion.R index c14f116..b271a74 100644 --- a/R/DataConversion.R +++ b/R/DataConversion.R @@ -62,8 +62,9 @@ createSccsIntervalData <- function(studyPopulation, checkmate::assertClass(sccsData, "SccsData", add = errorMessages) checkmate::assertList(studyPopulation, min.len = 1, add = errorMessages) if (is.list(eraCovariateSettings) && class(eraCovariateSettings) != "EraCovariateSettings") { - for (i in 1:length(eraCovariateSettings)) + for (i in 1:length(eraCovariateSettings)) { checkmate::assertClass(eraCovariateSettings[[i]], "EraCovariateSettings", add = errorMessages) + } } else { checkmate::assertClass(eraCovariateSettings, "EraCovariateSettings", add = errorMessages) } @@ -92,8 +93,8 @@ createSccsIntervalData <- function(studyPopulation, timeCovariateCases <- numeric(0) if (!is.null(ageCovariateSettings) || - !is.null(seasonalityCovariateSettings) || - !is.null(calendarTimeCovariateSettings)) { + !is.null(seasonalityCovariateSettings) || + !is.null(calendarTimeCovariateSettings)) { if (nrow(studyPopulation$cases) > minCasesForTimeCovariates) { set.seed(0) timeCovariateCases <- sample(studyPopulation$cases$caseId, minCasesForTimeCovariates, replace = FALSE) @@ -103,9 +104,11 @@ createSccsIntervalData <- function(studyPopulation, settings <- list() settings$metaData <- list() settings$covariateRef <- tibble() - settings <- addEventDependentObservationSettings(settings, - eventDependentObservation, - studyPopulation) + settings <- addEventDependentObservationSettings( + settings, + eventDependentObservation, + studyPopulation + ) if (eventDependentObservation && settings$metaData$censorModel$model %in% c(1, 3) && !is.null(ageCovariateSettings)) { warning("Optimal censoring model adjusts for age, so removing age as separate covariate.") ageCovariateSettings <- NULL @@ -125,23 +128,25 @@ createSccsIntervalData <- function(studyPopulation, eras <- sccsData$eras %>% arrange(.data$caseId) - data <- convertToSccs(cases = cases, - outcomes = outcomes, - eras = eras, - includeAge = !is.null(ageCovariateSettings), - ageOffset = settings$ageOffset, - ageDesignMatrix = settings$ageDesignMatrix, - includeSeason = !is.null(seasonalityCovariateSettings), - seasonDesignMatrix = settings$seasonDesignMatrix, - includeCalendarTime = !is.null(calendarTimeCovariateSettings), - calendarTimeOffset = settings$calendarTimeOffset, - calendarTimeDesignMatrix = settings$calendarTimeDesignMatrix, - timeCovariateCases = timeCovariateCases, - covariateSettingsList = settings$covariateSettingsList, - eventDependentObservation = eventDependentObservation, - censorModel = settings$censorModel, - scri = FALSE, - controlIntervalId = 0) + data <- convertToSccs( + cases = cases, + outcomes = outcomes, + eras = eras, + includeAge = !is.null(ageCovariateSettings), + ageOffset = settings$ageOffset, + ageDesignMatrix = settings$ageDesignMatrix, + includeSeason = !is.null(seasonalityCovariateSettings), + seasonDesignMatrix = settings$seasonDesignMatrix, + includeCalendarTime = !is.null(calendarTimeCovariateSettings), + calendarTimeOffset = settings$calendarTimeOffset, + calendarTimeDesignMatrix = settings$calendarTimeDesignMatrix, + timeCovariateCases = timeCovariateCases, + covariateSettingsList = settings$covariateSettingsList, + eventDependentObservation = eventDependentObservation, + censorModel = settings$censorModel, + scri = FALSE, + controlIntervalId = 0 + ) if (is.null(data$outcomes) || is.null(data$covariates)) { warning("Conversion resulted in empty data set. Perhaps no one with the outcome had any exposure of interest?") @@ -150,14 +155,12 @@ createSccsIntervalData <- function(studyPopulation, if (nrow(settings$covariateRef) > 0) { data$covariateRef <- settings$covariateRef } - } else { metaData$covariateStatistics <- collect(data$covariateStatistics) metaData$daysObserved <- pull(data$observedDays, .data$observedDays) data$covariateStatistics <- NULL data$observedDays <- NULL data$covariateRef <- settings$covariateRef - } attr(data, "metaData") <- metaData class(data) <- "SccsIntervalData" @@ -169,19 +172,27 @@ createSccsIntervalData <- function(studyPopulation, } createEmptySccsIntervalData <- function() { - sccsIntervalData <- Andromeda::andromeda(outcomes = tibble(rowId = 1, - stratumId = 1, - time = 1, - y = 1)[-1, ], - covariates = tibble(rowId = 1, - stratumId = 1, - covariateId = 1, - covariateValue = 1)[-1, ], - covariateRef = tibble(covariateId = 1, - covariateName = "", - originalEraId = 1, - originalEraName = "", - originalEraType = "")[-1, ]) + sccsIntervalData <- Andromeda::andromeda( + outcomes = tibble( + rowId = 1, + stratumId = 1, + time = 1, + y = 1 + )[-1, ], + covariates = tibble( + rowId = 1, + stratumId = 1, + covariateId = 1, + covariateValue = 1 + )[-1, ], + covariateRef = tibble( + covariateId = 1, + covariateName = "", + originalEraId = 1, + originalEraName = "", + originalEraType = "" + )[-1, ] + ) return(sccsIntervalData) } @@ -210,21 +221,28 @@ addAgeSettings <- function(settings, } settings$ageOffset <- ageKnots[1] ageDesignMatrix <- splines::bs(ageKnots[1]:ageKnots[length(ageKnots)], - knots = ageKnots[2:(length(ageKnots) - 1)], - Boundary.knots = ageKnots[c(1, length(ageKnots))]) + knots = ageKnots[2:(length(ageKnots) - 1)], + Boundary.knots = ageKnots[c(1, length(ageKnots))] + ) # Fixing first beta to zero, so dropping first column of design matrix: settings$ageDesignMatrix <- ageDesignMatrix[, 2:ncol(ageDesignMatrix)] - splineCovariateRef <- tibble(covariateId = 100:(100 + length(ageKnots) - 1), - covariateName = paste("Age spline component", - 1:(length(ageKnots))), - originalEraId = 0, - originalEraType = "", - originalEraName = "") + splineCovariateRef <- tibble( + covariateId = 100:(100 + length(ageKnots) - 1), + covariateName = paste( + "Age spline component", + 1:(length(ageKnots)) + ), + originalEraId = 0, + originalEraType = "", + originalEraName = "" + ) settings$covariateRef <- bind_rows(settings$covariateRef, splineCovariateRef) - age <- list(ageKnots = ageKnots, - covariateIds = splineCovariateRef$covariateId, - allowRegularization = ageCovariateSettings$allowRegularization, - computeConfidenceIntervals = ageCovariateSettings$computeConfidenceIntervals) + age <- list( + ageKnots = ageKnots, + covariateIds = splineCovariateRef$covariateId, + allowRegularization = ageCovariateSettings$allowRegularization, + computeConfidenceIntervals = ageCovariateSettings$computeConfidenceIntervals + ) settings$metaData$age <- age return(settings) } @@ -243,17 +261,23 @@ addSeasonalitySettings <- function(settings, seasonalityCovariateSettings, sccsD seasonDesignMatrix <- cyclicSplineDesign(1:12, knots = seasonKnots) # Fixing first beta to zero, so dropping first column of design matrix: settings$seasonDesignMatrix <- seasonDesignMatrix[, 2:ncol(seasonDesignMatrix)] - splineCovariateRef <- tibble(covariateId = 200:(200 + length(seasonKnots) - 3), - covariateName = paste("Seasonality spline component", - 1:(length(seasonKnots) - 2)), - originalEraId = 0, - originalEraType = "", - originalEraName = "") + splineCovariateRef <- tibble( + covariateId = 200:(200 + length(seasonKnots) - 3), + covariateName = paste( + "Seasonality spline component", + 1:(length(seasonKnots) - 2) + ), + originalEraId = 0, + originalEraType = "", + originalEraName = "" + ) settings$covariateRef <- bind_rows(settings$covariateRef, splineCovariateRef) - seasonality <- list(seasonKnots = seasonKnots, - covariateIds = splineCovariateRef$covariateId, - allowRegularization = seasonalityCovariateSettings$allowRegularization, - computeConfidenceIntervals = seasonalityCovariateSettings$computeConfidenceIntervals) + seasonality <- list( + seasonKnots = seasonKnots, + covariateIds = splineCovariateRef$covariateId, + allowRegularization = seasonalityCovariateSettings$allowRegularization, + computeConfidenceIntervals = seasonalityCovariateSettings$computeConfidenceIntervals + ) settings$metaData$seasonality <- seasonality } return(settings) @@ -270,14 +294,14 @@ addCalendarTimeSettings <- function(settings, if (length(calendarTimeCovariateSettings$calendarTimeKnots) == 1) { observationPeriodCounts <- computeObservedPerMonth(studyPopulation) %>% arrange(.data$month) %>% - mutate(cumCount = cumsum(.data$observationPeriodCount )) + mutate(cumCount = cumsum(.data$observationPeriodCount)) total <- observationPeriodCounts %>% tail(1) %>% pull(.data$cumCount) - cutoffs <- total * seq(0.01, 0.99, length.out = calendarTimeCovariateSettings$calendarTimeKnots) - calendarTimeKnots = rep(0, calendarTimeCovariateSettings$calendarTimeKnots) + cutoffs <- total * seq(0.01, 0.99, length.out = calendarTimeCovariateSettings$calendarTimeKnots) + calendarTimeKnots <- rep(0, calendarTimeCovariateSettings$calendarTimeKnots) for (i in 1:calendarTimeCovariateSettings$calendarTimeKnots) { calendarTimeKnots[i] <- min(observationPeriodCounts$month[observationPeriodCounts$cumCount >= cutoffs[i]]) } @@ -293,37 +317,46 @@ addCalendarTimeSettings <- function(settings, } settings$calendarTimeOffset <- calendarTimeKnots[1] calendarTimeDesignMatrix <- splines::bs(calendarTimeKnots[1]:calendarTimeKnots[length(calendarTimeKnots)], - knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], - Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))]) + knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], + Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))] + ) # Fixing first beta to zero, so dropping first column of design matrix: settings$calendarTimeDesignMatrix <- calendarTimeDesignMatrix[, 2:ncol(calendarTimeDesignMatrix)] - splineCovariateRef <- tibble(covariateId = 300:(300 + length(calendarTimeKnots) - 1), - covariateName = paste("Calendar time spline component", - 1:(length(calendarTimeKnots))), - originalEraId = 0, - originalEraType = "", - originalEraName = "") + splineCovariateRef <- tibble( + covariateId = 300:(300 + length(calendarTimeKnots) - 1), + covariateName = paste( + "Calendar time spline component", + 1:(length(calendarTimeKnots)) + ), + originalEraId = 0, + originalEraType = "", + originalEraName = "" + ) settings$covariateRef <- bind_rows(settings$covariateRef, splineCovariateRef) - calendarTime <- list(calendarTimeKnots = calendarTimeKnots, - covariateIds = splineCovariateRef$covariateId, - allowRegularization = calendarTimeCovariateSettings$allowRegularization, - computeConfidenceIntervals = calendarTimeCovariateSettings$computeConfidenceIntervals) + calendarTime <- list( + calendarTimeKnots = calendarTimeKnots, + covariateIds = splineCovariateRef$covariateId, + allowRegularization = calendarTimeCovariateSettings$allowRegularization, + computeConfidenceIntervals = calendarTimeCovariateSettings$computeConfidenceIntervals + ) settings$metaData$calendarTime <- calendarTime return(settings) } } convertDateToMonth <- function(date) { - return(as.numeric(format(date, '%Y')) * 12 + as.numeric(format(date, '%m')) - 1) + return(as.numeric(format(date, "%Y")) * 12 + as.numeric(format(date, "%m")) - 1) } convertMonthToStartDate <- function(month) { year <- floor(month / 12) month <- floor(month %% 12) + 1 - return(as.Date(sprintf("%s-%s-%s", - year, - month, - 1))) + return(as.Date(sprintf( + "%s-%s-%s", + year, + month, + 1 + ))) } convertMonthToEndDate <- function(month) { @@ -331,17 +364,21 @@ convertMonthToEndDate <- function(month) { month <- floor(month %% 12) + 1 year <- if_else(month == 12, year + 1, year) month <- if_else(month == 12, 1, month + 1) - return(as.Date(sprintf("%s-%s-%s", - year, - month, - 1)) - 1) + return(as.Date(sprintf( + "%s-%s-%s", + year, + month, + 1 + )) - 1) } computeObservedPerMonth <- function(studyPopulation) { observationPeriods <- studyPopulation$cases %>% mutate(endDate = .data$startDate + .data$endDay) %>% - mutate(startMonth = convertDateToMonth(.data$startDate), - endMonth = convertDateToMonth(.data$endDate) + 1) %>% + mutate( + startMonth = convertDateToMonth(.data$startDate), + endMonth = convertDateToMonth(.data$endDate) + 1 + ) %>% select(.data$startMonth, .data$endMonth) months <- full_join( @@ -353,20 +390,27 @@ computeObservedPerMonth <- function(studyPopulation) { group_by(.data$endMonth) %>% summarise(endCount = n()) %>% rename(month = .data$endMonth), - by = "month") %>% - mutate(startCount = ifelse(is.na(.data$startCount), 0, .data$startCount), - endCount = ifelse(is.na(.data$endCount), 0, .data$endCount)) + by = "month" + ) %>% + mutate( + startCount = ifelse(is.na(.data$startCount), 0, .data$startCount), + endCount = ifelse(is.na(.data$endCount), 0, .data$endCount) + ) # Adding months with no starts and ends: months <- months %>% full_join(tibble(month = min(months$month):max(months$month)), by = "month") %>% - mutate(startCount = if_else(is.na(.data$startCount), 0, .data$startCount), - endCount = if_else(is.na(.data$endCount), 0, .data$endCount)) + mutate( + startCount = if_else(is.na(.data$startCount), 0, .data$startCount), + endCount = if_else(is.na(.data$endCount), 0, .data$endCount) + ) months <- months %>% arrange(.data$month) %>% - mutate(cumStarts = cumsum(.data$startCount), - cumEnds = cumsum(.data$endCount)) %>% + mutate( + cumStarts = cumsum(.data$startCount), + cumEnds = cumsum(.data$endCount) + ) %>% mutate(observationPeriodCount = .data$cumStarts - .data$cumEnds) %>% select(.data$month, .data$observationPeriodCount) %>% head(-1) @@ -380,15 +424,16 @@ addEventDependentObservationSettings <- function(settings, if (!eventDependentObservation) { settings$censorModel <- list(model = 0, p = c(0)) } else { - data <- studyPopulation$outcomes %>% group_by(.data$caseId) %>% summarise(outcomeDay = min(.data$outcomeDay)) %>% inner_join(studyPopulation$cases, by = "caseId") %>% - transmute(astart = .data$ageInDays, - aend = .data$ageInDays + .data$endDay + 1, - aevent = .data$ageInDays + .data$outcomeDay + 1, - present = .data$noninformativeEndCensor == 1) + transmute( + astart = .data$ageInDays, + aend = .data$ageInDays + .data$endDay + 1, + aevent = .data$ageInDays + .data$outcomeDay + 1, + present = .data$noninformativeEndCensor == 1 + ) settings$censorModel <- fitModelsAndPickBest(data) settings$metaData$censorModel <- settings$censorModel @@ -429,12 +474,14 @@ addEraCovariateSettings <- function(settings, eraCovariateSettings, sccsData) { if (!covariateSettings$stratifyById) { # Create a single output ID covariateSettings$outputIds <- as.matrix(outputId) - newCovariateRef <- tibble(covariateId = outputId, - covariateName = covariateSettings$label, - originalEraId = 0, - originalEraType = "", - originalEraName = "", - isControlInterval = covariateSettings$isControlInterval) + newCovariateRef <- tibble( + covariateId = outputId, + covariateName = covariateSettings$label, + originalEraId = 0, + originalEraType = "", + originalEraName = "", + isControlInterval = covariateSettings$isControlInterval + ) settings$covariateRef <- bind_rows(settings$covariateRef, newCovariateRef) outputId <- outputId + 1 } else { @@ -447,16 +494,21 @@ addEraCovariateSettings <- function(settings, eraCovariateSettings, sccsData) { warning(paste0("Could not find era with ID ", covariateSettings$eraIds, " in data")) } else { varNames <- varNames %>% - transmute(originalEraId = .data$eraId, - originalEraType = .data$eraType, - originalEraName = .data$eraName, - covariateName = paste(covariateSettings$label, - .data$eraName, - sep = ": "), - isControlInterval = FALSE) - - newCovariateRef <- tibble(covariateId = outputIds, - originalEraId = covariateSettings$eraIds) %>% + transmute( + originalEraId = .data$eraId, + originalEraType = .data$eraType, + originalEraName = .data$eraName, + covariateName = paste(covariateSettings$label, + .data$eraName, + sep = ": " + ), + isControlInterval = FALSE + ) + + newCovariateRef <- tibble( + covariateId = outputIds, + originalEraId = covariateSettings$eraIds + ) %>% inner_join(varNames, by = "originalEraId") settings$covariateRef <- bind_rows(settings$covariateRef, newCovariateRef) } @@ -471,43 +523,55 @@ addEraCovariateSettings <- function(settings, eraCovariateSettings, sccsData) { varNames <- paste(varNames, " day ", startDays, "-", c(endDays[1:length(endDays) - 1], "")) # covariateSettings$outputIds <- matrix(outputIds, ncol = 1) covariateSettings$outputIds <- matrix(outputIds, - ncol = length(covariateSettings$splitPoints) + 1, - byrow = TRUE) - newCovariateRef <- tibble(covariateId = outputIds, - covariateName = varNames, - originaEraId = 0, - originalEraType = "", - originalEraName = "", - isControlInterval = FALSE) + ncol = length(covariateSettings$splitPoints) + 1, + byrow = TRUE + ) + newCovariateRef <- tibble( + covariateId = outputIds, + covariateName = varNames, + originaEraId = 0, + originalEraType = "", + originalEraName = "", + isControlInterval = FALSE + ) settings$covariateRef <- bind_rows(settings$covariateRef, newCovariateRef) } else { outputIds <- outputId:(outputId + (length(covariateSettings$splitPoint) + 1) * length(covariateSettings$eraIds) - 1) outputId <- max(outputIds) + 1 covariateSettings$outputIds <- matrix(outputIds, - ncol = length(covariateSettings$splitPoints) + 1, - byrow = TRUE) + ncol = length(covariateSettings$splitPoints) + 1, + byrow = TRUE + ) if (any(covariateSettings$eraIds %in% eraRef$eraId)) { originalEraId <- rep(covariateSettings$eraIds, - each = length(covariateSettings$splitPoints) + 1) - originalEraType <- eraRef$eraType[match(originalEraId, - eraRef$eraId)] - originalEraName <- eraRef$eraName[match(originalEraId, - eraRef$eraId)] + each = length(covariateSettings$splitPoints) + 1 + ) + originalEraType <- eraRef$eraType[match( + originalEraId, + eraRef$eraId + )] + originalEraName <- eraRef$eraName[match( + originalEraId, + eraRef$eraId + )] originalEraName[originalEraName == ""] <- originalEraId[originalEraName == ""] varNames <- paste(covariateSettings$label, ": ", originalEraName, sep = "") varNames <- paste(varNames, - ", day ", - startDays, - "-", - c(endDays[1:length(endDays) - 1], ""), - sep = "") - - newCovariateRef <- tibble(covariateId = outputIds, - covariateName = varNames, - originalEraId = originalEraId, - originalEraType = originalEraType, - originalEraName = originalEraName, - isControlInterval = FALSE) + ", day ", + startDays, + "-", + c(endDays[1:length(endDays) - 1], ""), + sep = "" + ) + + newCovariateRef <- tibble( + covariateId = outputIds, + covariateName = varNames, + originalEraId = originalEraId, + originalEraType = originalEraType, + originalEraName = originalEraName, + isControlInterval = FALSE + ) settings$covariateRef <- bind_rows(settings$covariateRef, newCovariateRef) } } @@ -535,14 +599,17 @@ cyclicSplineDesign <- function(x, knots, ord = 4) { checkmate::assertInt(ord, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) nk <- length(knots) - if (ord < 2) + if (ord < 2) { stop("order too low") - if (nk < ord) + } + if (nk < ord) { stop("too few knots") + } knots <- sort(knots) k1 <- knots[1] - if (min(x) < k1 || max(x) > knots[nk]) + if (min(x) < k1 || max(x) > knots[nk]) { stop("x out of range") + } xc <- knots[nk - ord + 1] knots <- c(k1 - (knots[nk] - knots[(nk - ord + 1):(nk - 1)]), knots) ind <- x > xc diff --git a/R/DataLoadingSaving.R b/R/DataLoadingSaving.R index 77f5a67..ab4d35b 100644 --- a/R/DataLoadingSaving.R +++ b/R/DataLoadingSaving.R @@ -137,16 +137,21 @@ getDbSccsData <- function(connectionDetails, studyEndDate = "", cdmVersion = "5", maxCasesPerOutcome = 0) { - if (studyStartDate != "" && regexpr("^[12][0-9]{3}[01][0-9][0-3][0-9]$", studyStartDate) == -1) + if (studyStartDate != "" && regexpr("^[12][0-9]{3}[01][0-9][0-3][0-9]$", studyStartDate) == -1) { stop("Study start date must have format YYYYMMDD") - if (studyEndDate != "" && regexpr("^[12][0-9]{3}[01][0-9][0-3][0-9]$", studyEndDate) == -1) + } + if (studyEndDate != "" && regexpr("^[12][0-9]{3}[01][0-9][0-3][0-9]$", studyEndDate) == -1) { stop("Study end date must have format YYYYMMDD") - if (cdmVersion == "4") + } + if (cdmVersion == "4") { stop("CDM version 4 is no longer supported") - if (!is.null(exposureIds) && length(exposureIds) > 0 && !is.numeric(exposureIds)) + } + if (!is.null(exposureIds) && length(exposureIds) > 0 && !is.numeric(exposureIds)) { stop("exposureIds must be a (vector of) numeric") - if (useCustomCovariates && !is.null(customCovariateIds) && length(customCovariateIds) > 0 && !is.numeric(customCovariateIds)) + } + if (useCustomCovariates && !is.null(customCovariateIds) && length(customCovariateIds) > 0 && !is.numeric(customCovariateIds)) { stop("customCovariateIds must be a (vector of) numeric") + } if (!is.null(oracleTempSchema) && oracleTempSchema != "") { warning("The 'oracleTempSchema' argument is deprecated. Use 'tempEmulationSchema' instead.") tempEmulationSchema <- oracleTempSchema @@ -160,12 +165,13 @@ getDbSccsData <- function(connectionDetails, } else { hasExposureIds <- TRUE DatabaseConnector::insertTable(conn, - tableName = "#exposure_ids", - data = data.frame(concept_id = as.integer(exposureIds)), - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = TRUE, - tempEmulationSchema = tempEmulationSchema) + tableName = "#exposure_ids", + data = data.frame(concept_id = as.integer(exposureIds)), + dropTableIfExists = TRUE, + createTable = TRUE, + tempTable = TRUE, + tempEmulationSchema = tempEmulationSchema + ) } if (!useCustomCovariates || is.null(customCovariateIds) || length(customCovariateIds) == 0) { @@ -173,61 +179,66 @@ getDbSccsData <- function(connectionDetails, } else { hasCustomCovariateIds <- TRUE DatabaseConnector::insertTable(conn, - tableName = "#custom_cov_ids", - data = data.frame(concept_id = as.integer(customCovariateIds)), - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = TRUE, - tempEmulationSchema = tempEmulationSchema) + tableName = "#custom_cov_ids", + data = data.frame(concept_id = as.integer(customCovariateIds)), + dropTableIfExists = TRUE, + createTable = TRUE, + tempTable = TRUE, + tempEmulationSchema = tempEmulationSchema + ) } ParallelLogger::logInfo("Selecting outcomes") sql <- SqlRender::loadRenderTranslateSql("SelectOutcomes.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - outcome_database_schema = outcomeDatabaseSchema, - outcome_table = outcomeTable, - outcome_concept_ids = outcomeIds, - use_nesting_cohort = useNestingCohort, - nesting_cohort_database_schema = nestingCohortDatabaseSchema, - nesting_cohort_table = nestingCohortTable, - nesting_cohort_id = nestingCohortId, - study_start_date = studyStartDate, - study_end_date = studyEndDate) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + outcome_database_schema = outcomeDatabaseSchema, + outcome_table = outcomeTable, + outcome_concept_ids = outcomeIds, + use_nesting_cohort = useNestingCohort, + nesting_cohort_database_schema = nestingCohortDatabaseSchema, + nesting_cohort_table = nestingCohortTable, + nesting_cohort_id = nestingCohortId, + study_start_date = studyStartDate, + study_end_date = studyEndDate + ) DatabaseConnector::executeSql(conn, sql) ParallelLogger::logInfo("Creating cases") sql <- SqlRender::loadRenderTranslateSql("CreateCases.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - use_nesting_cohort = useNestingCohort, - nesting_cohort_database_schema = nestingCohortDatabaseSchema, - nesting_cohort_table = nestingCohortTable, - nesting_cohort_id = nestingCohortId, - study_start_date = studyStartDate, - study_end_date = studyEndDate) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + use_nesting_cohort = useNestingCohort, + nesting_cohort_database_schema = nestingCohortDatabaseSchema, + nesting_cohort_table = nestingCohortTable, + nesting_cohort_id = nestingCohortId, + study_start_date = studyStartDate, + study_end_date = studyEndDate + ) DatabaseConnector::executeSql(conn, sql) DatabaseConnector::insertTable(conn, - tableName = "#outcome_ids", - data = data.frame(outcome_id = as.integer(outcomeIds)), - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = TRUE, - tempEmulationSchema = tempEmulationSchema) + tableName = "#outcome_ids", + data = data.frame(outcome_id = as.integer(outcomeIds)), + dropTableIfExists = TRUE, + createTable = TRUE, + tempTable = TRUE, + tempEmulationSchema = tempEmulationSchema + ) ParallelLogger::logInfo("Counting outcomes") sql <- SqlRender::loadRenderTranslateSql("CountOutcomes.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - use_nesting_cohort = useNestingCohort, - study_start_date = studyStartDate, - study_end_date = studyEndDate) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + use_nesting_cohort = useNestingCohort, + study_start_date = studyStartDate, + study_end_date = studyEndDate + ) DatabaseConnector::executeSql(conn, sql) sql <- "SELECT * FROM #counts;" @@ -246,49 +257,54 @@ getDbSccsData <- function(connectionDetails, } if (sampledCases) { sql <- SqlRender::loadRenderTranslateSql("SampleCases.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - max_cases_per_outcome = maxCasesPerOutcome) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + max_cases_per_outcome = maxCasesPerOutcome + ) DatabaseConnector::executeSql(conn, sql) } } ParallelLogger::logInfo("Creating eras") sql <- SqlRender::loadRenderTranslateSql("CreateEras.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - outcome_database_schema = outcomeDatabaseSchema, - outcome_table = outcomeTable, - outcome_concept_ids = outcomeIds, - exposure_database_schema = exposureDatabaseSchema, - exposure_table = exposureTable, - use_nesting_cohort = useNestingCohort, - use_custom_covariates = useCustomCovariates, - custom_covariate_database_schema = customCovariateDatabaseSchema, - custom_covariate_table = customCovariateTable, - has_exposure_ids = hasExposureIds, - has_custom_covariate_ids = hasCustomCovariateIds, - delete_covariates_small_count = deleteCovariatesSmallCount, - study_start_date = studyStartDate, - study_end_date = studyEndDate, - sampled_cases = sampledCases) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + outcome_database_schema = outcomeDatabaseSchema, + outcome_table = outcomeTable, + outcome_concept_ids = outcomeIds, + exposure_database_schema = exposureDatabaseSchema, + exposure_table = exposureTable, + use_nesting_cohort = useNestingCohort, + use_custom_covariates = useCustomCovariates, + custom_covariate_database_schema = customCovariateDatabaseSchema, + custom_covariate_table = customCovariateTable, + has_exposure_ids = hasExposureIds, + has_custom_covariate_ids = hasCustomCovariateIds, + delete_covariates_small_count = deleteCovariatesSmallCount, + study_start_date = studyStartDate, + study_end_date = studyEndDate, + sampled_cases = sampledCases + ) DatabaseConnector::executeSql(conn, sql) ParallelLogger::logInfo("Fetching data from server") sccsData <- Andromeda::andromeda() sql <- SqlRender::loadRenderTranslateSql("QueryCases.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - sampled_cases = sampledCases) - DatabaseConnector::querySqlToAndromeda(connection = conn, - sql = sql, - andromeda = sccsData, - andromedaTableName = "cases", - snakeCaseToCamelCase = TRUE) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + sampled_cases = sampledCases + ) + DatabaseConnector::querySqlToAndromeda( + connection = conn, + sql = sql, + andromeda = sccsData, + andromedaTableName = "cases", + snakeCaseToCamelCase = TRUE + ) ParallelLogger::logDebug("Fetched ", sccsData$cases %>% count() %>% pull(), " cases from server") @@ -300,50 +316,60 @@ getDbSccsData <- function(connectionDetails, if (countNegativeAges > 0) { warning("There are ", countNegativeAges, " cases with negative ages. Setting their starting age to 0. Please review your data.") sccsData$cases <- sccsData$cases %>% - mutate(ageInDays = case_when(.data$ageInDays < 0 ~ 0, - TRUE ~ .data$ageInDays)) + mutate(ageInDays = case_when( + .data$ageInDays < 0 ~ 0, + TRUE ~ .data$ageInDays + )) } sql <- SqlRender::loadRenderTranslateSql("QueryEras.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema) - DatabaseConnector::querySqlToAndromeda(connection = conn, - sql = sql, - andromeda = sccsData, - andromedaTableName = "eras", - snakeCaseToCamelCase = TRUE) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::querySqlToAndromeda( + connection = conn, + sql = sql, + andromeda = sccsData, + andromedaTableName = "eras", + snakeCaseToCamelCase = TRUE + ) sql <- "SELECT era_type, era_id, era_name FROM #era_ref" sql <- SqlRender::translate(sql = sql, targetDialect = connectionDetails$dbms, tempEmulationSchema = tempEmulationSchema) - DatabaseConnector::querySqlToAndromeda(connection = conn, - sql = sql, - andromeda = sccsData, - andromedaTableName = "eraRef", - snakeCaseToCamelCase = TRUE) + DatabaseConnector::querySqlToAndromeda( + connection = conn, + sql = sql, + andromeda = sccsData, + andromedaTableName = "eraRef", + snakeCaseToCamelCase = TRUE + ) # Delete temp tables sql <- SqlRender::loadRenderTranslateSql("RemoveTempTables.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - study_start_date = studyStartDate, - study_end_date = studyEndDate, - sampled_cases = sampledCases, - has_exposure_ids = hasExposureIds, - use_nesting_cohort = useNestingCohort, - has_custom_covariate_ids = hasCustomCovariateIds) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + study_start_date = studyStartDate, + study_end_date = studyEndDate, + sampled_cases = sampledCases, + has_exposure_ids = hasExposureIds, + use_nesting_cohort = useNestingCohort, + has_custom_covariate_ids = hasCustomCovariateIds + ) DatabaseConnector::executeSql(conn, sql, progressBar = FALSE, reportOverallTime = FALSE) - if (sampledCases) { + if (sampledCases) { sampledCounts <- sccsData$eras %>% filter(.data$eraType == "hoi") %>% inner_join(sccsData$cases, by = "caseId") %>% group_by(.data$eraId) %>% - summarise(outcomeSubjects = n_distinct(.data$personId), - outcomeEvents = count(), - outcomeObsPeriods = n_distinct(.data$observationPeriodId), - .groups = "drop_last") %>% + summarise( + outcomeSubjects = n_distinct(.data$personId), + outcomeEvents = count(), + outcomeObsPeriods = n_distinct(.data$observationPeriodId), + .groups = "drop_last" + ) %>% rename(outcomeId = .data$eraId) %>% mutate(description = "Random sample") %>% collect() @@ -353,9 +379,11 @@ getDbSccsData <- function(connectionDetails, } } - attr(sccsData, "metaData") <- list(exposureIds = exposureIds, - outcomeIds = outcomeIds, - attrition = outcomeCounts) + attr(sccsData, "metaData") <- list( + exposureIds = exposureIds, + outcomeIds = outcomeIds, + attrition = outcomeCounts + ) class(sccsData) <- "SccsData" attr(class(sccsData), "package") <- "SelfControlledCaseSeries" diff --git a/R/Diagnostics.R b/R/Diagnostics.R index b966453..ab67e9e 100644 --- a/R/Diagnostics.R +++ b/R/Diagnostics.R @@ -17,15 +17,17 @@ computeOutcomeRatePerMonth <- function(studyPopulation) { observationPeriodCounts <- computeObservedPerMonth(studyPopulation) outcomeCounts <- studyPopulation$outcomes %>% - inner_join(studyPopulation$cases , by = "caseId") %>% + inner_join(studyPopulation$cases, by = "caseId") %>% transmute(month = convertDateToMonth(.data$startDate + .data$outcomeDay)) %>% group_by(.data$month) %>% summarise(outcomeCount = n()) data <- observationPeriodCounts %>% inner_join(outcomeCounts, by = "month") %>% mutate(rate = .data$outcomeCount / .data$observationPeriodCount) %>% - mutate(monthStartDate = convertMonthToStartDate(.data$month), - monthEndDate = convertMonthToEndDate(.data$month)) + mutate( + monthStartDate = convertMonthToStartDate(.data$month), + monthEndDate = convertMonthToEndDate(.data$month) + ) return(data) } @@ -41,14 +43,14 @@ adjustOutcomeRatePerMonth <- function(data, sccsModel) { calendarTime[calendarTime < calendarTimeKnots[1]] <- calendarTimeKnots[1] calendarTime[calendarTime > calendarTimeKnots[length(calendarTimeKnots)]] <- calendarTimeKnots[length(calendarTimeKnots)] calendarTimeDesignMatrix <- splines::bs(calendarTime, - knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], - Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))]) + knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], + Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))] + ) logRr <- apply(calendarTimeDesignMatrix %*% splineCoefs, 1, sum) logRr <- logRr - mean(logRr) data$calendarTimeRr <- exp(logRr) data <- data %>% mutate(adjustedRate = .data$adjustedRate / .data$calendarTimeRr) - } if (hasSeasonality(sccsModel)) { @@ -63,9 +65,12 @@ adjustOutcomeRatePerMonth <- function(data, sccsModel) { data <- data %>% mutate(monthOfYear = .data$month %% 12 + 1) %>% - inner_join(tibble(monthOfYear = season, - seasonRr = exp(logRr)), - by = "monthOfYear") + inner_join(tibble( + monthOfYear = season, + seasonRr = exp(logRr) + ), + by = "monthOfYear" + ) data <- data %>% mutate(adjustedRate = .data$adjustedRate / .data$seasonRr) @@ -105,17 +110,19 @@ computeTimeStability <- function(studyPopulation, sccsModel = NULL, maxRatio = 1 data <- adjustOutcomeRatePerMonth(data, sccsModel) } computeTwoSidedP <- function(observed, expected) { - pUpperBound = 1 - ppois(observed, expected * maxRatio, lower.tail = TRUE) - pLowerBound = 1 - ppois(observed, expected / maxRatio, lower.tail = FALSE) + pUpperBound <- 1 - ppois(observed, expected * maxRatio, lower.tail = TRUE) + pLowerBound <- 1 - ppois(observed, expected / maxRatio, lower.tail = FALSE) return(min(1, 2 * pmin(pUpperBound, pLowerBound))) } # Season and calendar time splines lack intercept, so need to compute expected count in indirect way: - meanAdjustedRate <- sum(data$adjustedRate * data$observationPeriodCount ) / sum(data$observationPeriodCount) + meanAdjustedRate <- sum(data$adjustedRate * data$observationPeriodCount) / sum(data$observationPeriodCount) data <- data %>% mutate(expected = .data$outcomeCount * meanAdjustedRate / .data$adjustedRate) %>% - mutate(p = computeTwoSidedP(.data$outcomeCount, .data$expected), - alpha = !!alpha / n()) %>% + mutate( + p = computeTwoSidedP(.data$outcomeCount, .data$expected), + alpha = !!alpha / n() + ) %>% mutate(stable = .data$p >= .data$alpha) # print(data[50:100, ], n = 35) # sum(!data$stable) diff --git a/R/EventDepObservation.R b/R/EventDepObservation.R index 22557b3..e032bfe 100644 --- a/R/EventDepObservation.R +++ b/R/EventDepObservation.R @@ -4,15 +4,14 @@ # One major modification: removed possibility to specify covariates for censoring models fitModelsAndPickBest <- function(data) { - - fitCensorModel <- function(model, data){ + fitCensorModel <- function(model, data) { # This function gives a matrix created by multiplying # (Pointwise multiplication) a Matrix M by each column of Matrix S - Yproduct <- function(S, M){ - product <- matrix(NA, nrow(S), ncol(S)*ncol(M)) + Yproduct <- function(S, M) { + product <- matrix(NA, nrow(S), ncol(S) * ncol(M)) for (i in 1:ncol(S)) { - product[,(1 + ncol(M)*(i-1)):(ncol(M)*i)] <- S[,i]*M + product[, (1 + ncol(M) * (i - 1)):(ncol(M) * i)] <- S[, i] * M } return(product) } @@ -20,7 +19,7 @@ fitModelsAndPickBest <- function(data) { #--------------------------------------------------------# # Exponential- Weibull (Age) mixture Model # #--------------------------------------------------------# - mod_ewad2<-function(p, astart, aevent, aend, present){ + mod_ewad2 <- function(p, astart, aevent, aend, present) { # Dmatrixevent <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(aevent))) # Dmatrixeventlog <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(log(aevent)))) # @@ -36,24 +35,24 @@ fitModelsAndPickBest <- function(data) { # gamma0 <- Dmatrixstartlog%*%p[((5*(ncol(Dmatrix))) + 1):(7*(ncol(Dmatrix)))] # log(nu(t,y)) gamma0 <- p[6] + p[7] * log(astart) - lamA<-exp(-thetaA) # 1/rho in the paper - lamB<-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0<-exp(gamma0) # nu - - lik<- ((1-present)*log(pi0*lamA*exp(-lamA*(aend-aevent))+ - (1-pi0)*nu0*lamB*((aend*lamB)^(nu0-1))*exp(-((aend*lamB)^nu0-(aevent*lamB)^nu0))) + - present *log(pi0*exp(-lamA*(aend-aevent))+ - (1-pi0)*exp(-((aend*lamB)^nu0-(aevent*lamB)^nu0)))) - l<-(-2)*sum(lik) - #writeLines(paste(paste(p, collapse = ","), " L =", l)) - return (l) + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu + + lik <- ((1 - present) * log(pi0 * lamA * exp(-lamA * (aend - aevent)) + + (1 - pi0) * nu0 * lamB * ((aend * lamB)^(nu0 - 1)) * exp(-((aend * lamB)^nu0 - (aevent * lamB)^nu0))) + + present * log(pi0 * exp(-lamA * (aend - aevent)) + + (1 - pi0) * exp(-((aend * lamB)^nu0 - (aevent * lamB)^nu0)))) + l <- (-2) * sum(lik) + # writeLines(paste(paste(p, collapse = ","), " L =", l)) + return(l) } #--------------------------------------------------------# # Exponential- Weibull (Interval) mixture Model # #--------------------------------------------------------# - mod_ewid2 <-function(p, aevent, aend, present, Dmatrix){ + mod_ewid2 <- function(p, aevent, aend, present, Dmatrix) { # Dmatrixevent <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(aevent))) # Dmatrixeventlog <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(log(aevent)))) @@ -68,28 +67,27 @@ fitModelsAndPickBest <- function(data) { gamma0 <- p[6] + p[7] * log(aevent) - lamA<-exp(-thetaA) # 1/rho in the paper - lamB<-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0<-exp(gamma0) # nu - - int <- aend-aevent - lik<- - - ((1-present)*log(pi0*lamA*exp(-lamA*int)+ - (1-pi0)*nu0*lamB*((int*lamB)^(nu0-1))*exp(-((int*lamB)^nu0))) + + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu - present *log(pi0*exp(-lamA*int)+ - (1-pi0)*exp(-((int*lamB)^nu0)))) - l<-(-2)*sum(lik) - #writeLines(paste(paste(p, collapse = ","), " L =", l)) + int <- aend - aevent + lik <- + ((1 - present) * log(pi0 * lamA * exp(-lamA * int) + + (1 - pi0) * nu0 * lamB * ((int * lamB)^(nu0 - 1)) * exp(-((int * lamB)^nu0))) + + + present * log(pi0 * exp(-lamA * int) + + (1 - pi0) * exp(-((int * lamB)^nu0)))) + l <- (-2) * sum(lik) + # writeLines(paste(paste(p, collapse = ","), " L =", l)) l } #--------------------------------------------------------# # Exponential- Gamma (Age) mixture Model # #--------------------------------------------------------# - mod_egad2<-function(p, astart, aevent, aend, present, Dmatrix){ + mod_egad2 <- function(p, astart, aevent, aend, present, Dmatrix) { # Dmatrixevent <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(aevent))) # Dmatrixeventlog <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(log(aevent)))) @@ -107,27 +105,27 @@ fitModelsAndPickBest <- function(data) { gamma0 <- p[6] + p[7] * log(astart) - lamA <-exp(-thetaA) # 1/rho in the paper - lamB <-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0 <-exp(gamma0) # nu + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu - rate0 <-nu0*lamB + rate0 <- nu0 * lamB - lik<-((1-present)*log(pi0*lamA*exp(-lamA*(aend-aevent))+ - (1-pi0)*dgamma(aend,shape=nu0,rate=rate0)/ifelse(pgamma(aevent,shape=nu0,rate=rate0,lower.tail=F)==0,0.000000001, pgamma(aevent,shape=nu0,rate=rate0,lower.tail=F))) + - present *log(pi0*exp(-lamA*(aend-aevent))+ - (1-pi0)*pgamma(aend,shape=nu0,rate=rate0,lower.tail=F)/ifelse(pgamma(aevent,shape=nu0,rate=rate0,lower.tail=F)==0, 0.000000001, pgamma(aevent,shape=nu0,rate=rate0,lower.tail=F)))) + lik <- ((1 - present) * log(pi0 * lamA * exp(-lamA * (aend - aevent)) + + (1 - pi0) * dgamma(aend, shape = nu0, rate = rate0) / ifelse(pgamma(aevent, shape = nu0, rate = rate0, lower.tail = F) == 0, 0.000000001, pgamma(aevent, shape = nu0, rate = rate0, lower.tail = F))) + + present * log(pi0 * exp(-lamA * (aend - aevent)) + + (1 - pi0) * pgamma(aend, shape = nu0, rate = rate0, lower.tail = F) / ifelse(pgamma(aevent, shape = nu0, rate = rate0, lower.tail = F) == 0, 0.000000001, pgamma(aevent, shape = nu0, rate = rate0, lower.tail = F)))) - l <-(-2)*sum(lik) - #writeLines(paste(paste(p, collapse = ","), " L =", l)) + l <- (-2) * sum(lik) + # writeLines(paste(paste(p, collapse = ","), " L =", l)) l } #--------------------------------------------------------# # Exponential- Gamma (Interval) mixture Model # #--------------------------------------------------------# - mod_egid2<-function(p, aevent, aend, present, Dmatrix){ + mod_egid2 <- function(p, aevent, aend, present, Dmatrix) { # Dmatrixevent <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(aevent))) # Dmatrixeventlog <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(log(aevent)))) @@ -141,52 +139,63 @@ fitModelsAndPickBest <- function(data) { # gamma0 <- Dmatrixeventlog%*%p[((5*(ncol(Dmatrix))) + 1):(7*(ncol(Dmatrix)))] # log(nu(t,y)) gamma0 <- p[6] + p[7] * log(aevent) - lamA<-exp(-thetaA) # 1/rho in the paper - lamB<-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0<-exp(gamma0) # nu + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu - rate0 <-nu0*lamB + rate0 <- nu0 * lamB int <- aend - aevent - lik<-((1-present)*log(pi0*lamA*exp(-lamA*int)+ - (1-pi0)*dgamma(int,shape=nu0,rate=rate0)) + - present *log(pi0*exp(-lamA*int)+ - (1-pi0)*pgamma(int,shape=nu0,rate=rate0,lower.tail=F))) + lik <- ((1 - present) * log(pi0 * lamA * exp(-lamA * int) + + (1 - pi0) * dgamma(int, shape = nu0, rate = rate0)) + + present * log(pi0 * exp(-lamA * int) + + (1 - pi0) * pgamma(int, shape = nu0, rate = rate0, lower.tail = F))) - l<-(-2)*sum(lik) - #writeLines(paste(paste(p, collapse = ","), " L =", l)) + l <- (-2) * sum(lik) + # writeLines(paste(paste(p, collapse = ","), " L =", l)) l } npar <- 7 - p0 <- rep(0.1, times=npar) # inital values - result <- tryCatch({ - if (model == 1){ - fit <- nlm(mod_ewad2, p=p0, astart=data$astart/365.25, aevent=data$aevent/365.25, aend=data$aend/365.25, present=data$present, - hessian = FALSE, iterlim=1000) - } else if (model == 2){ - fit <- nlm(mod_ewid2, p=p0, aevent=data$aevent/365.25, aend=data$aend/365.25, present=data$present, - hessian = FALSE, iterlim=1000) - } else if (model == 3){ - fit <- nlm(mod_egad2, p=p0, astart=data$astart/365.25, aevent=data$aevent/365.25, aend=data$aend/365.25, present=data$present, - hessian = FALSE, iterlim=1000) - } else { - fit <- nlm(mod_egid2, p=p0, aevent=data$aevent/365.25, aend=data$aend/365.25, present=data$present, - hessian = FALSE, iterlim=1000) + p0 <- rep(0.1, times = npar) # inital values + result <- tryCatch( + { + if (model == 1) { + fit <- nlm(mod_ewad2, + p = p0, astart = data$astart / 365.25, aevent = data$aevent / 365.25, aend = data$aend / 365.25, present = data$present, + hessian = FALSE, iterlim = 1000 + ) + } else if (model == 2) { + fit <- nlm(mod_ewid2, + p = p0, aevent = data$aevent / 365.25, aend = data$aend / 365.25, present = data$present, + hessian = FALSE, iterlim = 1000 + ) + } else if (model == 3) { + fit <- nlm(mod_egad2, + p = p0, astart = data$astart / 365.25, aevent = data$aevent / 365.25, aend = data$aend / 365.25, present = data$present, + hessian = FALSE, iterlim = 1000 + ) + } else { + fit <- nlm(mod_egid2, + p = p0, aevent = data$aevent / 365.25, aend = data$aend / 365.25, present = data$present, + hessian = FALSE, iterlim = 1000 + ) + } + list(model = model, p = fit$estimate, aic = 2 * npar + fit$minimum) + }, + error = function(e) { + missing(e) # suppresses R CMD check note + list(model = model, p = rep(0, npar), aic = 999999999) } - list(model = model, p = fit$estimate, aic = 2*npar + fit$minimum) - }, error = function(e) { - missing(e) # suppresses R CMD check note - list(model = model, p = rep(0,npar), aic = 999999999) - }) + ) return(result) } ParallelLogger::logInfo("Fitting censoring models") cluster <- ParallelLogger::makeCluster(4) results <- ParallelLogger::clusterApply(cluster, 1:4, fitCensorModel, data) ParallelLogger::stopCluster(cluster) - for (i in 1:4){ + for (i in 1:4) { if (results[[i]]$aic == 999999999) { if (results[[i]]$model == 1) { warning("Could not fit exponential - Weibull (Age) mixture Model") diff --git a/R/ModelFitting.R b/R/ModelFitting.R index 632fa34..384f4fc 100644 --- a/R/ModelFitting.R +++ b/R/ModelFitting.R @@ -50,22 +50,27 @@ #' @export fitSccsModel <- function(sccsIntervalData, prior = createPrior("laplace", useCrossValidation = TRUE), - control = createControl(cvType = "auto", - selectorType = "byPid", - startingVariance = 0.1, - seed = 1, - resetCoefficients = TRUE, - noiseLevel = "quiet"), + control = createControl( + cvType = "auto", + selectorType = "byPid", + startingVariance = 0.1, + seed = 1, + resetCoefficients = TRUE, + noiseLevel = "quiet" + ), profileGrid = NULL, profileBounds = c(log(0.1), log(10))) { - if (!is.null(profileGrid) && !is.null(profileBounds)) + if (!is.null(profileGrid) && !is.null(profileBounds)) { stop("Specify either profileGrid or profileBounds") + } ParallelLogger::logTrace("Fitting SCCS model") metaData <- attr(sccsIntervalData, "metaData") if (!is.null(metaData$error)) { - result <- list(status = metaData$error, - metaData = metaData) + result <- list( + status = metaData$error, + metaData = metaData + ) class(result) <- "sccsModel" return(result) } @@ -126,16 +131,20 @@ fitSccsModel <- function(sccsIntervalData, prior$exclude <- intersect(nonRegularized, covariateIds) } cyclopsData <- Cyclops::convertToCyclopsData(sccsIntervalData$outcomes, - sccsIntervalData$covariates, - modelType = "cpr", - addIntercept = FALSE, - checkRowIds = FALSE, - quiet = TRUE) - fit <- tryCatch({ - Cyclops::fitCyclopsModel(cyclopsData, prior = prior, control = control) - }, error = function(e) { - e$message - }) + sccsIntervalData$covariates, + modelType = "cpr", + addIntercept = FALSE, + checkRowIds = FALSE, + quiet = TRUE + ) + fit <- tryCatch( + { + Cyclops::fitCyclopsModel(cyclopsData, prior = prior, control = control) + }, + error = function(e) { + e$message + } + ) if (is.character(fit)) { coefficients <- c(0) estimates <- NULL @@ -145,12 +154,14 @@ fitSccsModel <- function(sccsIntervalData, if (!is.null(profileGrid) || !is.null(profileBounds)) { covariateIds <- intersect(needProfile, as.numeric(Cyclops::getCovariateIds(cyclopsData))) getLikelihoodProfile <- function(covariateId) { - logLikelihoodProfile <- Cyclops::getCyclopsProfileLogLikelihood(object = fit, - parm = covariateId, - x = profileGrid, - bounds = profileBounds, - tolerance = 0.1, - includePenalty = TRUE) + logLikelihoodProfile <- Cyclops::getCyclopsProfileLogLikelihood( + object = fit, + parm = covariateId, + x = profileGrid, + bounds = profileBounds, + tolerance = 0.1, + includePenalty = TRUE + ) return(logLikelihoodProfile) } logLikelihoodProfiles <- lapply(covariateIds, getLikelihoodProfile) @@ -176,26 +187,31 @@ fitSccsModel <- function(sccsIntervalData, estimates$logUb95 <- NA estimates$seLogRr <- NA } else { - ci <- tryCatch({ - result <- confint(fit, parm = intersect(needCi, estimates$covariateId), includePenalty = TRUE) - attr(result, "dimnames")[[1]] <- 1:length(attr(result, "dimnames")[[1]]) - result <- as.data.frame(result) - rownames(result) <- NULL - result - }, error = function(e) { - missing(e) # suppresses R CMD check note - data.frame(covariate = 0, logLb95 = 0, logUb95 = 0) - }) + ci <- tryCatch( + { + result <- confint(fit, parm = intersect(needCi, estimates$covariateId), includePenalty = TRUE) + attr(result, "dimnames")[[1]] <- 1:length(attr(result, "dimnames")[[1]]) + result <- as.data.frame(result) + rownames(result) <- NULL + result + }, + error = function(e) { + missing(e) # suppresses R CMD check note + data.frame(covariate = 0, logLb95 = 0, logUb95 = 0) + } + ) names(ci)[names(ci) == "2.5 %"] <- "logLb95" names(ci)[names(ci) == "97.5 %"] <- "logUb95" ci$evaluations <- NULL estimates <- merge(estimates, ci, by.x = "covariateId", by.y = "covariate", all.x = TRUE) - estimates$seLogRr <- (estimates$logUb95 - estimates$logLb95)/(2*qnorm(0.975)) + estimates$seLogRr <- (estimates$logUb95 - estimates$logLb95) / (2 * qnorm(0.975)) for (param in intersect(needCi, estimates$covariateId)) { - llNull <- Cyclops::getCyclopsProfileLogLikelihood(object = fit, - parm = param, - x = 0, - includePenalty = FALSE)$value + llNull <- Cyclops::getCyclopsProfileLogLikelihood( + object = fit, + parm = param, + x = 0, + includePenalty = FALSE + )$value estimates$llr[estimates$covariateId == param] <- fit$log_likelihood - llNull } } @@ -206,12 +222,14 @@ fitSccsModel <- function(sccsIntervalData, } } } - result <- list(estimates = estimates, - priorVariance = priorVariance, - logLikelihood = logLikelihood, - logLikelihoodProfiles = logLikelihoodProfiles, - status = status, - metaData = metaData) + result <- list( + estimates = estimates, + priorVariance = priorVariance, + logLikelihood = logLikelihood, + logLikelihoodProfiles = logLikelihoodProfiles, + status = status, + metaData = metaData + ) class(result) <- "SccsModel" delta <- Sys.time() - start ParallelLogger::logInfo(paste("Fitting the model took", signif(delta, 3), attr(delta, "units"))) @@ -249,13 +267,15 @@ print.SccsModel <- function(x, ...) { } else { writeLines("Estimates:") d <- x$estimates - output <- tibble(d$covariateName, - d$covariateId, - exp(d$logRr), - exp(d$logLb95), - exp(d$logUb95), - d$logRr, - d$seLogRr) + output <- tibble( + d$covariateName, + d$covariateId, + exp(d$logRr), + exp(d$logLb95), + exp(d$logUb95), + d$logRr, + d$seLogRr + ) colnames(output) <- c("Name", "ID", "Estimate", "LB95CI", "UB95CI", "LogRr", "SeLogRr") print(output, n = 25) @@ -273,31 +293,36 @@ print.SccsModel <- function(x, ...) { #' #' @export getModel <- function(sccsModel) { - if (class(sccsModel) != "SccsModel") + if (class(sccsModel) != "SccsModel") { stop("the sccsModel argument must be of type 'sccsModel'.") + } d <- sccsModel$estimates # d$seLogRr <- (d$logUb95 - d$logRr)/qnorm(0.975) - output <- tibble(d$covariateName, - d$covariateId, - exp(d$logRr), - exp(d$logLb95), - exp(d$logUb95), - d$logRr, - d$seLogRr, - d$originalEraId, - d$originalEraType, - d$originalEraName) - colnames(output) <- c("name", - "id", - "estimate", - "lb95Ci", - "ub95Ci", - "logRr", - "seLogRr", - "originalEraId", - "originalEraType", - "originalEraName") + output <- tibble( + d$covariateName, + d$covariateId, + exp(d$logRr), + exp(d$logLb95), + exp(d$logUb95), + d$logRr, + d$seLogRr, + d$originalEraId, + d$originalEraType, + d$originalEraName + ) + colnames(output) <- c( + "name", + "id", + "estimate", + "lb95Ci", + "ub95Ci", + "logRr", + "seLogRr", + "originalEraId", + "originalEraType", + "originalEraName" + ) return(output) } @@ -311,8 +336,9 @@ getModel <- function(sccsModel) { #' #' @export hasAgeEffect <- function(sccsModel) { - if (class(sccsModel) != "SccsModel") + if (class(sccsModel) != "SccsModel") { stop("the sccsModel argument must be of type 'sccsModel'.") + } estimates <- sccsModel$estimates return(any(estimates$covariateId >= 100 & estimates$covariateId < 200)) } @@ -327,8 +353,9 @@ hasAgeEffect <- function(sccsModel) { #' #' @export hasSeasonality <- function(sccsModel) { - if (class(sccsModel) != "SccsModel") + if (class(sccsModel) != "SccsModel") { stop("the sccsModel argument must be of type 'sccsModel'.") + } estimates <- sccsModel$estimates return(any(estimates$covariateId >= 200 & estimates$covariateId < 300)) } @@ -343,8 +370,9 @@ hasSeasonality <- function(sccsModel) { #' #' @export hasCalendarTimeEffect <- function(sccsModel) { - if (class(sccsModel) != "SccsModel") + if (class(sccsModel) != "SccsModel") { stop("the sccsModel argument must be of type 'sccsModel'.") + } estimates <- sccsModel$estimates return(any(estimates$covariateId >= 300 & estimates$covariateId < 400)) } diff --git a/R/Plots.R b/R/Plots.R index 93cc8f3..30606ec 100644 --- a/R/Plots.R +++ b/R/Plots.R @@ -40,9 +40,9 @@ plotAgeSpans <- function(studyPopulation, arrange(.data$startAge, .data$endAge) %>% mutate(rank = row_number()) - ageLabels <- floor(min(cases$startAge)/365.25):ceiling(max(cases$endAge)/365.25) + ageLabels <- floor(min(cases$startAge) / 365.25):ceiling(max(cases$endAge) / 365.25) if (length(ageLabels) > 10) { - ageLabels <- 10 * (floor(min(cases$startAge)/3652.5):floor(max(cases$endAge)/3652.5)) + ageLabels <- 10 * (floor(min(cases$startAge) / 3652.5):floor(max(cases$endAge) / 3652.5)) } ageBreaks <- ageLabels * 365.25 if (nrow(cases) > maxPersons) { @@ -56,23 +56,26 @@ plotAgeSpans <- function(studyPopulation, ggplot2::geom_errorbarh(color = rgb(0, 0, 0.8), alpha = 0.8) + ggplot2::scale_x_continuous("Age (years)", breaks = ageBreaks, labels = ageLabels) + ggplot2::scale_y_continuous("Case rank") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.x = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } # fileName <- "S:/temp/plot.png" - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -104,17 +107,19 @@ plotAgeSpans <- function(studyPopulation, plotEventObservationDependence <- function(studyPopulation, title = NULL, fileName = NULL) { - - outcomes <- studyPopulation$outcomes %>% group_by(.data$caseId) %>% summarise(outcomeDay = min(.data$outcomeDay), .groups = "drop_last") %>% inner_join(studyPopulation$cases, by = "caseId") %>% - transmute(daysFromEvent = .data$endDay - .data$outcomeDay, - censoring = case_when(.data$noninformativeEndCensor == 1 ~ "Uncensored", - TRUE ~ "Censored")) + transmute( + daysFromEvent = .data$endDay - .data$outcomeDay, + censoring = case_when( + .data$noninformativeEndCensor == 1 ~ "Uncensored", + TRUE ~ "Censored" + ) + ) - ageLabels <- 0:ceiling(max(outcomes$daysFromEvent)/365.25) + ageLabels <- 0:ceiling(max(outcomes$daysFromEvent) / 365.25) ageBreaks <- ageLabels * 365.25 @@ -125,23 +130,26 @@ plotEventObservationDependence <- function(studyPopulation, ggplot2::geom_histogram(binwidth = 30.5, fill = rgb(0, 0, 0.8), alpha = 0.8) + ggplot2::scale_x_continuous("Years from event", breaks = ageBreaks, labels = ageLabels) + ggplot2::scale_y_continuous("Frequency") + - ggplot2::facet_grid(censoring~., scales = "free_y") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.y = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + ggplot2::facet_grid(censoring ~ ., scales = "free_y") + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.y = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -172,7 +180,6 @@ plotExposureCentered <- function(studyPopulation, highlightExposedEvents = TRUE, title = NULL, fileName = NULL) { - if (is.null(exposureEraId)) { exposureEraId <- attr(sccsData, "metaData")$exposureIds if (length(exposureEraId) != 1) { @@ -187,8 +194,10 @@ plotExposureCentered <- function(studyPopulation, filter(.data$eraId == exposureEraId & .data$eraType == "rx") %>% group_by(.data$caseId) %>% inner_join(cases, by = "caseId", copy = TRUE) %>% - mutate(startDay = .data$startDay - .data$offset, - endDay = .data$endDay - .data$offset) %>% + mutate( + startDay = .data$startDay - .data$offset, + endDay = .data$endDay - .data$offset + ) %>% filter(.data$startDay >= 0, .data$startDay < .data$caseEndDay) %>% collect() @@ -198,9 +207,11 @@ plotExposureCentered <- function(studyPopulation, } firstExposures <- exposures %>% group_by(.data$caseId, .data$caseEndDay) %>% - summarise(startDay = min(.data$startDay, na.rm = TRUE), - endDay = min(.data$endDay, na.rm = TRUE), - .groups = "drop_last") + summarise( + startDay = min(.data$startDay, na.rm = TRUE), + endDay = min(.data$endDay, na.rm = TRUE), + .groups = "drop_last" + ) outcomes <- studyPopulation$outcomes %>% inner_join(firstExposures, by = "caseId") %>% @@ -209,8 +220,10 @@ plotExposureCentered <- function(studyPopulation, exposedoutcomes <- exposures %>% inner_join(outcomes, by = "caseId") %>% - filter(.data$outcomeDay >= .data$startDay, - .data$outcomeDay <= .data$endDay) %>% + filter( + .data$outcomeDay >= .data$startDay, + .data$outcomeDay <= .data$endDay + ) %>% select(.data$caseId, .data$delta) %>% mutate(exposed = 1) @@ -219,50 +232,58 @@ plotExposureCentered <- function(studyPopulation, mutate(exposed = coalesce(.data$exposed, 0)) weeks <- dplyr::tibble(number = -26:25) %>% - mutate(start = .data$number*7, - end = .data$number*7 + 7) + mutate( + start = .data$number * 7, + end = .data$number * 7 + 7 + ) events <- weeks %>% full_join(select(outcomes, .data$delta, .data$exposed), by = character()) %>% filter(.data$delta >= .data$start, .data$delta < .data$end) %>% group_by(.data$number, .data$start, .data$end) %>% - summarise(eventsExposed = sum(.data$exposed), - eventsUnexposed = n() - sum(.data$exposed), - .groups = "drop_last") + summarise( + eventsExposed = sum(.data$exposed), + eventsUnexposed = n() - sum(.data$exposed), + .groups = "drop_last" + ) observed <- weeks %>% full_join(transmute(firstExposures, startDelta = -.data$startDay, endDelta = .data$caseEndDay - .data$startDay), by = character()) %>% filter(.data$endDelta >= .data$start, .data$startDelta < .data$end) %>% group_by(.data$number, .data$start, .data$end) %>% - summarise(observed = n(), - .groups = "drop_last") + summarise( + observed = n(), + .groups = "drop_last" + ) if (highlightExposedEvents) { events <- events %>% transmute(.data$start, - .data$end, - type = "Events", - count1 = .data$eventsUnexposed, - count2 = .data$eventsExposed) + .data$end, + type = "Events", + count1 = .data$eventsUnexposed, + count2 = .data$eventsExposed + ) } else { events <- events %>% transmute(.data$start, - .data$end, - type = "Events", - count1 = .data$eventsUnexposed + .data$eventsExposed, - count2 = NA) - + .data$end, + type = "Events", + count1 = .data$eventsUnexposed + .data$eventsExposed, + count2 = NA + ) } observed <- observed %>% transmute(.data$start, - .data$end, - type = "Subjects under observation", - count1 = .data$observed, - count2 = NA) + .data$end, + type = "Subjects under observation", + count1 = .data$observed, + count2 = NA + ) data <- bind_rows(events, observed) - breaks <- seq(-150,150, 30) + breaks <- seq(-150, 150, 30) theme <- ggplot2::element_text(colour = "#000000", size = 12) themeRA <- ggplot2::element_text(colour = "#000000", size = 12, hjust = 1) plot <- ggplot2::ggplot(data, ggplot2::aes(x = .data$start, xmin = .data$start, xmax = .data$end, ymax = .data$count1, ymin = 0)) + @@ -271,23 +292,26 @@ plotExposureCentered <- function(studyPopulation, ggplot2::geom_vline(xintercept = 0, colour = "#000000", lty = 1, size = 1) + ggplot2::scale_x_continuous("Days since first exposure start", breaks = breaks, labels = breaks) + ggplot2::scale_y_continuous("Count") + - ggplot2::facet_grid(type~., scales = "free_y") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_line(colour = "#AAAAAA"), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.y = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + ggplot2::facet_grid(type ~ ., scales = "free_y") + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_line(colour = "#AAAAAA"), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.y = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -337,23 +361,26 @@ plotEventToCalendarTime <- function(studyPopulation, ggplot2::scale_x_date("Calendar time") + ggplot2::scale_y_continuous("Count", limits = c(0, NA)) + ggplot2::facet_grid(.data$type ~ ., scales = "free_y") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_line(colour = "#AAAAAA"), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.y = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_line(colour = "#AAAAAA"), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.y = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) # plot if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 1 + (2 * length(levels)), dpi = 400) + } return(plot) } @@ -377,8 +404,9 @@ plotAgeEffect <- function(sccsModel, rrLim = c(0.1, 10), title = NULL, fileName = NULL) { - if (!hasAgeEffect(sccsModel)) + if (!hasAgeEffect(sccsModel)) { stop("The model does not contain an age effect.") + } estimates <- sccsModel$estimates estimates <- estimates[estimates$covariateId >= 100 & estimates$covariateId < 200, ] @@ -386,16 +414,17 @@ plotAgeEffect <- function(sccsModel, ageKnots <- sccsModel$metaData$age$ageKnots age <- seq(min(ageKnots), max(ageKnots), length.out = 100) ageDesignMatrix <- splines::bs(age, - knots = ageKnots[2:(length(ageKnots) - 1)], - Boundary.knots = ageKnots[c(1, length(ageKnots))]) + knots = ageKnots[2:(length(ageKnots) - 1)], + Boundary.knots = ageKnots[c(1, length(ageKnots))] + ) logRr <- apply(ageDesignMatrix %*% splineCoefs, 1, sum) logRr <- logRr - mean(logRr) rr <- exp(logRr) data <- data.frame(age = age, rr = rr) breaks <- c(0.1, 0.25, 0.5, 1, 2, 4, 6, 8, 10) - ageLabels <- floor(min(ageKnots)/365.25):floor(max(ageKnots)/365.25) + ageLabels <- floor(min(ageKnots) / 365.25):floor(max(ageKnots) / 365.25) if (length(ageLabels) > 10) { - ageLabels <- 10 * (floor(min(ageKnots)/3652.5):floor(max(ageKnots)/3652.5)) + ageLabels <- 10 * (floor(min(ageKnots) / 3652.5):floor(max(ageKnots) / 3652.5)) } ageBreaks <- ageLabels * 365.25 theme <- ggplot2::element_text(colour = "#000000", size = 12) @@ -405,26 +434,30 @@ plotAgeEffect <- function(sccsModel, ggplot2::geom_line(color = rgb(0, 0, 0.8), alpha = 0.8, lwd = 1) + ggplot2::scale_x_continuous("Age", breaks = ageBreaks, labels = ageLabels) + ggplot2::scale_y_continuous("Relative risk", - limits = rrLim, - trans = "log10", - breaks = breaks, - labels = breaks) + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + limits = rrLim, + trans = "log10", + breaks = breaks, + labels = breaks + ) + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.x = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -448,8 +481,9 @@ plotSeasonality <- function(sccsModel, rrLim = c(0.1, 10), title = NULL, fileName = NULL) { - if (!hasSeasonality(sccsModel)) + if (!hasSeasonality(sccsModel)) { stop("The model does not contain seasonality.") + } estimates <- sccsModel$estimates estimates <- estimates[estimates$covariateId >= 200 & estimates$covariateId < 300, ] @@ -471,26 +505,30 @@ plotSeasonality <- function(sccsModel, ggplot2::geom_line(color = rgb(0, 0, 0.8), alpha = 0.8, lwd = 1) + ggplot2::scale_x_continuous("Month", breaks = seasonBreaks, labels = seasonBreaks) + ggplot2::scale_y_continuous("Relative risk", - limits = rrLim, - trans = "log10", - breaks = breaks, - labels = breaks) + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + limits = rrLim, + trans = "log10", + breaks = breaks, + labels = breaks + ) + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.x = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -530,24 +568,27 @@ plotCalendarTimeSpans <- function(studyPopulation, ggplot2::geom_errorbarh(color = rgb(0, 0, 0.8)) + ggplot2::scale_x_date("Calendar time") + ggplot2::scale_y_continuous("Case rank") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major.x = ggplot2::element_line(colour = "#AAAAAA", size = 0.2), - panel.grid.major.y = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major.x = ggplot2::element_line(colour = "#AAAAAA", size = 0.2), + panel.grid.major.y = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.x = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } # fileName <- "S:/temp/plot.png" - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -571,8 +612,9 @@ plotCalendarTimeEffect <- function(sccsModel, rrLim = c(0.1, 10), title = NULL, fileName = NULL) { - if (!hasCalendarTimeEffect(sccsModel)) + if (!hasCalendarTimeEffect(sccsModel)) { stop("The model does not contain a calendar time effect.") + } estimates <- sccsModel$estimates estimates <- estimates[estimates$covariateId >= 300 & estimates$covariateId < 400, ] @@ -580,8 +622,9 @@ plotCalendarTimeEffect <- function(sccsModel, calendarTimeKnots <- sccsModel$metaData$calendarTime$calendarTimeKnots calendarTime <- seq(min(calendarTimeKnots), max(calendarTimeKnots), length.out = 100) calendarTimeDesignMatrix <- splines::bs(calendarTime, - knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], - Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))]) + knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], + Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))] + ) logRr <- apply(calendarTimeDesignMatrix %*% splineCoefs, 1, sum) logRr <- logRr - mean(logRr) rr <- exp(logRr) @@ -594,25 +637,29 @@ plotCalendarTimeEffect <- function(sccsModel, ggplot2::geom_line(color = rgb(0, 0, 0.8), alpha = 0.8, lwd = 1) + ggplot2::scale_x_date("Calendar Time") + ggplot2::scale_y_continuous("Relative risk", - limits = rrLim, - trans = "log10", - breaks = breaks, - labels = breaks) + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_line(colour = "#AAAAAA", size = 0.2), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + limits = rrLim, + trans = "log10", + breaks = breaks, + labels = breaks + ) + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_line(colour = "#AAAAAA", size = 0.2), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.x = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } diff --git a/R/Power.R b/R/Power.R index 6442ccb..1e871ac 100644 --- a/R/Power.R +++ b/R/Power.R @@ -47,8 +47,9 @@ computeMdrr <- function(sccsIntervalData, power = 0.8, twoSided = TRUE, method = "SRL1") { - if (!method %in% c("proportion", "binomial", "SRL1", "SRL2", "ageEffects")) + if (!method %in% c("proportion", "binomial", "SRL1", "SRL2", "ageEffects")) { stop("Method must be either 'proportion', 'binomial', 'SRL1', 'SRL2', or 'ageEffects'.") + } # Check if there is anyone with the exposure at all; nExposed <- sccsIntervalData$covariates %>% @@ -56,12 +57,14 @@ computeMdrr <- function(sccsIntervalData, count() %>% pull() if (nExposed == 0) { - result <- tibble(timeExposed = 0, - timeTotal = 0, - propTimeExposed = 0, - propPopulationExposed = 0, - events = 0, - mdrr = Inf) + result <- tibble( + timeExposed = 0, + timeTotal = 0, + propTimeExposed = 0, + propPopulationExposed = 0, + events = 0, + mdrr = Inf + ) return(result) } @@ -76,9 +79,11 @@ computeMdrr <- function(sccsIntervalData, overall <- sccsIntervalData$outcomes %>% filter(.data$stratumId %in% exposedStratumIds) %>% - summarise(time = sum(.data$time, na.rm = TRUE), - observationPeriods = n_distinct(.data$stratumId), - events = sum(.data$y, na.rm = TRUE)) %>% + summarise( + time = sum(.data$time, na.rm = TRUE), + observationPeriods = n_distinct(.data$stratumId), + events = sum(.data$y, na.rm = TRUE) + ) %>% collect() exposed <- sccsIntervalData$outcomes %>% @@ -101,57 +106,57 @@ computeMdrr <- function(sccsIntervalData, if (method == "distribution") { # expression 5 - computePower <- function(p, z, r, n, alpha) - { + computePower <- function(p, z, r, n, alpha) { zbnum <- log(p) * sqrt(n * p * r * (1 - r)) - z * sqrt(p) zbden <- p * r + 1 - r zb <- zbnum / zbden power <- pnorm(zb) - if (power < alpha | n < 1) + if (power < alpha | n < 1) { power <- alpha + } return(power) } } if (method == "binomial") { # expression 6 - computePower <- function(p, z, r, n, alpha) - { - pi <- p*r/(p*r + 1 - r) + computePower <- function(p, z, r, n, alpha) { + pi <- p * r / (p * r + 1 - r) tAlt <- asin(sqrt(pi)) tNull <- asin(sqrt(r)) zb <- sqrt(n * 4 * (tAlt - tNull)^2) - z power <- pnorm(zb) - if (power < alpha | n < 1) + if (power < alpha | n < 1) { power <- alpha + } return(power) } } if (method == "SRL1") { # expression 7 - computePowerSrl <- function(b, z, r, n, alpha) - { + computePowerSrl <- function(b, z, r, n, alpha) { A <- 2 * ((exp(b) * r / (exp(b) * r + 1 - r)) * b - log(exp(b) * r + 1 - r)) B <- b^2 / A * exp(b) * r * (1 - r) / (exp(b) * r + 1 - r)^2 zb <- (sqrt(n * A) - z) / sqrt(B) power <- pnorm(zb) - if (power < alpha | n < 1) + if (power < alpha | n < 1) { power <- alpha + } return(power) } } if (method == "SRL2") { # expression 8 - computePowerSrl <- function(b, z, r, n, alpha) - { + computePowerSrl <- function(b, z, r, n, alpha) { A <- 2 * pr * (exp(b) * r + 1 - r) / (1 + pr * r * (exp(b) - 1)) * ((exp(b) * r / (exp(b) * r + 1 - r)) * b - log(exp(b) * r + 1 - r)) B <- b^2 / A * pr * (exp(b) * r + 1 - r) / (1 + pr * r * (exp(b) - 1)) * exp(b) * r * (1 - r) / (exp(b) * r + 1 - r)^2 zb <- (sqrt(n * A) - z) / sqrt(B) power <- pnorm(zb) - if (power < alpha | n < 1) + if (power < alpha | n < 1) { power <- alpha + } return(power) } } @@ -167,8 +172,7 @@ computeMdrr <- function(sccsIntervalData, M <- L + (H - L) / 2 if (method %in% c("SRL1", "SRL2")) { powerM <- computePowerSrl(M, z, r, n, alpha) - } - else { + } else { powerM <- computePower(exp(M), z, r, n, alpha) } d <- powerM - power @@ -179,18 +183,21 @@ computeMdrr <- function(sccsIntervalData, } else { return(M) } - if (M == 0 || M == 10) + if (M == 0 || M == 10) { return(M) + } } } mdLogRr <- binarySearch(z, r, n, power, alpha) mdrr <- exp(mdLogRr) - result <- tibble(timeExposed = timeExposed, - timeTotal = timeTotal, - propTimeExposed = round(r, 4), - propPopulationExposed = round(pr, 4), - events = n, - mdrr = round(mdrr, 4)) + result <- tibble( + timeExposed = timeExposed, + timeTotal = timeTotal, + propTimeExposed = round(r, 4), + propPopulationExposed = round(pr, 4), + events = n, + mdrr = round(mdrr, 4) + ) return(result) } diff --git a/R/RunAnalyses.R b/R/RunAnalyses.R index 1b1a459..708545e 100644 --- a/R/RunAnalyses.R +++ b/R/RunAnalyses.R @@ -133,30 +133,39 @@ runSccsAnalyses <- function(connectionDetails, fitSccsModelThreads = 1, cvThreads = 1, analysesToExclude = NULL) { - for (exposureOutcome in exposureOutcomeList) + for (exposureOutcome in exposureOutcomeList) { stopifnot(class(exposureOutcome) == "exposureOutcome") - for (sccsAnalysis in sccsAnalysisList) + } + for (sccsAnalysis in sccsAnalysisList) { stopifnot(class(sccsAnalysis) == "sccsAnalysis") + } if (!is.null(oracleTempSchema) && oracleTempSchema != "") { warning("The 'oracleTempSchema' argument is deprecated. Use 'tempEmulationSchema' instead.") tempEmulationSchema <- oracleTempSchema } - uniqueExposureOutcomeList <- unique(ParallelLogger::selectFromList(exposureOutcomeList, - c("exposureId", "outcomeId"))) - if (length(uniqueExposureOutcomeList) != length(exposureOutcomeList)) + uniqueExposureOutcomeList <- unique(ParallelLogger::selectFromList( + exposureOutcomeList, + c("exposureId", "outcomeId") + )) + if (length(uniqueExposureOutcomeList) != length(exposureOutcomeList)) { stop("Duplicate exposure-outcomes pairs are not allowed") + } uniqueAnalysisIds <- unlist(unique(ParallelLogger::selectFromList(sccsAnalysisList, "analysisId"))) - if (length(uniqueAnalysisIds) != length(sccsAnalysisList)) + if (length(uniqueAnalysisIds) != length(sccsAnalysisList)) { stop("Duplicate analysis IDs are not allowed") + } - if (!file.exists(outputFolder)) + if (!file.exists(outputFolder)) { dir.create(outputFolder) + } - referenceTable <- createReferenceTable(sccsAnalysisList, - exposureOutcomeList, - outputFolder, - combineDataFetchAcrossOutcomes, - analysesToExclude) + referenceTable <- createReferenceTable( + sccsAnalysisList, + exposureOutcomeList, + outputFolder, + combineDataFetchAcrossOutcomes, + analysesToExclude + ) sccsAnalysisPerRow <- attr(referenceTable, "sccsAnalysisPerRow") instantiatedExposureOutcomePerRow <- attr(referenceTable, "instantiatedExposureOutcomePerRow") @@ -171,38 +180,44 @@ runSccsAnalyses <- function(connectionDetails, head(1) loadConcepts <- loadConceptsPerLoad[[referenceRow$loadId]] - if (length(loadConcepts$exposureIds) == 1 && loadConcepts$exposureIds[1] == "all") + if (length(loadConcepts$exposureIds) == 1 && loadConcepts$exposureIds[1] == "all") { loadConcepts$exposureIds <- c() + } useCustomCovariates <- (length(loadConcepts$customCovariateIds) > 0) - if (length(loadConcepts$customCovariateIds) == 1 && loadConcepts$customCovariateIds[1] == "all") + if (length(loadConcepts$customCovariateIds) == 1 && loadConcepts$customCovariateIds[1] == "all") { loadConcepts$customCovariateIds <- c() + } outcomeIds <- unique(loadConcepts$outcomeIds) exposureIds <- unique(loadConcepts$exposureIds) customCovariateIds <- unique(loadConcepts$customCovariateIds) - args <- list(connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - tempEmulationSchema = tempEmulationSchema, - exposureDatabaseSchema = exposureDatabaseSchema, - exposureTable = exposureTable, - outcomeDatabaseSchema = outcomeDatabaseSchema, - outcomeTable = outcomeTable, - customCovariateDatabaseSchema = customCovariateDatabaseSchema, - customCovariateTable = customCovariateTable, - nestingCohortDatabaseSchema = nestingCohortDatabaseSchema, - nestingCohortTable = nestingCohortTable, - cdmVersion = cdmVersion, - exposureIds = exposureIds, - outcomeIds = outcomeIds, - useCustomCovariates = useCustomCovariates, - customCovariateIds = customCovariateIds, - useNestingCohort = loadConcepts$nestingCohortId != -1, - nestingCohortId = loadConcepts$nestingCohortId, - deleteCovariatesSmallCount = loadConcepts$deleteCovariatesSmallCount, - studyStartDate = loadConcepts$studyStartDate, - studyEndDate = loadConcepts$studyEndDate, - maxCasesPerOutcome = loadConcepts$maxCasesPerOutcome) - sccsDataObjectsToCreate[[length(sccsDataObjectsToCreate) + 1]] <- list(args = args, - sccsDataFileName = file.path(outputFolder, sccsDataFileName)) + args <- list( + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = tempEmulationSchema, + exposureDatabaseSchema = exposureDatabaseSchema, + exposureTable = exposureTable, + outcomeDatabaseSchema = outcomeDatabaseSchema, + outcomeTable = outcomeTable, + customCovariateDatabaseSchema = customCovariateDatabaseSchema, + customCovariateTable = customCovariateTable, + nestingCohortDatabaseSchema = nestingCohortDatabaseSchema, + nestingCohortTable = nestingCohortTable, + cdmVersion = cdmVersion, + exposureIds = exposureIds, + outcomeIds = outcomeIds, + useCustomCovariates = useCustomCovariates, + customCovariateIds = customCovariateIds, + useNestingCohort = loadConcepts$nestingCohortId != -1, + nestingCohortId = loadConcepts$nestingCohortId, + deleteCovariatesSmallCount = loadConcepts$deleteCovariatesSmallCount, + studyStartDate = loadConcepts$studyStartDate, + studyEndDate = loadConcepts$studyEndDate, + maxCasesPerOutcome = loadConcepts$maxCasesPerOutcome + ) + sccsDataObjectsToCreate[[length(sccsDataObjectsToCreate) + 1]] <- list( + args = args, + sccsDataFileName = file.path(outputFolder, sccsDataFileName) + ) } } @@ -215,9 +230,11 @@ runSccsAnalyses <- function(connectionDetails, analysisRow <- sccsAnalysisPerRow[[refRow$rowId]] args <- analysisRow$createStudyPopulationArgs args$outcomeId <- refRow$outcomeId - studyPopFilesToCreate[[length(studyPopFilesToCreate) + 1]] <- list(args = args, - sccsDataFile = file.path(outputFolder, refRow$sccsDataFile), - studyPopFile = file.path(outputFolder, refRow$studyPopFile)) + studyPopFilesToCreate[[length(studyPopFilesToCreate) + 1]] <- list( + args = args, + sccsDataFile = file.path(outputFolder, refRow$sccsDataFile), + studyPopFile = file.path(outputFolder, refRow$studyPopFile) + ) } # Create arguments for interval data objects --------------------------------------------------- @@ -236,8 +253,9 @@ runSccsAnalyses <- function(connectionDetails, args <- analysisRow$createScriIntervalDataArgs } covariateSettings <- args$eraCovariateSettings - if (is(covariateSettings, "EraCovariateSettings")) + if (is(covariateSettings, "EraCovariateSettings")) { covariateSettings <- list(covariateSettings) + } if (!sccs) { covariateSettings[[length(covariateSettings) + 1]] <- args$controlIntervalSettings } @@ -247,8 +265,9 @@ runSccsAnalyses <- function(connectionDetails, if (length(settings$includeEraIds) != 0) { for (includeEraId in settings$includeEraIds) { if (is.character(includeEraId)) { - if (is.null(exposureOutcome[[includeEraId]])) + if (is.null(exposureOutcome[[includeEraId]])) { stop(paste("Variable", includeEraId, " not found in exposure-outcome pair")) + } includeEraIds <- c(includeEraIds, exposureOutcome[[includeEraId]]) } else { includeEraIds <- c(includeEraIds, includeEraId) @@ -259,8 +278,9 @@ runSccsAnalyses <- function(connectionDetails, if (length(settings$excludeEraIds) != 0) { for (excludeEraId in settings$excludeEraIds) { if (is.character(excludeEraId)) { - if (is.null(exposureOutcome[[excludeEraId]])) + if (is.null(exposureOutcome[[excludeEraId]])) { stop(paste("Variable", excludeEraId, " not found in exposure-outcome pair")) + } excludeEraIds <- c(excludeEraIds, exposureOutcome[[excludeEraId]]) } else { excludeEraIds <- c(excludeEraIds, excludeEraId) @@ -279,11 +299,13 @@ runSccsAnalyses <- function(connectionDetails, } sccsDataFileName <- refRow$sccsDataFile studyPopFile <- refRow$studyPopFile - sccsIntervalDataObjectsToCreate[[length(sccsIntervalDataObjectsToCreate) + 1]] <- list(args = args, - sccs = sccs, - sccsDataFileName = file.path(outputFolder, sccsDataFileName), - studyPopFile = file.path(outputFolder, studyPopFile), - sccsIntervalDataFileName = file.path(outputFolder, sccsIntervalDataFile)) + sccsIntervalDataObjectsToCreate[[length(sccsIntervalDataObjectsToCreate) + 1]] <- list( + args = args, + sccs = sccs, + sccsDataFileName = file.path(outputFolder, sccsDataFileName), + studyPopFile = file.path(outputFolder, studyPopFile), + sccsIntervalDataFileName = file.path(outputFolder, sccsIntervalDataFile) + ) } # Create arguments for model objects --------------------------------------------- @@ -295,9 +317,11 @@ runSccsAnalyses <- function(connectionDetails, analysisRow <- sccsAnalysisPerRow[[refRow$rowId]] args <- analysisRow$fitSccsModelArgs args$control$threads <- cvThreads - sccsModelObjectsToCreate[[length(sccsModelObjectsToCreate) + 1]] <- list(args = args, - sccsIntervalDataFileName = file.path(outputFolder, refRow$sccsIntervalDataFile), - sccsModelFileName = file.path(outputFolder, sccsModelFile)) + sccsModelObjectsToCreate[[length(sccsModelObjectsToCreate) + 1]] <- list( + args = args, + sccsIntervalDataFileName = file.path(outputFolder, refRow$sccsIntervalDataFile), + sccsModelFileName = file.path(outputFolder, sccsModelFile) + ) } referenceTable$loadId <- NULL @@ -358,10 +382,12 @@ createReferenceTable <- function(sccsAnalysisList, instantiatedExposureOutcome <- exposureOutcome instantiatedExposureOutcome$exposureId <- .selectByType(sccsAnalysis$exposureType, exposureOutcome$exposureId, "exposure") instantiatedExposureOutcome$outcomeId <- .selectByType(sccsAnalysis$outcomeType, exposureOutcome$outcomeId, "outcome") - row <- tibble(rowId = rowId, - exposureId = instantiatedExposureOutcome$exposureId, - outcomeId = instantiatedExposureOutcome$outcomeId, - analysisId = sccsAnalysis$analysisId) + row <- tibble( + rowId = rowId, + exposureId = instantiatedExposureOutcome$exposureId, + outcomeId = instantiatedExposureOutcome$outcomeId, + analysisId = sccsAnalysis$analysisId + ) referenceTable <- rbind(referenceTable, row) sccsAnalysisPerRow[[rowId]] <- sccsAnalysis instantiatedExposureOutcomePerRow[[rowId]] <- instantiatedExposureOutcome @@ -384,8 +410,9 @@ createReferenceTable <- function(sccsAnalysisList, } else { for (exposureId in sccsAnalysis$getDbSccsDataArgs$exposureIds) { if (suppressWarnings(is.na(as.numeric(exposureId)))) { - if (is.null(exposureOutcome[[exposureId]])) + if (is.null(exposureOutcome[[exposureId]])) { stop(paste("Variable", exposureId, " not found in exposure-outcome pair")) + } exposureIds <- c(exposureIds, exposureOutcome[[exposureId]]) } else { exposureIds <- c(exposureIds, as.numeric(exposureId)) @@ -400,8 +427,9 @@ createReferenceTable <- function(sccsAnalysisList, } else { for (customCovariateId in sccsAnalysis$getDbSccsDataArgs$customCovariateIds) { if (is.character(customCovariateId)) { - if (is.null(exposureOutcome[[customCovariateId]])) + if (is.null(exposureOutcome[[customCovariateId]])) { stop(paste("Variable", customCovariateId, " not found in exposure-outcome pair")) + } customCovariateIds <- c(customCovariateIds, exposureOutcome[[customCovariateId]]) } else { customCovariateIds <- c(customCovariateIds, customCovariateId) @@ -413,15 +441,17 @@ createReferenceTable <- function(sccsAnalysisList, if (sccsAnalysis$getDbSccsDataArgs$useNestingCohort) { nestingCohortId <- sccsAnalysis$getDbSccsDataArgs$nestingCohortId } - row <- list(outcomeId = outcomeId, - exposureIds = exposureIds, - customCovariateIds = customCovariateIds, - nestingCohortId = nestingCohortId, - deleteCovariatesSmallCount = sccsAnalysis$getDbSccsDataArgs$deleteCovariatesSmallCount, - studyStartDate = sccsAnalysis$getDbSccsDataArgs$studyStartDate, - studyEndDate = sccsAnalysis$getDbSccsDataArgs$studyEndDate, - maxCasesPerOutcome = sccsAnalysis$getDbSccsDataArgs$maxCasesPerOutcome, - rowId = rowId) + row <- list( + outcomeId = outcomeId, + exposureIds = exposureIds, + customCovariateIds = customCovariateIds, + nestingCohortId = nestingCohortId, + deleteCovariatesSmallCount = sccsAnalysis$getDbSccsDataArgs$deleteCovariatesSmallCount, + studyStartDate = sccsAnalysis$getDbSccsDataArgs$studyStartDate, + studyEndDate = sccsAnalysis$getDbSccsDataArgs$studyEndDate, + maxCasesPerOutcome = sccsAnalysis$getDbSccsDataArgs$maxCasesPerOutcome, + rowId = rowId + ) conceptsPerLoad[[length(conceptsPerLoad) + 1]] <- row rowId <- rowId + 1 } @@ -429,20 +459,28 @@ createReferenceTable <- function(sccsAnalysisList, # Group loads where possible if (combineDataFetchAcrossOutcomes) { - uniqueLoads <- unique(ParallelLogger::selectFromList(conceptsPerLoad, - c("nestingCohortId", - "deleteCovariatesSmallCount", - "studyStartDate", - "studyEndDate", - "maxCasesPerOutcome"))) + uniqueLoads <- unique(ParallelLogger::selectFromList( + conceptsPerLoad, + c( + "nestingCohortId", + "deleteCovariatesSmallCount", + "studyStartDate", + "studyEndDate", + "maxCasesPerOutcome" + ) + )) } else { - uniqueLoads <- unique(ParallelLogger::selectFromList(conceptsPerLoad, - c("nestingCohortId", - "deleteCovariatesSmallCount", - "studyStartDate", - "studyEndDate", - "maxCasesPerOutcome", - "outcomeId"))) + uniqueLoads <- unique(ParallelLogger::selectFromList( + conceptsPerLoad, + c( + "nestingCohortId", + "deleteCovariatesSmallCount", + "studyStartDate", + "studyEndDate", + "maxCasesPerOutcome", + "outcomeId" + ) + )) } # Compute unions of concept sets @@ -474,14 +512,16 @@ createReferenceTable <- function(sccsAnalysisList, } rowIds <- c(rowIds, groupable$rowId) } - loadConceptsPerLoad[[loadId]] <- list(exposureIds = exposureIds, - outcomeIds = outcomeIds, - customCovariateIds = customCovariateIds, - nestingCohortId = groupables[[1]]$nestingCohortId, - deleteCovariatesSmallCount = groupables[[1]]$deleteCovariatesSmallCount, - studyStartDate = groupables[[1]]$studyStartDate, - studyEndDate = groupables[[1]]$studyEndDate, - maxCasesPerOutcome = groupables[[1]]$maxCasesPerOutcome) + loadConceptsPerLoad[[loadId]] <- list( + exposureIds = exposureIds, + outcomeIds = outcomeIds, + customCovariateIds = customCovariateIds, + nestingCohortId = groupables[[1]]$nestingCohortId, + deleteCovariatesSmallCount = groupables[[1]]$deleteCovariatesSmallCount, + studyStartDate = groupables[[1]]$studyStartDate, + studyEndDate = groupables[[1]]$studyEndDate, + maxCasesPerOutcome = groupables[[1]]$maxCasesPerOutcome + ) sccsDataFileName <- .createSccsDataFileName(loadId) referenceTable$loadId[rowIds] <- loadId referenceTable$sccsDataFile[rowIds] <- sccsDataFileName @@ -491,16 +531,26 @@ createReferenceTable <- function(sccsAnalysisList, # Add study population filenames -------------------------- analysisIds <- unlist(ParallelLogger::selectFromList(sccsAnalysisList, "analysisId")) uniqueStudyPopArgs <- unique(ParallelLogger::selectFromList(sccsAnalysisList, "createStudyPopulationArgs")) - uniqueStudyPopArgs <- lapply(uniqueStudyPopArgs, function(x) return(x[[1]])) - studyPopId <- sapply(sccsAnalysisList, - function(sccsAnalysis, uniqueStudyPopArgs) return(which.list(uniqueStudyPopArgs, - sccsAnalysis$createStudyPopulationArgs)), - uniqueStudyPopArgs) + uniqueStudyPopArgs <- lapply(uniqueStudyPopArgs, function(x) { + return(x[[1]]) + }) + studyPopId <- sapply( + sccsAnalysisList, + function(sccsAnalysis, uniqueStudyPopArgs) { + return(which.list( + uniqueStudyPopArgs, + sccsAnalysis$createStudyPopulationArgs + )) + }, + uniqueStudyPopArgs + ) analysisIdToStudyPopId <- tibble(analysisId = analysisIds, studyPopId = studyPopId) referenceTable <- inner_join(referenceTable, analysisIdToStudyPopId, by = "analysisId") - referenceTable$studyPopFile <- .createStudyPopulationFileName(loadId = referenceTable$loadId, - studyPopId = referenceTable$studyPopId, - outcomeId = referenceTable$outcomeId) + referenceTable$studyPopFile <- .createStudyPopulationFileName( + loadId = referenceTable$loadId, + studyPopId = referenceTable$studyPopId, + outcomeId = referenceTable$outcomeId + ) # Add interval data and model filenames ----------------------------------------------------- for (sccsAnalysis in sccsAnalysisList) { @@ -511,16 +561,20 @@ createReferenceTable <- function(sccsAnalysisList, } generateFileName <- function(i) { - return(.createSccsIntervalDataFileName(paste("Analysis_", referenceTable$analysisId[i], sep = ""), - referenceTable$exposureId[i], - referenceTable$outcomeId[i])) + return(.createSccsIntervalDataFileName( + paste("Analysis_", referenceTable$analysisId[i], sep = ""), + referenceTable$exposureId[i], + referenceTable$outcomeId[i] + )) } referenceTable$sccsIntervalDataFile <- generateFileName(1:nrow(referenceTable)) generateFileName <- function(i) { - return(.createSccsModelFileName(paste("Analysis_", referenceTable$analysisId[i], sep = ""), - referenceTable$exposureId[i], - referenceTable$outcomeId[i])) + return(.createSccsModelFileName( + paste("Analysis_", referenceTable$analysisId[i], sep = ""), + referenceTable$exposureId[i], + referenceTable$outcomeId[i] + )) } referenceTable$sccsModelFile <- generateFileName(1:nrow(referenceTable)) @@ -535,9 +589,11 @@ createReferenceTable <- function(sccsAnalysisList, referenceTable <- referenceTable %>% anti_join(analysesToExclude, by = matchingColumns) countAfter <- nrow(referenceTable) - ParallelLogger::logInfo(sprintf("Removed %d of the %d exposure-outcome-analysis combinations as specified by the user.", - countBefore - countAfter, - countBefore)) + ParallelLogger::logInfo(sprintf( + "Removed %d of the %d exposure-outcome-analysis combinations as specified by the user.", + countBefore - countAfter, + countBefore + )) } return(referenceTable) @@ -545,7 +601,11 @@ createReferenceTable <- function(sccsAnalysisList, which.list <- function(list, object) { return(do.call("c", lapply(1:length(list), function(i, list, object) { - if (identical(list[[i]], object)) return(i) else return(c()) + if (identical(list[[i]], object)) { + return(i) + } else { + return(c()) + } }, list, object))) } @@ -596,9 +656,11 @@ createSccsModelObject <- function(params) { sccsIntervalData <- loadSccsIntervalData(params$sccsIntervalDataFileName) params$args$sccsIntervalData <- sccsIntervalData # sccsModel <- do.call("fitSccsModel", params$args) - sccsModel <- fitSccsModel(sccsIntervalData = sccsIntervalData, - prior = params$args$prior, - control = params$args$control) + sccsModel <- fitSccsModel( + sccsIntervalData = sccsIntervalData, + prior = params$args$prior, + control = params$args$control + ) saveRDS(sccsModel, params$sccsModelFileName) return(NULL) } @@ -633,9 +695,10 @@ createSccsModelObject <- function(params) { if (is.null(type)) { if (is.list(value)) { stop(paste("Multiple ", - label, - "s specified, but none selected in analyses (comparatorType).", - sep = "")) + label, + "s specified, but none selected in analyses (comparatorType).", + sep = "" + )) } return(value) } else { @@ -672,25 +735,31 @@ summarizeSccsAnalyses <- function(referenceTable, outputFolder) { estimates <- sccsModel$estimates[sccsModel$estimates$originalEraId == referenceTable$exposureId[i], ] if (!is.null(estimates) && nrow(estimates) != 0) { for (j in 1:nrow(estimates)) { - estimatesToInsert <- c(rr = exp(estimates$logRr[j]), - ci95lb = exp(estimates$logLb95[j]), - ci95ub = exp(estimates$logUb95[j]), - logRr = estimates$logRr[j], - seLogRr = estimates$seLogRr[j], - llr = estimates$llr) + estimatesToInsert <- c( + rr = exp(estimates$logRr[j]), + ci95lb = exp(estimates$logLb95[j]), + ci95ub = exp(estimates$logUb95[j]), + logRr = estimates$logRr[j], + seLogRr = estimates$seLogRr[j], + llr = estimates$llr + ) if (grepl(".*, day -?[0-9]+--?[0-9]*$", estimates$covariateName[j])) { name <- as.character(estimates$covariateName[j]) - pos1 <- attr(regexpr("^[^:]*:", name),"match.length") - 1 + pos1 <- attr(regexpr("^[^:]*:", name), "match.length") - 1 pos2 <- regexpr(", day -?[0-9]+--?[0-9]*$", name) + 2 - label <- paste(substr(name, 1, pos1), - substr(name, pos2, nchar(name))) + label <- paste( + substr(name, 1, pos1), + substr(name, pos2, nchar(name)) + ) } else { label <- sub(":.*$", "", estimates$covariateName[j]) } - names(estimatesToInsert) <- paste0(names(estimatesToInsert), - "(", - label, - ")") + names(estimatesToInsert) <- paste0( + names(estimatesToInsert), + "(", + label, + ")" + ) for (colName in names(estimatesToInsert)) { if (!(colName %in% colnames(result))) { result$newVar <- as.numeric(NA) diff --git a/R/SccsData.R b/R/SccsData.R index c1ee7d1..aa2e16f 100644 --- a/R/SccsData.R +++ b/R/SccsData.R @@ -47,12 +47,15 @@ setClass("SccsData", contains = "Andromeda") #' #' @export saveSccsData <- function(SccsData, file) { - if (missing(SccsData)) + if (missing(SccsData)) { stop("Must specify SccsData") - if (missing(file)) + } + if (missing(file)) { stop("Must specify file") - if (!inherits(SccsData, "SccsData")) + } + if (!inherits(SccsData, "SccsData")) { stop("Data not of class SccsData") + } Andromeda::saveAndromeda(SccsData, file) } @@ -69,10 +72,12 @@ saveSccsData <- function(SccsData, file) { #' #' @export loadSccsData <- function(file) { - if (!file.exists(file)) + if (!file.exists(file)) { stop("Cannot find file ", file) - if (file.info(file)$isdir) - stop(file , " is a folder, but should be a file") + } + if (file.info(file)$isdir) { + stop(file, " is a folder, but should be a file") + } SccsData <- Andromeda::loadAndromeda(file) class(SccsData) <- "SccsData" attr(class(SccsData), "package") <- "SelfControlledCaseSeries" @@ -91,11 +96,15 @@ setMethod("show", "SccsData", function(object) { if (length(metaData$exposureIds) == 0) { cli::cat_line("All exposures") } else { - cli::cat_line(paste("Exposure cohort ID(s):", - paste(metaData$exposureIds, collapse = ","))) + cli::cat_line(paste( + "Exposure cohort ID(s):", + paste(metaData$exposureIds, collapse = ",") + )) } - cli::cat_line(paste("Outcome cohort ID(s):", - paste(metaData$outcomeIds, collapse = ","))) + cli::cat_line(paste( + "Outcome cohort ID(s):", + paste(metaData$outcomeIds, collapse = ",") + )) cli::cat_line("") cli::cat_line(pillar::style_subtle("Inherits from Andromeda:")) class(object) <- "Andromeda" @@ -109,8 +118,9 @@ setMethod("show", "SccsData", function(object) { #' @export #' @rdname SccsData-class setMethod("summary", "SccsData", function(object) { - if (!Andromeda::isValidAndromeda(object)) + if (!Andromeda::isValidAndromeda(object)) { stop("Object is not valid. Probably the Andromeda object was closed.") + } caseCount <- object$cases %>% count() %>% pull() @@ -120,17 +130,21 @@ setMethod("summary", "SccsData", function(object) { filter(.data$eraType == "hoi") %>% inner_join(object$cases, by = "caseId") %>% group_by(.data$eraId) %>% - summarise(outcomeSubjects = n_distinct(.data$personId), - outcomeEvents = count(), - outcomeObsPeriods = n_distinct(.data$caseId)) %>% + summarise( + outcomeSubjects = n_distinct(.data$personId), + outcomeEvents = count(), + outcomeObsPeriods = n_distinct(.data$caseId) + ) %>% rename(outcomeId = .data$eraId) %>% collect() - result <- list(metaData = attr(object, "metaData"), - caseCount = caseCount, - outcomeCounts = outcomeCounts, - eraTypeCount = object$eraRef %>% count() %>% pull(), - eraCount = object$eras %>% count() %>% pull()) + result <- list( + metaData = attr(object, "metaData"), + caseCount = caseCount, + outcomeCounts = outcomeCounts, + eraTypeCount = object$eraRef %>% count() %>% pull(), + eraCount = object$eras %>% count() %>% pull() + ) class(result) <- "summary.SccsData" return(result) }) @@ -143,11 +157,15 @@ print.summary.SccsData <- function(x, ...) { if (length(metaData$exposureIds) == 0) { writeLines("All exposures") } else { - writeLines(paste("Exposure cohort ID(s):", - paste(x$metaData$exposureIds, collapse = ","))) + writeLines(paste( + "Exposure cohort ID(s):", + paste(x$metaData$exposureIds, collapse = ",") + )) } - writeLines(paste("Outcome cohort ID(s):", - paste(metaData$outcomeIds, collapse = ","))) + writeLines(paste( + "Outcome cohort ID(s):", + paste(metaData$outcomeIds, collapse = ",") + )) writeLines("") writeLines("Outcome counts:") outcomeCounts <- as.data.frame(x$outcomeCounts) diff --git a/R/SccsIntervalData.R b/R/SccsIntervalData.R index 26d0542..a38d209 100644 --- a/R/SccsIntervalData.R +++ b/R/SccsIntervalData.R @@ -48,12 +48,15 @@ setClass("SccsIntervalData", contains = "Andromeda") #' #' @export saveSccsIntervalData <- function(SccsIntervalData, file) { - if (missing(SccsIntervalData)) + if (missing(SccsIntervalData)) { stop("Must specify SccsIntervalData") - if (missing(file)) + } + if (missing(file)) { stop("Must specify file") - if (!inherits(SccsIntervalData, "SccsIntervalData")) + } + if (!inherits(SccsIntervalData, "SccsIntervalData")) { stop("Data not of class SccsIntervalData") + } Andromeda::saveAndromeda(SccsIntervalData, file) } @@ -70,10 +73,12 @@ saveSccsIntervalData <- function(SccsIntervalData, file) { #' #' @export loadSccsIntervalData <- function(file) { - if (!file.exists(file)) + if (!file.exists(file)) { stop("Cannot find file ", file) - if (file.info(file)$isdir) - stop(file , " is a folder, but should be a file") + } + if (file.info(file)$isdir) { + stop(file, " is a folder, but should be a file") + } SccsIntervalData <- Andromeda::loadAndromeda(file) class(SccsIntervalData) <- "SccsIntervalData" attr(class(SccsIntervalData), "package") <- "SelfControlledCaseSeries" @@ -103,21 +108,34 @@ setMethod("show", "SccsIntervalData", function(object) { #' @export #' @rdname SccsIntervalData-class setMethod("summary", "SccsIntervalData", function(object) { - if (!Andromeda::isValidAndromeda(object)) + if (!Andromeda::isValidAndromeda(object)) { stop("Object is not valid. Probably the Andromeda object was closed.") + } - caseCount <- object$outcomes %>% summarise(n = n_distinct(.data$stratumId)) %>% pull() - eraCount <- object$outcomes %>% count() %>% pull() - outcomeCount <- object$outcomes %>% summarise(n = sum(.data$y, na.rm = TRUE)) %>% pull() - covariateCount <- object$covariateRef %>% count() %>% pull() - covariateValueCount <- object$covariates %>% count() %>% pull() + caseCount <- object$outcomes %>% + summarise(n = n_distinct(.data$stratumId)) %>% + pull() + eraCount <- object$outcomes %>% + count() %>% + pull() + outcomeCount <- object$outcomes %>% + summarise(n = sum(.data$y, na.rm = TRUE)) %>% + pull() + covariateCount <- object$covariateRef %>% + count() %>% + pull() + covariateValueCount <- object$covariates %>% + count() %>% + pull() - result <- list(metaData = attr(object, "metaData"), - caseCount = caseCount, - eraCount = eraCount, - outcomeCount = outcomeCount, - covariateCount = covariateCount, - covariateValueCount = covariateValueCount) + result <- list( + metaData = attr(object, "metaData"), + caseCount = caseCount, + eraCount = eraCount, + outcomeCount = outcomeCount, + covariateCount = covariateCount, + covariateValueCount = covariateValueCount + ) class(result) <- "summary.SccsIntervalData" return(result) diff --git a/R/ScriDataConversion.R b/R/ScriDataConversion.R index c7fa5c4..2aff510 100644 --- a/R/ScriDataConversion.R +++ b/R/ScriDataConversion.R @@ -46,8 +46,9 @@ createScriIntervalData <- function(studyPopulation, sccsData, eraCovariateSettings, controlIntervalSettings) { - if (class(controlIntervalSettings) != "ControlIntervalSettings") + if (class(controlIntervalSettings) != "ControlIntervalSettings") { stop("The controlIntervalSettings argument should be of type 'ControlIntervalSettings'") + } start <- Sys.time() if (nrow(studyPopulation$outcomes) == 0) { @@ -70,7 +71,7 @@ createScriIntervalData <- function(studyPopulation, } else { covariateSettings <- list(eraCovariateSettings) } - covariateSettings[[length(covariateSettings) + 1]] <- controlIntervalSettings + covariateSettings[[length(covariateSettings) + 1]] <- controlIntervalSettings settings <- addEraCovariateSettings(settings, covariateSettings, sccsData) settings$metaData$covariateSettingsList <- cleanCovariateSettingsList(settings$covariateSettingsList) metaData <- append(studyPopulation$metaData, settings$metaData) @@ -83,23 +84,25 @@ createScriIntervalData <- function(studyPopulation, arrange(.data$caseId) controlIntervalId <- settings$covariateSettingsList[sapply(settings$covariateSettingsList, function(x) x$isControlInterval)][[1]]$outputIds[1, 1] - data <- convertToSccs(cases = cases, - outcomes = outcomes, - eras = eras, - includeAge = FALSE, - ageOffset = 0, - ageDesignMatrix = matrix(), - includeSeason = FALSE, - seasonDesignMatrix = matrix(), - includeCalendarTime = FALSE, - calendarTimeOffset = 0, - calendarTimeDesignMatrix = matrix(), - timeCovariateCases = numeric(0), - covariateSettingsList = settings$covariateSettingsList, - eventDependentObservation = FALSE, - censorModel = list(model = 0, p = c(0)), - scri = TRUE, - controlIntervalId = controlIntervalId) + data <- convertToSccs( + cases = cases, + outcomes = outcomes, + eras = eras, + includeAge = FALSE, + ageOffset = 0, + ageDesignMatrix = matrix(), + includeSeason = FALSE, + seasonDesignMatrix = matrix(), + includeCalendarTime = FALSE, + calendarTimeOffset = 0, + calendarTimeDesignMatrix = matrix(), + timeCovariateCases = numeric(0), + covariateSettingsList = settings$covariateSettingsList, + eventDependentObservation = FALSE, + censorModel = list(model = 0, p = c(0)), + scri = TRUE, + controlIntervalId = controlIntervalId + ) if (is.null(data$outcomes) || is.null(data$covariates)) { warning("Conversion resulted in empty data set. Perhaps no one with the outcome had any exposure of interest?") @@ -108,14 +111,12 @@ createScriIntervalData <- function(studyPopulation, if (nrow(settings$covariateRef) > 0) { data$covariateRef <- settings$covariateRef } - } else { metaData$covariateStatistics <- collect(data$covariateStatistics) metaData$daysObserved <- pull(data$observedDays, .data$observedDays) data$covariateStatistics <- NULL data$observedDays <- NULL data$covariateRef <- settings$covariateRef - } attr(data, "metaData") <- metaData class(data) <- "SccsIntervalData" @@ -129,5 +130,8 @@ createScriIntervalData <- function(studyPopulation, cleanCovariateSettingsList <- function(covariateSettingsList) { # Remove control interval settings and field: noCi <- covariateSettingsList[!sapply(covariateSettingsList, function(x) x$isControlInterval)] - return(lapply(noCi, function(x) {x$isControlInterval <- NULL; return(x)})) + return(lapply(noCi, function(x) { + x$isControlInterval <- NULL + return(x) + })) } diff --git a/R/Simulation.R b/R/Simulation.R index 3fa4255..5199115 100644 --- a/R/Simulation.R +++ b/R/Simulation.R @@ -51,8 +51,9 @@ createSimulationRiskWindow <- function(start = 0, # Second: overwrite defaults with actual values: values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3))) for (name in names(values)) { - if (name %in% names(analysis)) + if (name %in% names(analysis)) { analysis[[name]] <- values[[name]] + } } class(analysis) <- "simulationRiskWindow" return(analysis) @@ -105,8 +106,10 @@ createSccsSimulationSettings <- function(meanPatientTime = 4 * 365, usageRate = c(0.01, 0.01), meanPrescriptionDurations = c(14, 30), sdPrescriptionDurations = c(7, 14), - simulationRiskWindows = list(createSimulationRiskWindow(relativeRisks = 1), - createSimulationRiskWindow(relativeRisks = 1.5)), + simulationRiskWindows = list( + createSimulationRiskWindow(relativeRisks = 1), + createSimulationRiskWindow(relativeRisks = 1.5) + ), includeAgeEffect = TRUE, ageKnots = 5, includeSeasonality = TRUE, @@ -122,8 +125,9 @@ createSccsSimulationSettings <- function(meanPatientTime = 4 * 365, # Second: overwrite defaults with actual values: values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3))) for (name in names(values)) { - if (name %in% names(analysis)) + if (name %in% names(analysis)) { analysis[[name]] <- values[[name]] + } } class(analysis) <- "sccsSimulationSettings" return(analysis) @@ -140,50 +144,59 @@ simulateBatch <- function(settings, ageFun, seasonFun, calendarTimeFun, caseIdOf maxCalendarDays <- as.numeric(settings$maxCalendarTime) - as.numeric(settings$minCalendarTime) observationDays[observationDays > maxCalendarDays] <- maxCalendarDays ageInDays <- round(runif(n, settings$minAge, settings$maxAge - observationDays)) - startDate <- round(runif(n, - rep(as.numeric(settings$minCalendarTime), n), - as.numeric(settings$maxCalendarTime) - observationDays)) + startDate <- round(runif( + n, + rep(as.numeric(settings$minCalendarTime), n), + as.numeric(settings$maxCalendarTime) - observationDays + )) startDate <- as.Date(startDate, origin = "1970-01-01") startYear <- as.numeric(format(startDate, format = "%Y")) startMonth <- as.numeric(format(startDate, format = "%m")) startDay <- as.numeric(format(startDate, format = "%d")) - cases <- tibble(observationPeriodId = 1:n, - caseId = 1:n, - personId = 1:n, - observationDays = observationDays, - ageInDays = ageInDays, - startYear = startYear, - startMonth = startMonth, - startDay = startDay, - startDate = as.numeric(startDate), - censoredDays = 0, - noninformativeEndCensor = 0) + cases <- tibble( + observationPeriodId = 1:n, + caseId = 1:n, + personId = 1:n, + observationDays = observationDays, + ageInDays = ageInDays, + startYear = startYear, + startMonth = startMonth, + startDay = startDay, + startDate = as.numeric(startDate), + censoredDays = 0, + noninformativeEndCensor = 0 + ) ### Generate eras ### eras <- tibble() for (i in 1:length(settings$eraIds)) { # i <- 1 patientsOnDrug <- sample.int(nrow(cases), - settings$patientUsages[i] * nrow(cases), - replace = FALSE) + settings$patientUsages[i] * nrow(cases), + replace = FALSE + ) patientsOnDrug <- patientsOnDrug[order(patientsOnDrug)] count <- rpois(length(patientsOnDrug), observationDays[patientsOnDrug] * settings$usageRate[i]) observationPeriodId <- rep(patientsOnDrug, count) patientsOnDrug <- patientsOnDrug[count != 0] startDay <- round(runif(sum(count), 0, cases$observationDays[observationPeriodId])) - duration <- round(rnorm(sum(count), - settings$meanPrescriptionDurations[i], - settings$sdPrescriptionDurations[i])) + duration <- round(rnorm( + sum(count), + settings$meanPrescriptionDurations[i], + settings$sdPrescriptionDurations[i] + )) duration[duration < 1] <- 1 endDay <- startDay + duration endDay[endDay > cases$observationDays[observationPeriodId]] <- cases$observationDays[observationPeriodId][endDay > - cases$observationDays[observationPeriodId]] - newEras <- tibble(eraType = "rx", - caseId = observationPeriodId, - eraId = settings$eraIds[i], - value = 1, - startDay = startDay, - endDay = endDay) + cases$observationDays[observationPeriodId]] + newEras <- tibble( + eraType = "rx", + caseId = observationPeriodId, + eraId = settings$eraIds[i], + value = 1, + startDay = startDay, + endDay = endDay + ) eras <- rbind(eras, newEras) } eras <- eras[order(eras$caseId, eras$eraId), ] @@ -227,12 +240,14 @@ simulateBatch <- function(settings, ageFun, seasonFun, calendarTimeFun, caseIdOf truncatedEnds <- riskEnds truncatedEnds[truncatedEnds > end] <- end filteredIndex <- truncatedEnds >= start - riskEras <- tibble(eraType = "rx", - caseId = sourceEras$caseId[filteredIndex], - eraId = eraId, - value = 1, - startDay = sourceEras$startDay[filteredIndex] + start, - endDay = sourceEras$startDay[filteredIndex] + truncatedEnds[filteredIndex]) + riskEras <- tibble( + eraType = "rx", + caseId = sourceEras$caseId[filteredIndex], + eraId = eraId, + value = 1, + startDay = sourceEras$startDay[filteredIndex] + start, + endDay = sourceEras$startDay[filteredIndex] + truncatedEnds[filteredIndex] + ) newEras <- rbind(newEras, riskEras) eraIds <- c(eraIds, eraId) rrs <- c(rrs, simulationRiskWindow$relativeRisks[j]) @@ -242,24 +257,28 @@ simulateBatch <- function(settings, ageFun, seasonFun, calendarTimeFun, caseIdOf } newEras <- newEras[order(newEras$caseId, newEras$eraId), ] eraRrs <- tibble(eraId = eraIds, rr = rrs) - outcomes <- simulateSccsOutcomes(cases, - newEras, - baselineRates, - eraRrs, - settings$includeAgeEffect, - settings$minAge, - ageRrs, - settings$includeSeasonality, - seasonRrs, - settings$includeCalendarTimeEffect, - as.numeric(settings$minCalendarTime), - calendarTimeRrs) - outcomes <- tibble(eraType = "hoi", - caseId = outcomes$caseId, - eraId = settings$outcomeId, - value = 1, - startDay = outcomes$startDay, - endDay = outcomes$startDay) + outcomes <- simulateSccsOutcomes( + cases, + newEras, + baselineRates, + eraRrs, + settings$includeAgeEffect, + settings$minAge, + ageRrs, + settings$includeSeasonality, + seasonRrs, + settings$includeCalendarTimeEffect, + as.numeric(settings$minCalendarTime), + calendarTimeRrs + ) + outcomes <- tibble( + eraType = "hoi", + caseId = outcomes$caseId, + eraId = settings$outcomeId, + value = 1, + startDay = outcomes$startDay, + endDay = outcomes$startDay + ) # ** Remove non-cases *** caseIds <- unique(outcomes$caseId) @@ -325,29 +344,39 @@ simulateSccsData <- function(nCases, settings) { lastCaseId <- max(batch$cases$caseId) } else { cases <- rbind(cases, batch$cases[1:need, ]) - eras <- rbind(eras, - batch$eras[batch$eras$caseId %in% batch$cases$caseId[1:need],]) + eras <- rbind( + eras, + batch$eras[batch$eras$caseId %in% batch$cases$caseId[1:need], ] + ) } } cases$observationPeriodId <- as.character(cases$observationPeriodId) cases$personId <- as.character(cases$personId) - data <- Andromeda::andromeda(cases = cases, - eras = eras, - eraRef = tibble(eraId = settings$eraIds, - eraType = "", - eraName = "")) + data <- Andromeda::andromeda( + cases = cases, + eras = eras, + eraRef = tibble( + eraId = settings$eraIds, + eraType = "", + eraName = "" + ) + ) - attr(data, "metaData") <- list(sccsSimulationSettings = settings, - ageFun = ageFun, - seasonFun = seasonFun, - calendarTimeFun = calendarTimeFun, - exposureIds = settings$eraIds, - outcomeIds = settings$outcomeId, - attrition = tibble(outcomeId = settings$outcomeId, - description = "Outcomes", - outcomeSubjects = 0, - outcomeEvents = 0, - outcomeObsPeriods = 0)) + attr(data, "metaData") <- list( + sccsSimulationSettings = settings, + ageFun = ageFun, + seasonFun = seasonFun, + calendarTimeFun = calendarTimeFun, + exposureIds = settings$eraIds, + outcomeIds = settings$outcomeId, + attrition = tibble( + outcomeId = settings$outcomeId, + description = "Outcomes", + outcomeSubjects = 0, + outcomeEvents = 0, + outcomeObsPeriods = 0 + ) + ) class(data) <- "SccsData" attr(class(data), "package") <- "SelfControlledCaseSeries" diff --git a/R/StudyPopulation.R b/R/StudyPopulation.R index f8b3586..1e381d4 100644 --- a/R/StudyPopulation.R +++ b/R/StudyPopulation.R @@ -74,18 +74,24 @@ createStudyPopulation <- function(sccsData, filter(row_number(.data$startDay) == 1) %>% ungroup() - attrition <- bind_rows(attrition, - countOutcomes(outcomes, cases, "First outcome only")) + attrition <- bind_rows( + attrition, + countOutcomes(outcomes, cases, "First outcome only") + ) } cases <- cases %>% - mutate(startAgeInDays = .data$ageInDays, - endAgeInDays = .data$ageInDays + .data$observationDays - 1) + mutate( + startAgeInDays = .data$ageInDays, + endAgeInDays = .data$ageInDays + .data$observationDays - 1 + ) if (naivePeriod != 0) { cases <- cases %>% - mutate(startAgeInDays = case_when(naivePeriod > .data$censoredDays ~ .data$startAgeInDays + naivePeriod - .data$censoredDays, - TRUE ~ .data$startAgeInDays)) %>% + mutate(startAgeInDays = case_when( + naivePeriod > .data$censoredDays ~ .data$startAgeInDays + naivePeriod - .data$censoredDays, + TRUE ~ .data$startAgeInDays + )) %>% filter(.data$endAgeInDays > .data$startAgeInDays) outcomes <- outcomes %>% @@ -96,8 +102,10 @@ createStudyPopulation <- function(sccsData, cases <- cases %>% filter(.data$caseId %in% unique(outcomes$caseId)) - attrition <- bind_rows(attrition, - countOutcomes(outcomes, cases, sprintf("%s days naive period", naivePeriod))) + attrition <- bind_rows( + attrition, + countOutcomes(outcomes, cases, sprintf("%s days naive period", naivePeriod)) + ) } if (!is.null(minAge) || !is.null(maxAge)) { @@ -105,18 +113,26 @@ createStudyPopulation <- function(sccsData, if (!is.null(minAge)) { minAgeInDays <- minAge * 365.25 cases <- cases %>% - mutate(startAgeInDays = case_when(.data$startAgeInDays < minAgeInDays ~ minAgeInDays, - TRUE ~ .data$startAgeInDays)) %>% + mutate(startAgeInDays = case_when( + .data$startAgeInDays < minAgeInDays ~ minAgeInDays, + TRUE ~ .data$startAgeInDays + )) %>% filter(.data$endAgeInDays > .data$startAgeInDays) labels <- c(labels, sprintf("Age >= %s", minAge)) } if (!is.null(maxAge)) { maxAgeInDays <- round((maxAge + 1) * 365.25) cases <- cases %>% - mutate(noninformativeEndCensor = case_when(.data$endAgeInDays > maxAgeInDays ~ 1, - TRUE ~ noninformativeEndCensor), - endAgeInDays = case_when(.data$endAgeInDays > maxAgeInDays ~ maxAgeInDays, - TRUE ~ .data$endAgeInDays)) %>% + mutate( + noninformativeEndCensor = case_when( + .data$endAgeInDays > maxAgeInDays ~ 1, + TRUE ~ noninformativeEndCensor + ), + endAgeInDays = case_when( + .data$endAgeInDays > maxAgeInDays ~ maxAgeInDays, + TRUE ~ .data$endAgeInDays + ) + ) %>% filter(.data$endAgeInDays > .data$startAgeInDays) labels <- c(labels, sprintf("Age <= %s", maxAge)) } @@ -124,22 +140,28 @@ createStudyPopulation <- function(sccsData, outcomes <- outcomes %>% inner_join(select(cases, .data$observationPeriodId, .data$caseId, .data$startAgeInDays, .data$endAgeInDays, .data$ageInDays), by = "caseId") %>% filter(.data$startDay >= .data$startAgeInDays - .data$ageInDays & - .data$startDay <= .data$endAgeInDays - .data$ageInDays) %>% + .data$startDay <= .data$endAgeInDays - .data$ageInDays) %>% select(-.data$startAgeInDays, -.data$endAgeInDays, -.data$ageInDays) - attrition <- bind_rows(attrition, - countOutcomes(outcomes, cases, paste(labels, collapse = " & "))) + attrition <- bind_rows( + attrition, + countOutcomes(outcomes, cases, paste(labels, collapse = " & ")) + ) } - metaData <- list(exposureIds = attr(sccsData, "metaData")$exposureIds, - outcomeId = unique(outcomes$eraId), - attrition = attrition) + metaData <- list( + exposureIds = attr(sccsData, "metaData")$exposureIds, + outcomeId = unique(outcomes$eraId), + attrition = attrition + ) - cases$startDate <- as.Date(paste(cases$startYear, cases$startMonth, cases$startDay, sep = "-"), format="%Y-%m-%d") + cases$startDate <- as.Date(paste(cases$startYear, cases$startMonth, cases$startDay, sep = "-"), format = "%Y-%m-%d") cases <- cases %>% - mutate(offset = .data$startAgeInDays - .data$ageInDays, - startDate = .data$startDate + .data$startAgeInDays - .data$ageInDays, - endDay = .data$endAgeInDays - .data$startAgeInDays) %>% + mutate( + offset = .data$startAgeInDays - .data$ageInDays, + startDate = .data$startDate + .data$startAgeInDays - .data$ageInDays, + endDay = .data$endAgeInDays - .data$startAgeInDays + ) %>% mutate(ageInDays = .data$startAgeInDays) %>% select(.data$observationPeriodId, .data$caseId, .data$personId, .data$startDate, .data$endDay, .data$ageInDays, .data$offset, .data$noninformativeEndCensor) @@ -159,10 +181,12 @@ countOutcomes <- function(outcomes, cases, description) { counts <- outcomes %>% inner_join(cases, by = "caseId") %>% group_by(.data$eraId) %>% - summarise(outcomeSubjects = n_distinct(.data$personId), - outcomeEvents = n(), - outcomeObsPeriods = n_distinct(.data$caseId), - .groups = "drop_last") %>% + summarise( + outcomeSubjects = n_distinct(.data$personId), + outcomeEvents = n(), + outcomeObsPeriods = n_distinct(.data$caseId), + .groups = "drop_last" + ) %>% rename(outcomeId = .data$eraId) %>% mutate(description = description) return(counts) diff --git a/docs/404.html b/docs/404.html index 1ae74d7..d10ebbe 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,66 +1,27 @@ - - - - + + + + - Page not found (404) • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + - - - - -
-
- + +
+ + + - - -
+
+
-
+ + - - diff --git a/docs/articles/MultipleAnalyses.html b/docs/articles/MultipleAnalyses.html index 3cffbd8..85c2a42 100644 --- a/docs/articles/MultipleAnalyses.html +++ b/docs/articles/MultipleAnalyses.html @@ -19,6 +19,8 @@ + +
+
-
-

-Introduction

-

In this vignette we focus on running several different analyses on several exposure-outcome pairs This can be useful when we want to explore the sensitivity to analyses choices, include controls, or run an experiment similar to the OMOP experiment to empirically identify the optimal analysis choices for a particular research question.

-

This vignette assumes you are already familiar with the SelfControlledCaseSeries package and are able to perform single studies. We will walk through all the steps needed to perform an exemplar set of analyses, and we have selected the well-studied topic of the effect of nonsteroidal anti-inflammatory drugs (NSAIDs) on gastrointestinal (GI) bleeding-related hospitalization. For simplicity, we focus on one NSAID: diclofenac. We will execute various variations of an analysis for the primary exposure pair and a large set of negative control exposures.

+
+

Introduction +

+

In this vignette we focus on running several different analyses on +several exposure-outcome pairs This can be useful when we want to +explore the sensitivity to analyses choices, include controls, or run an +experiment similar to the OMOP experiment to empirically identify the +optimal analysis choices for a particular research question.

+

This vignette assumes you are already familiar with the +SelfControlledCaseSeries package and are able to perform +single studies. We will walk through all the steps needed to perform an +exemplar set of analyses, and we have selected the well-studied topic of +the effect of nonsteroidal anti-inflammatory drugs (NSAIDs) on +gastrointestinal (GI) bleeding-related hospitalization. For simplicity, +we focus on one NSAID: diclofenac. We will execute various variations of +an analysis for the primary exposure pair and a large set of negative +control exposures.

-
-

-General approach

-

The general approach to running a set of analyses is that you specify all the function arguments of the functions you would normally call, and create sets of these function arguments. The final outcome models as well as intermediate data objects will all be saved to disk for later extraction.

-

An analysis will be executed by calling these functions in sequence:

+
+

General approach +

+

The general approach to running a set of analyses is that you specify +all the function arguments of the functions you would normally call, and +create sets of these function arguments. The final outcome models as +well as intermediate data objects will all be saved to disk for later +extraction.

+

An analysis will be executed by calling these functions in +sequence:

  1. getDbSccsData()
  2. createStudyPopulation()
  3. createSccsIntervalData()
  4. fitSccsModel()
-

When you provide several analyses to the SelfControlledCaseSeries package, it will determine whether any of the analyses and exposure-outcome pairs have anything in common, and will take advantage of this fact. For example, if we specify several exposure-outcome pairs with the same outcome, the data for the outcome will be extracted only once.

-

The function arguments you need to define have been divided into four groups:

+

When you provide several analyses to the +SelfControlledCaseSeries package, it will determine whether +any of the analyses and exposure-outcome pairs have anything in common, +and will take advantage of this fact. For example, if we specify several +exposure-outcome pairs with the same outcome, the data for the outcome +will be extracted only once.

+

The function arguments you need to define have been divided into four +groups:

  1. -Hypothesis of interest: arguments that are specific to a hypothesis of interest, in the case of the self-controlled case series this is a combination of exposure and outcome.
  2. +Hypothesis of interest: arguments that are specific +to a hypothesis of interest, in the case of the self-controlled case +series this is a combination of exposure and outcome.
  3. -Analyses: arguments that are not directly specific to a hypothesis of interest, such as the washout window, whether to adjust for age and seasonality, etc.
  4. -
  5. Arguments that are the output of a previous function in the SelfControlledCaseSeries package, such as the SccsIntervalData argument of the createSccsIntervalData function. These cannot be specified by the user.
  6. -
  7. Arguments that are specific to an environment, such as the connection details for connecting to the server, and the name of the schema holding the CDM data.
  8. +Analyses: arguments that are not directly specific +to a hypothesis of interest, such as the washout window, whether to +adjust for age and seasonality, etc. +
  9. Arguments that are the output of a previous function in the +SelfControlledCaseSeries package, such as the +SccsIntervalData argument of the +createSccsIntervalData function. These cannot be specified +by the user.
  10. +
  11. Arguments that are specific to an environment, such as the +connection details for connecting to the server, and the name of the +schema holding the CDM data.
-
-

-Preparation for the example

-

We need to tell R how to connect to the server where the data are. SelfControlledCaseSeries uses the DatabaseConnector package, which provides the createConnectionDetails function. Type ?createConnectionDetails for the specific settings required for the various database management systems (DBMS). For example, one might connect to a PostgreSQL database using this code:

+
+

Preparation for the example +

+

We need to tell R how to connect to the server where the data are. +SelfControlledCaseSeries uses the +DatabaseConnector package, which provides the +createConnectionDetails function. Type +?createConnectionDetails for the specific settings required +for the various database management systems (DBMS). For example, one +might connect to a PostgreSQL database using this code:

 connectionDetails <- createConnectionDetails(dbms = "postgresql", 
                                              server = "localhost/ohdsi", 
@@ -142,11 +179,24 @@ 

cdmDatabaseSchema <- "my_cdm_data" cohortDatabaseSchema <- "my_cohorts" -options(sqlRenderTempEmulationSchema = NULL) +options(sqlRenderTempEmulationSchema = NULL) cdmVersion <- "5"

-

The last three lines define the cdmDatabaseSchema and cohortDatabaseSchema variables, as well as the CDM version. We’ll use these later to tell R where the data in CDM format live, where we want to store the (outcome) cohorts, and what version CDM is used. Note that for Microsoft SQL Server, databaseschemas need to specify both the database and the schema, so for example cdmDatabaseSchema <- "my_cdm_data.dbo".

-

We also need to prepare our exposures and outcomes of interest. The drug_era table in the OMOP Common Data Model already contains prespecified cohorts of users at the ingredient level, so we will use that for the exposures. For the outcomes, we want to restrict our analysis only to those events that are recorded in an inpatient setting, so we will need to create a custom cohort table. For this example, we are only interested in GI bleed (concept ID 192671) .

-

We create a text file called vignette.sql with the following content:

+

The last three lines define the cdmDatabaseSchema and +cohortDatabaseSchema variables, as well as the CDM version. +We’ll use these later to tell R where the data in CDM format live, where +we want to store the (outcome) cohorts, and what version CDM is used. +Note that for Microsoft SQL Server, databaseschemas need to specify both +the database and the schema, so for example +cdmDatabaseSchema <- "my_cdm_data.dbo".

+

We also need to prepare our exposures and outcomes of interest. The +drug_era table in the OMOP Common Data Model already contains +prespecified cohorts of users at the ingredient level, so we will use +that for the exposures. For the outcomes, we want to restrict our +analysis only to those events that are recorded in an inpatient setting, +so we will need to create a custom cohort table. For this example, we +are only interested in GI bleed (concept ID 192671) .

+

We create a text file called vignette.sql with the following +content:

/***********************************
 File vignette.sql 
 ***********************************/
@@ -168,52 +218,79 @@ 

WHERE ancestor_concept_id = 192671 -- GI - Gastrointestinal haemorrhage ) AND visit_occurrence.visit_concept_id IN (9201, 9203);

-

This is parameterized SQL which can be used by the SqlRender package. We use parameterized SQL so we do not have to pre-specify the names of the CDM and result schemas. That way, if we want to run the SQL on a different schema, we only need to change the parameter values; we do not have to change the SQL code. By also making use of translation functionality in SqlRender, we can make sure the SQL code can be run in many different environments.

+

This is parameterized SQL which can be used by the +SqlRender package. We use parameterized SQL so we do not +have to pre-specify the names of the CDM and result schemas. That way, +if we want to run the SQL on a different schema, we only need to change +the parameter values; we do not have to change the SQL code. By also +making use of translation functionality in SqlRender, we +can make sure the SQL code can be run in many different +environments.

-library(SqlRender)
-sql <- readSql("vignette.sql")
-sql <- render(sql,
+library(SqlRender)
+sql <- readSql("vignette.sql")
+sql <- render(sql,
               cdmDatabaseSchema = cdmDatabaseSchema, 
               cohortDatabaseSchema = cohortDatabaseSchema)
-sql <- translate(sql, targetDialect = connectionDetails$dbms)
+sql <- translate(sql, targetDialect = connectionDetails$dbms)
 
 connection <- connect(connectionDetails)
 executeSql(connection, sql)
-

In this code, we first read the SQL from the file into memory. In the next line, we replace the two parameter names with the actual values. We then translate the SQL into the dialect appropriate for the DBMS we already specified in the connectionDetails. Next, we connect to the server, and submit the rendered and translated SQL.

+

In this code, we first read the SQL from the file into memory. In the +next line, we replace the two parameter names with the actual values. We +then translate the SQL into the dialect appropriate for the DBMS we +already specified in the connectionDetails. Next, we +connect to the server, and submit the rendered and translated SQL.

-
-

-Specifying hypotheses of interest

-

The first group of arguments define the exposure and outcome. Here we demonstrate how to create a list of exposure-outcome pairs:

+
+

Specifying hypotheses of interest +

+

The first group of arguments define the exposure and outcome. Here we +demonstrate how to create a list of exposure-outcome pairs:

-negativeControls <- c(705178, 705944, 710650, 714785, 719174, 719311, 735340, 742185, 
+negativeControls <- c(705178, 705944, 710650, 714785, 719174, 719311, 735340, 742185, 
                       780369, 781182, 924724, 990760, 1110942, 1111706, 1136601, 
                       1317967, 1501309, 1505346, 1551673, 1560278, 1584910, 19010309, 
                       40163731)
 diclofenac <- 1124300
-ppis <- c(911735, 929887, 923645, 904453, 948078, 19039926)
+ppis <- c(911735, 929887, 923645, 904453, 948078, 19039926)
 
-exposureOutcomeList <- list()
-for (exposureId in c(diclofenac, negativeControls)){
+exposureOutcomeList <- list()
+for (exposureId in c(diclofenac, negativeControls)){
   exposureOutcome <- createExposureOutcome(exposureId = exposureId,
                                            outcomeId = 1,
                                            prophylactics = ppis)
-  exposureOutcomeList[[length(exposureOutcomeList) + 1]] <- exposureOutcome
+  exposureOutcomeList[[length(exposureOutcomeList) + 1]] <- exposureOutcome
 }
-

We defined the outcome of interest to be the custom cohort with ID 1 we defined in the SQL above. The exposures include diclofenac (concept ID 1124300) and a large number of negative control exposures. We furthermore specify a custom variable prophylactics that contains the concept IDs of drugs that are used in preventing the outcome of interest, in this case proton pump inhibitors (PPIs).

-

A convenient way to save exposureOutcomeList to file is by using the saveExposureOutcomeList function, and we can load it again using the loadExposureOutcomeList function.

+

We defined the outcome of interest to be the custom cohort with ID 1 +we defined in the SQL above. The exposures include diclofenac (concept +ID 1124300) and a large number of negative control exposures. We +furthermore specify a custom variable prophylactics that +contains the concept IDs of drugs that are used in preventing the +outcome of interest, in this case proton pump inhibitors (PPIs).

+

A convenient way to save exposureOutcomeList to file is +by using the saveExposureOutcomeList function, and we can +load it again using the loadExposureOutcomeList +function.

-
-

-Specifying analyses

-

The second group of arguments are not specific to a hypothesis of interest, and comprise the majority of arguments. For each function that will be called during the execution of the analyses, a companion function is available that has (almost) the same arguments. For example, for the fitSccsModel() function there is the createFitSccsModelArgs() function. These companion functions can be used to create the arguments to be used during execution:

+
+

Specifying analyses +

+

The second group of arguments are not specific to a hypothesis of +interest, and comprise the majority of arguments. For each function that +will be called during the execution of the analyses, a companion +function is available that has (almost) the same arguments. For example, +for the fitSccsModel() function there is the +createFitSccsModelArgs() function. These companion +functions can be used to create the arguments to be used during +execution:

 getDbSccsDataArgs1 <- createGetDbSccsDataArgs(
   useCustomCovariates = FALSE,
   deleteCovariatesSmallCount = 100,
   studyStartDate = "",
   studyEndDate = "",
-  exposureIds = c(),
+  exposureIds = c(),
   maxCasesPerOutcome = 1000)
 
 createStudyPopulationArgs1 <- createCreateStudyPopulationArgs(
@@ -231,8 +308,14 @@ 

eraCovariateSettings = covarExposureOfInt) fitSccsModelArgs <- createFitSccsModelArgs()

-

Any argument that is not explicitly specified by the user will assume the default value specified in the function. Note that for several arguments for concept or cohort definition IDs we can use the name of a variable in the exposureOutcome objects. In this case, we defined the argument includeEraIds to get the value of the exposureId variable.

-

We can now combine the arguments for the various functions into a single analysis:

+

Any argument that is not explicitly specified by the user will assume +the default value specified in the function. Note that for several +arguments for concept or cohort definition IDs we can use the name of a +variable in the exposureOutcome objects. In this case, we +defined the argument includeEraIds to get the value of the +exposureId variable.

+

We can now combine the arguments for the various functions into a +single analysis:

 sccsAnalysis1 <- createSccsAnalysis(analysisId = 1,
                                     description = "Simplest model",
@@ -240,8 +323,13 @@ 

createStudyPopulationArgs = createStudyPopulationArgs1, createSccsIntervalDataArgs = createSccsIntervalDataArgs1, fitSccsModelArgs = fitSccsModelArgs)

-

Note that we have assigned an analysis ID (1) to this set of arguments. We can use this later to link the results back to this specific set of choices. We also include a short description of the analysis.

-

We can easily create more analyses, for example by including adjustments for age and seasonality, or for including other drugs in the model:

+

Note that we have assigned an analysis ID (1) to this set of +arguments. We can use this later to link the results back to this +specific set of choices. We also include a short description of the +analysis.

+

We can easily create more analyses, for example by including +adjustments for age and seasonality, or for including other drugs in the +model:

 covarProphylactics <- createEraCovariateSettings(
   label = "Prophylactics",
@@ -251,7 +339,7 @@ 

endAnchor = "era end") createSccsIntervalDataArgs2 <- createCreateSccsIntervalDataArgs( - eraCovariateSettings = list(covarExposureOfInt, + eraCovariateSettings = list(covarExposureOfInt, covarProphylactics)) sccsAnalysis2 <- createSccsAnalysis( @@ -274,7 +362,7 @@

endAnchor = "era start") createSccsIntervalDataArgs3 <- createCreateSccsIntervalDataArgs( - eraCovariateSettings = list(covarExposureOfInt, + eraCovariateSettings = list(covarExposureOfInt, covarPreExp, covarProphylactics), ageCovariateSettings = ageSettings, @@ -299,7 +387,7 @@

allowRegularization = TRUE) createSccsIntervalDataArgs4 <- createCreateSccsIntervalDataArgs( - eraCovariateSettings = list(covarExposureOfInt, + eraCovariateSettings = list(covarExposureOfInt, covarPreExp, covarAllDrugs), ageCovariateSettings = ageSettings, @@ -315,21 +403,31 @@

fitSccsModelArgs = fitSccsModelArgs)

These analyses can be combined in a list:

-sccsAnalysisList <- list(sccsAnalysis1, sccsAnalysis2, sccsAnalysis3, sccsAnalysis4)
-

A convenient way to save sccsAnalysisList to file is by using the saveSccsAnalysisList function, and we can load it again using the loadSccsAnalysisList function.

-
-

-Exposure and outcome selection strategies

-

Often we would like to evaluate different definitions of the exposure and/or outcome. We could include these by created extra exposure-outcome pairs, but that would mean that all defined analyses would be executed against these variations of the definitions, and this may not be what we want. Perhaps we would like to define just a single sensitivity analyses with a different outcome definition, in which case we could argue that the strategy of selecting the outcome becomes part of the analysis.

-

In such a case, we can define the multiple strategies using a list:

+sccsAnalysisList <- list(sccsAnalysis1, sccsAnalysis2, sccsAnalysis3, sccsAnalysis4)
+

A convenient way to save sccsAnalysisList to file is by +using the saveSccsAnalysisList function, and we can load it +again using the loadSccsAnalysisList function.

+
+

Exposure and outcome selection strategies +

+

Often we would like to evaluate different definitions of the exposure +and/or outcome. We could include these by created extra exposure-outcome +pairs, but that would mean that all defined analyses would be executed +against these variations of the definitions, and this may not be what we +want. Perhaps we would like to define just a single sensitivity analyses +with a different outcome definition, in which case we could argue that +the strategy of selecting the outcome becomes part of the analysis.

+

In such a case, we can define the multiple strategies using a +list:

-outcomeIds = list(narrowDefinition = 1,
+outcomeIds = list(narrowDefinition = 1,
                   broadDefinition = 2)
 
 exposureOutcome <- createExposureOutcome(
   exposureId = 1124300,
   outcomeId = outcomeIds)
-

When we specify an analysis, we can then refer to one definition or the other:

+

When we specify an analysis, we can then refer to one definition or +the other:

 sccsAnalysisA <- createSccsAnalysis(
   analysisId = 1,
@@ -349,15 +447,22 @@ 

createSccsIntervalDataArgs = createSccsIntervalDataArgs1, fitSccsModelArgs = fitSccsModelArgs) -sccsAnalysisList2 <- list(sccsAnalysisA, sccsAnalysisB)

-

In this example, the first analysis (analysisID = 1) will use cohort definition 1 as outcome, whilst the second analysis analysis (analysisID = 2) will use cohort definition 2 as outcome.

-

The same mechanism can be used to specify types for the exposureId.

+sccsAnalysisList2 <- list(sccsAnalysisA, sccsAnalysisB)
+

In this example, the first analysis (analysisID = 1) will use cohort +definition 1 as outcome, whilst the second analysis analysis (analysisID += 2) will use cohort definition 2 as outcome.

+

The same mechanism can be used to specify types for the +exposureId.

-
-

-Executing multiple analyses

-

We can now run the analyses against the hypotheses of interest using the runScsAnalyses()function. This function will run all specified analyses against all hypotheses of interest, meaning that the total number of outcome models is length(sccsAnalysisList) * length(exposureOutcomeList).

+
+

Executing multiple analyses +

+

We can now run the analyses against the hypotheses of interest using +the runScsAnalyses()function. This function will run all +specified analyses against all hypotheses of interest, meaning that the +total number of outcome models is +length(sccsAnalysisList) * length(exposureOutcomeList).

 result <- runSccsAnalyses(
   connectionDetails = connectionDetails,
@@ -376,178 +481,208 @@ 

createSccsIntervalDataThreads = 3, fitSccsModelThreads = 4, cvThreads = 10)

-

In the code above, we provide the arguments for connecting to the database, which schemas and tables to use, as well as the analyses and hypotheses of interest. The outputFolder specifies where the outcome models and intermediate files will be written. We also instruct SelfControlledCaseSeries to use multiple threads for various stages in the analyses, meaning these will be executed in parallel on multiple CPUs in the computer. Multithreading can significantly reduce execution time, but will require more system resources such as memory and temporary disk space.

-
-

-Restarting

-

If for some reason the execution was interrupted, you can restart by re-issuing the runSccsAnalyses() command. Any intermediate and final products that have already been completed and written to disk will be skipped.

+

In the code above, we provide the arguments for connecting to the +database, which schemas and tables to use, as well as the analyses and +hypotheses of interest. The outputFolder specifies where +the outcome models and intermediate files will be written. We also +instruct SelfControlledCaseSeries to use multiple threads +for various stages in the analyses, meaning these will be executed in +parallel on multiple CPUs in the computer. Multithreading can +significantly reduce execution time, but will require more system +resources such as memory and temporary disk space.

+
+

Restarting +

+

If for some reason the execution was interrupted, you can restart by +re-issuing the runSccsAnalyses() command. Any intermediate +and final products that have already been completed and written to disk +will be skipped.

-
-

-Retrieving the results

-

The result of the runSccsAnalyses() is a data frame with one row per exposure-outcome-analysis combination. It provides the file names of the intermediate and end-result files that were constructed. For example, we can retrieve the fitted model for the combination of our drug of interest, outcome, and first analysis:

+
+

Retrieving the results +

+

The result of the runSccsAnalyses() is a data frame with +one row per exposure-outcome-analysis combination. It provides the file +names of the intermediate and end-result files that were constructed. +For example, we can retrieve the fitted model for the combination of our +drug of interest, outcome, and first analysis:

 sccsModelFile <- result$sccsModelFile[result$exposureId == 1124300 & 
                                       result$outcomeId == 1 &
                                       result$analysisId == 1]
-sccsModel <- readRDS(file.path(outputFolder, sccsModelFile))
+sccsModel <- readRDS(file.path(outputFolder, sccsModelFile))
 sccsModel
-
## SccsModel object
-## 
-## Outcome ID: 1
-## 
-## Outcome count:
-##   outcomeSubjects outcomeEvents outcomeObsPeriods
-## 1           77351        253169             77530
-## 
-## Estimates:
-## # A tibble: 1 x 7
-##   Name                                ID Estimate LB95CI UB95CI LogRr SeLogRr
-##   <chr>                            <dbl>    <dbl>  <dbl>  <dbl> <dbl>   <dbl>
-## 1 Exposure of interest: diclofenac  1000     1.23   1.16   1.29 0.203  0.0280
-

Note that some of the file names will appear several times in the table. For example, all analysis share the same sccsData object.

-

We can create a summary of the results using summarizeSccsAnalyses():

+
## SccsModel object
+## 
+## Outcome ID: 1
+## 
+## Outcome count:
+##   outcomeSubjects outcomeEvents outcomeObsPeriods
+## 1           77351        253169             77530
+## 
+## Estimates:
+## # A tibble: 1 x 7
+##   Name                                ID Estimate LB95CI UB95CI LogRr SeLogRr
+##   <chr>                            <dbl>    <dbl>  <dbl>  <dbl> <dbl>   <dbl>
+## 1 Exposure of interest: diclofenac  1000     1.23   1.16   1.29 0.203  0.0280
+

Note that some of the file names will appear several times in the +table. For example, all analysis share the same sccsData object.

+

We can create a summary of the results using +summarizeSccsAnalyses():

 analysisSum <- summarizeSccsAnalyses(result, outputFolder)
-head(analysisSum)
-
## # A tibble: 6 x 21
-##   analysisId exposureId outcomeId outcomeSubjects outcomeEvents outcomeObsPerio~
-##        <dbl>      <dbl>     <dbl>           <dbl>         <dbl>            <dbl>
-## 1          1    1124300         1           77351        253169            77530
-## 2          1     705178         1           77351        253169            77530
-## 3          1     705944         1           77351        253169            77530
-## 4          1     710650         1           77351        253169            77530
-## 5          1     714785         1           77351        253169            77530
-## 6          1     719174         1           77351        253169            77530
-## # ... with 15 more variables: rr(Exposure of interest) <dbl>,
-## #   ci95lb(Exposure of interest) <dbl>, ci95ub(Exposure of interest) <dbl>,
-## #   logRr(Exposure of interest) <dbl>, seLogRr(Exposure of interest) <dbl>,
-## #   llr(Exposure of interest) <dbl>, llr1(Exposure of interest) <dbl>,
-## #   llr2(Exposure of interest) <dbl>, rr(Pre-exposure) <dbl>,
-## #   ci95lb(Pre-exposure) <dbl>, ci95ub(Pre-exposure) <dbl>,
-## #   logRr(Pre-exposure) <dbl>, seLogRr(Pre-exposure) <dbl>, ...
-

This tells us, per exposure-outcome-analysis combination, the estimated relative risk and 95% confidence interval, as well as the number of subjects (cases) and the number of events observed for those subjects.

-
-

-Empirical calibration

-

Now that we have produced estimates for all outcomes including our negative controls, we can perform empirical calibration to estimate the bias of the various analyses included in our study. We will create the calibration effect plots for every analysis ID. In each plot, the blue dots represent our negative control exposures, and the yellow diamond represents our exposure of interest: diclofenac. An unbiased, well-calibrated analysis should have 95% of the negative controls between the dashed lines (ie. 95% should have p > .05).

+head(analysisSum)
+
## # A tibble: 6 x 21
+##   analysisId exposureId outcomeId outcomeSubjects outcomeEvents outcomeObsPerio~
+##        <dbl>      <dbl>     <dbl>           <dbl>         <dbl>            <dbl>
+## 1          1    1124300         1           77351        253169            77530
+## 2          1     705178         1           77351        253169            77530
+## 3          1     705944         1           77351        253169            77530
+## 4          1     710650         1           77351        253169            77530
+## 5          1     714785         1           77351        253169            77530
+## 6          1     719174         1           77351        253169            77530
+## # ... with 15 more variables: `rr(Exposure of interest)` <dbl>,
+## #   `ci95lb(Exposure of interest)` <dbl>, `ci95ub(Exposure of interest)` <dbl>,
+## #   `logRr(Exposure of interest)` <dbl>, `seLogRr(Exposure of interest)` <dbl>,
+## #   `llr(Exposure of interest)` <dbl>, `llr1(Exposure of interest)` <dbl>,
+## #   `llr2(Exposure of interest)` <dbl>, `rr(Pre-exposure)` <dbl>,
+## #   `ci95lb(Pre-exposure)` <dbl>, `ci95ub(Pre-exposure)` <dbl>,
+## #   `logRr(Pre-exposure)` <dbl>, `seLogRr(Pre-exposure)` <dbl>, ...
+

This tells us, per exposure-outcome-analysis combination, the +estimated relative risk and 95% confidence interval, as well as the +number of subjects (cases) and the number of events observed for those +subjects.

+
+

Empirical calibration +

+

Now that we have produced estimates for all outcomes including our +negative controls, we can perform empirical calibration to estimate the +bias of the various analyses included in our study. We will create the +calibration effect plots for every analysis ID. In each plot, the blue +dots represent our negative control exposures, and the yellow diamond +represents our exposure of interest: diclofenac. An unbiased, +well-calibrated analysis should have 95% of the negative controls +between the dashed lines (ie. 95% should have p > .05).

-install.packages("EmpiricalCalibration")
-library(EmpiricalCalibration)
+install.packages("EmpiricalCalibration")
+library(EmpiricalCalibration)
 
 # Analysis 1: Simplest model
 negCons <- analysisSum[analysisSum$analysisId == 1 & analysisSum$exposureId != 1124300, ]
 ei <-  analysisSum[analysisSum$analysisId == 1 & analysisSum$exposureId == 1124300, ]
-null <- fitNull(negCons$`logRr(Exposure of interest)`, 
+null <- fitNull(negCons$`logRr(Exposure of interest)`, 
                 negCons$`seLogRr(Exposure of interest)`)
-plotCalibrationEffect(logRrNegatives = negCons$`logRr(Exposure of interest)`, 
+plotCalibrationEffect(logRrNegatives = negCons$`logRr(Exposure of interest)`, 
                       seLogRrNegatives = negCons$`seLogRr(Exposure of interest)`, 
                       logRrPositives = ei$`logRr(Exposure of interest)`, 
                       seLogRrPositives = ei$`seLogRr(Exposure of interest)`, 
                       null)
-
## Warning in fitNull(negCons$`logRr(Exposure of interest)`,
-## negCons$`seLogRr(Exposure of interest)`): Estimate(s) with NA standard error
-## detected. Removing before fitting null distribution
-
## Warning: Removed 3 rows containing missing values (geom_point).
+
## Warning in fitNull(negCons$`logRr(Exposure of interest)`,
+## negCons$`seLogRr(Exposure of interest)`): Estimate(s) with NA standard error
+## detected. Removing before fitting null distribution
+
## Warning: Removed 3 rows containing missing values (geom_point).

 # Analysis 2: Including prophylactics
 negCons <- analysisSum[analysisSum$analysisId == 2 & analysisSum$exposureId != 1124300, ]
 ei <-  analysisSum[analysisSum$analysisId == 2 & analysisSum$exposureId == 1124300, ]
-null <- fitNull(negCons$`logRr(Exposure of interest)`, 
+null <- fitNull(negCons$`logRr(Exposure of interest)`, 
                 negCons$`seLogRr(Exposure of interest)`)
-plotCalibrationEffect(logRrNegatives = negCons$`logRr(Exposure of interest)`, 
+plotCalibrationEffect(logRrNegatives = negCons$`logRr(Exposure of interest)`, 
                       seLogRrNegatives = negCons$`seLogRr(Exposure of interest)`, 
                       logRrPositives = ei$`logRr(Exposure of interest)`, 
                       seLogRrPositives = ei$`seLogRr(Exposure of interest)`, 
                       null)
-
## Warning in fitNull(negCons$`logRr(Exposure of interest)`,
-## negCons$`seLogRr(Exposure of interest)`): Estimate(s) with NA standard error
-## detected. Removing before fitting null distribution
-
## Warning: Removed 3 rows containing missing values (geom_point).
+
## Warning in fitNull(negCons$`logRr(Exposure of interest)`,
+## negCons$`seLogRr(Exposure of interest)`): Estimate(s) with NA standard error
+## detected. Removing before fitting null distribution
+
## Warning: Removed 3 rows containing missing values (geom_point).

 # Analysis 3: Including prophylactics, age, season, pre-exposure, and censoring
 negCons <- analysisSum[analysisSum$analysisId == 3 & analysisSum$exposureId != 1124300, ]
 ei <-  analysisSum[analysisSum$analysisId == 3 & analysisSum$exposureId == 1124300, ]
-null <- fitNull(negCons$`logRr(Exposure of interest)`, 
+null <- fitNull(negCons$`logRr(Exposure of interest)`, 
                 negCons$`seLogRr(Exposure of interest)`)
-plotCalibrationEffect(logRrNegatives = negCons$`logRr(Exposure of interest)`, 
+plotCalibrationEffect(logRrNegatives = negCons$`logRr(Exposure of interest)`, 
                       seLogRrNegatives = negCons$`seLogRr(Exposure of interest)`, 
                       logRrPositives = ei$`logRr(Exposure of interest)`, 
                       seLogRrPositives = ei$`seLogRr(Exposure of interest)`, 
                       null)
-
## Warning in fitNull(negCons$`logRr(Exposure of interest)`,
-## negCons$`seLogRr(Exposure of interest)`): Estimate(s) with NA standard error
-## detected. Removing before fitting null distribution
-
## Warning: Removed 3 rows containing missing values (geom_point).
+
## Warning in fitNull(negCons$`logRr(Exposure of interest)`,
+## negCons$`seLogRr(Exposure of interest)`): Estimate(s) with NA standard error
+## detected. Removing before fitting null distribution
+
## Warning: Removed 3 rows containing missing values (geom_point).

 # Analysis 4: Including all other drugs (as well as prophylactics, age, season, pre-
 # exposure, and censoring)
 negCons <- analysisSum[analysisSum$analysisId == 4 & analysisSum$exposureId != 1124300, ]
 ei <-  analysisSum[analysisSum$analysisId == 4 & analysisSum$exposureId == 1124300, ]
-null <- fitNull(negCons$`logRr(Exposure of interest)`, 
+null <- fitNull(negCons$`logRr(Exposure of interest)`, 
                 negCons$`seLogRr(Exposure of interest)`)
-plotCalibrationEffect(logRrNegatives = negCons$`logRr(Exposure of interest)`, 
+plotCalibrationEffect(logRrNegatives = negCons$`logRr(Exposure of interest)`, 
                       seLogRrNegatives = negCons$`seLogRr(Exposure of interest)`, 
                       logRrPositives = ei$`logRr(Exposure of interest)`, 
                       seLogRrPositives = ei$`seLogRr(Exposure of interest)`, 
                       null)
-
## Warning in fitNull(negCons$`logRr(Exposure of interest)`,
-## negCons$`seLogRr(Exposure of interest)`): Estimate(s) with NA standard error
-## detected. Removing before fitting null distribution
-
## Warning: Removed 3 rows containing missing values (geom_point).
+
## Warning in fitNull(negCons$`logRr(Exposure of interest)`,
+## negCons$`seLogRr(Exposure of interest)`): Estimate(s) with NA standard error
+## detected. Removing before fitting null distribution
+
## Warning: Removed 3 rows containing missing values (geom_point).

-
-

-Acknowledgments

-

Considerable work has been dedicated to provide the SelfControlledCaseSeries package.

+
+

Acknowledgments +

+

Considerable work has been dedicated to provide the +SelfControlledCaseSeries package.

-citation("SelfControlledCaseSeries")
-
## 
-## To cite package 'SelfControlledCaseSeries' in publications use:
-## 
-##   Martijn Schuemie, Patrick Ryan, Trevor Shaddox and Marc Suchard
-##   (2022). SelfControlledCaseSeries: Self-Controlled Case Series. R
-##   package version 3.2.1.
-##   https://github.com/OHDSI/SelfControlledCaseSeries
-## 
-## A BibTeX entry for LaTeX users is
-## 
-##   @Manual{,
-##     title = {SelfControlledCaseSeries: Self-Controlled Case Series},
-##     author = {Martijn Schuemie and Patrick Ryan and Trevor Shaddox and Marc Suchard},
-##     year = {2022},
-##     note = {R package version 3.2.1},
-##     url = {https://github.com/OHDSI/SelfControlledCaseSeries},
-##   }
-

Further, SelfControlledCaseSeries makes extensive use of the Cyclops package.

+citation("SelfControlledCaseSeries")
+
## 
+## To cite package 'SelfControlledCaseSeries' in publications use:
+## 
+##   Schuemie M, Ryan P, Shaddox T, Suchard M (2022).
+##   _SelfControlledCaseSeries: Self-Controlled Case Series_. R package
+##   version 3.3.0, <https://github.com/OHDSI/SelfControlledCaseSeries>.
+## 
+## A BibTeX entry for LaTeX users is
+## 
+##   @Manual{,
+##     title = {SelfControlledCaseSeries: Self-Controlled Case Series},
+##     author = {Martijn Schuemie and Patrick Ryan and Trevor Shaddox and Marc Suchard},
+##     year = {2022},
+##     note = {R package version 3.3.0},
+##     url = {https://github.com/OHDSI/SelfControlledCaseSeries},
+##   }
+

Further, SelfControlledCaseSeries makes extensive use of +the Cyclops package.

-citation("Cyclops")
-
## 
-## To cite Cyclops in publications use:
-## 
-## Suchard MA, Simpson SE, Zorych I, Ryan P, Madigan D (2013). "Massive
-## parallelization of serial inference algorithms for complex generalized
-## linear models." _ACM Transactions on Modeling and Computer Simulation_,
-## *23*, 10. <URL: https://dl.acm.org/doi/10.1145/2414416.2414791>.
-## 
-## A BibTeX entry for LaTeX users is
-## 
-##   @Article{,
-##     author = {M. A. Suchard and S. E. Simpson and I. Zorych and P. Ryan and D. Madigan},
-##     title = {Massive parallelization of serial inference algorithms for complex generalized linear models},
-##     journal = {ACM Transactions on Modeling and Computer Simulation},
-##     volume = {23},
-##     pages = {10},
-##     year = {2013},
-##     url = {https://dl.acm.org/doi/10.1145/2414416.2414791},
-##   }
-

This work is supported in part through the National Science Foundation grant IIS 1251151.

+citation("Cyclops")
+
## 
+## To cite Cyclops in publications use:
+## 
+##   Suchard MA, Simpson SE, Zorych I, Ryan P, Madigan D (2013). "Massive
+##   parallelization of serial inference algorithms for complex
+##   generalized linear models." _ACM Transactions on Modeling and
+##   Computer Simulation_, *23*, 10.
+##   <https://dl.acm.org/doi/10.1145/2414416.2414791>.
+## 
+## A BibTeX entry for LaTeX users is
+## 
+##   @Article{,
+##     author = {M. A. Suchard and S. E. Simpson and I. Zorych and P. Ryan and D. Madigan},
+##     title = {Massive parallelization of serial inference algorithms for complex generalized linear models},
+##     journal = {ACM Transactions on Modeling and Computer Simulation},
+##     volume = {23},
+##     pages = {10},
+##     year = {2013},
+##     url = {https://dl.acm.org/doi/10.1145/2414416.2414791},
+##   }
+

This work is supported in part through the National Science +Foundation grant IIS 1251151.

@@ -562,11 +697,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.3.

@@ -575,5 +712,7 @@

+ + diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-17-1.png b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-17-1.png index 27e335a..41bb373 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-17-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-17-1.png differ diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-19-1.png b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-19-1.png index bd31408..2b02862 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-19-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-19-1.png differ diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-21-1.png b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-21-1.png index 17104ed..7d4fef6 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-21-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-21-1.png differ diff --git a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-23-1.png b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-23-1.png index d411353..33dcde5 100644 Binary files a/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-23-1.png and b/docs/articles/MultipleAnalyses_files/figure-html/unnamed-chunk-23-1.png differ diff --git a/docs/articles/SingleStudies.html b/docs/articles/SingleStudies.html index aae312f..abe2009 100644 --- a/docs/articles/SingleStudies.html +++ b/docs/articles/SingleStudies.html @@ -19,6 +19,8 @@ + +
+
-
-

-Introduction

-

This vignette describes how you can use the SelfControlledCaseSeries package to perform a single Self-Controlled Case Series (SCCS) study. We will walk through all the steps needed to perform an exemplar study, and we have selected the well-studied topic of the effect of NSAIDs on gastrointestinal (GI) bleeding-related hospitalization. For simplicity, we focus on one NSAID: diclofenac.

-
-
-

-Installation instructions

-

Before installing the SelfControlledCaseSeries package make sure you have Java available. For Windows users, RTools is also necessary. See these instructions for properly configuring your R environment.

-

The SelfControlledCaseSeries package is maintained in a Github repository, and can be downloaded and installed from within R using the remotes package:

+
+

Introduction +

+

This vignette describes how you can use the +SelfControlledCaseSeries package to perform a single +Self-Controlled Case Series (SCCS) study. We will walk through all the +steps needed to perform an exemplar study, and we have selected the +well-studied topic of the effect of NSAIDs on gastrointestinal (GI) +bleeding-related hospitalization. For simplicity, we focus on one NSAID: +diclofenac.

+
+
+

Installation instructions +

+

Before installing the SelfControlledCaseSeries package +make sure you have Java available. For Windows users, RTools is also +necessary. See these +instructions for properly configuring your R environment.

+

The SelfControlledCaseSeries package is maintained in a +Github +repository, and can be downloaded and installed from within R using +the remotes package:

-install.packages("remotes")
-library(remotes)
-install_github("ohdsi/SelfControlledCaseSeries") 
-

Once installed, you can type library(SelfControlledCaseSeries) to load the package.

-
-
-

-Overview

-

In the SelfControlledCaseSeries package a study requires at least three steps:

+install.packages("remotes") +library(remotes) +install_github("ohdsi/SelfControlledCaseSeries")
+

Once installed, you can type +library(SelfControlledCaseSeries) to load the package.

+
+
+

Overview +

+

In the SelfControlledCaseSeries package a study requires +at least three steps:

  1. Loading the necessary data from the database.

  2. -
  3. Transforming the data into a format suitable for an SCCS study. This step includes the creation of covariates based on the variables extracted from the database, such as defining risk windows based on exposures. It also includes transforming the data into non-overlapping time intervals, with information on the various covariates and outcomes per interval.

  4. +
  5. Transforming the data into a format suitable for an SCCS study. +This step includes the creation of covariates based on the variables +extracted from the database, such as defining risk windows based on +exposures. It also includes transforming the data into non-overlapping +time intervals, with information on the various covariates and outcomes +per interval.

  6. Fitting the model using conditional Poisson regression.

-

In the following sections these steps will be demonstrated for increasingly complex studies.

-
-
-

-Studies with a single drug

-
-

-Configuring the connection to the server

-

We need to tell R how to connect to the server where the data are. SelfControlledCaseSeries uses the DatabaseConnector package, which provides the createConnectionDetails function. Type ?createConnectionDetails for the specific settings required for the various database management systems (DBMS). For example, one might connect to a PostgreSQL database using this code:

+

In the following sections these steps will be demonstrated for +increasingly complex studies.

+
+
+

Studies with a single drug +

+
+

Configuring the connection to the server +

+

We need to tell R how to connect to the server where the data are. +SelfControlledCaseSeries uses the +DatabaseConnector package, which provides the +createConnectionDetails function. Type +?createConnectionDetails for the specific settings required +for the various database management systems (DBMS). For example, one +might connect to a PostgreSQL database using this code:

 connectionDetails <- createConnectionDetails(dbms = "postgresql", 
                                              server = "localhost/ohdsi", 
@@ -142,15 +166,28 @@ 

cdmDatabaseSchema <- "my_cdm_data" cohortDatabaseSchema <- "my_results" -options(sqlRenderTempEmulationSchema = NULL) +options(sqlRenderTempEmulationSchema = NULL) cdmVersion <- "5"

-

The last three lines define the cdmDatabaseSchema and cohortDatabaseSchema variables,as well as the CDM version. We’ll use these later to tell R where the data in CDM format live, where we have stored our cohorts of interest, and what version CDM is used. Note that for Microsoft SQL Server, databaseschemas need to specify both the database and the schema, so for example cdmDatabaseSchema <- "my_cdm_data.dbo".

+

The last three lines define the cdmDatabaseSchema and +cohortDatabaseSchema variables,as well as the CDM version. +We’ll use these later to tell R where the data in CDM format live, where +we have stored our cohorts of interest, and what version CDM is used. +Note that for Microsoft SQL Server, databaseschemas need to specify both +the database and the schema, so for example +cdmDatabaseSchema <- "my_cdm_data.dbo".

-
-

-Preparing the health outcome of interest

-

We need to define the exposures and outcomes for our study. One way to do this is by writing SQL statements against the OMOP CDM that populate a table of events in which we are interested. The resulting table should have the same structure as the cohort table in the CDM. This means it should have the fields cohort_definition_id, cohort_start_date, cohort_end_date, and subject_id.

-

For our example study, we have created a file called vignette.sql with the following contents:

+
+

Preparing the health outcome of interest +

+

We need to define the exposures and outcomes for our study. One way +to do this is by writing SQL statements against the OMOP CDM that +populate a table of events in which we are interested. The resulting +table should have the same structure as the cohort table in +the CDM. This means it should have the fields +cohort_definition_id, cohort_start_date, +cohort_end_date, and subject_id.

+

For our example study, we have created a file called +vignette.sql with the following contents:

/***********************************
 File vignette.sql 
 ***********************************/
@@ -172,7 +209,14 @@ 

WHERE ancestor_concept_id = 192671 -- GI - Gastrointestinal haemorrhage ) AND visit_occurrence.visit_concept_id IN (9201, 9203);

-

This is parameterized SQL which can be used by the SqlRender package. We use parameterized SQL so we do not have to pre-specify the names of the CDM and cohort schemas. That way, if we want to run the SQL on a different schema, we only need to change the parameter values; we do not have to change the SQL code. By also making use of translation functionality in SqlRender, we can make sure the SQL code can be run in many different environments.

+

This is parameterized SQL which can be used by the +SqlRender package. We use parameterized SQL so we do not +have to pre-specify the names of the CDM and cohort schemas. That way, +if we want to run the SQL on a different schema, we only need to change +the parameter values; we do not have to change the SQL code. By also +making use of translation functionality in SqlRender, we +can make sure the SQL code can be run in many different +environments.

library(SqlRender)
 sql <- readSql("vignette.sql")
 sql <- render(sql,
@@ -183,25 +227,31 @@ 

connection <- connect(connectionDetails) executeSql(connection, sql)

-

In this code, we first read the SQL from the file into memory. In the next line, we replace the three parameter names with the actual values. We then translate the SQL into the dialect appropriate for the DBMS we already specified in the connectionDetails. Next, we connect to the server, and submit the rendered and translated SQL.

-

If all went well, we now have a table with the outcome of interest. We can see how many events:

+

In this code, we first read the SQL from the file into memory. In the +next line, we replace the three parameter names with the actual values. +We then translate the SQL into the dialect appropriate for the DBMS we +already specified in the connectionDetails. Next, we +connect to the server, and submit the rendered and translated SQL.

+

If all went well, we now have a table with the outcome of interest. +We can see how many events:

-sql <- paste("SELECT cohort_definition_id, COUNT(*) AS count",
+sql <- paste("SELECT cohort_definition_id, COUNT(*) AS count",
              "FROM @cohortDatabaseSchema.@outcomeTable",
              "GROUP BY cohort_definition_id")
-sql <- render(sql, 
+sql <- render(sql, 
               cohortDatabaseSchema = cohortDatabaseSchema, 
               outcomeTable = "my_outcomes")
-sql <- translate(sql, targetDialect = connectionDetails$dbms)
+sql <- translate(sql, targetDialect = connectionDetails$dbms)
 
 querySql(connection, sql)
-
##   cohort_concept_id   count
-## 1                 1 1029443
+
##   cohort_concept_id   count
+## 1                 1 1029443
-
-

-Extracting the data from the server

-

Now we can tell SelfControlledCaseSeries to extract all necessary data for our analysis:

+
+

Extracting the data from the server +

+

Now we can tell SelfControlledCaseSeries to extract all +necessary data for our analysis:

 diclofenac <- 1124300
 
@@ -215,67 +265,97 @@ 

exposureIds = diclofenac, cdmVersion = cdmVersion) sccsData

-
## # SccsData object
-## 
-## Exposure cohort ID(s): 1124300
-## Outcome cohort ID(s): 1
-## 
-## Inherits from Andromeda:
-## # Andromeda object
-## # Physical location:  C:\Users\mschuemi.EU\AppData\Local\Temp\RtmpqGWYIv\file4bc075d73371.sqlite
-## 
-## Tables:
-## $cases (observationPeriodId, caseId, personId, observationDays, startYear, startMonth, startDay, ageInDays, censoredDays, noninformativeEndCensor)
-## $eraRef (eraType, eraId, eraName)
-## $eras (eraType, caseId, eraId, value, startDay, endDay)
-

There are many parameters, but they are all documented in the SelfControlledCaseSeries manual. In short, we are pointing the function to the table created earlier and indicating which cohort ID in that table identifies the outcome. Note that it is possible to fetch the data for multiple outcomes at once. We further point the function to the drug_era table, and specify the concept ID of our exposure of interest: diclofenac. Again, note that it is also possible to fetch data for multiple drugs at once. In fact, when we do not specify any exposure IDs the function will retrieve the data for all the drugs found in the drug_era table.

-

All data about the patients, outcomes and exposures are extracted from the server and stored in the sccsData object. This object uses the Andromeda package to store information in a way that ensures R does not run out of memory, even when the data are large.

-

We can use the generic summary() function to view some more information of the data we extracted:

+
## # SccsData object
+## 
+## Exposure cohort ID(s): 1124300
+## Outcome cohort ID(s): 1
+## 
+## Inherits from Andromeda:
+## # Andromeda object
+## # Physical location:  C:\Users\mschuemi.EU\AppData\Local\Temp\RtmpI7A2d4\filec394202148e8.sqlite
+## 
+## Tables:
+## $cases (observationPeriodId, caseId, personId, observationDays, startYear, startMonth, startDay, ageInDays, censoredDays, noninformativeEndCensor)
+## $eraRef (eraType, eraId, eraName)
+## $eras (eraType, caseId, eraId, value, startDay, endDay)
+

There are many parameters, but they are all documented in the +SelfControlledCaseSeries manual. In short, we are pointing +the function to the table created earlier and indicating which cohort ID +in that table identifies the outcome. Note that it is possible to fetch +the data for multiple outcomes at once. We further point the function to +the drug_era table, and specify the concept ID of our +exposure of interest: diclofenac. Again, note that it is also possible +to fetch data for multiple drugs at once. In fact, when we do not +specify any exposure IDs the function will retrieve the data for all the +drugs found in the drug_era table.

+

All data about the patients, outcomes and exposures are extracted +from the server and stored in the sccsData object. This +object uses the Andromeda package to store information in a +way that ensures R does not run out of memory, even when the data are +large.

+

We can use the generic summary() function to view some +more information of the data we extracted:

-summary(sccsData)
-
## SccsData object summary
-## 
-## Exposure cohort ID(s): 1124300
-## Outcome cohort ID(s): 1
-## 
-## Outcome counts:
-##   Outcome Subjects Outcome Events Outcome Observation Periods
-## 1           441552        3167367                      445224
-## 
-## Eras:
-## Number of era types: 2
-## Number of eras: 3282443
-
-

-Saving the data to file

-

Creating the sccsData file can take considerable computing time, and it is probably a good idea to save it for future sessions. Because sccsData uses Andromeda, we cannot use R’s regular save function. Instead, we’ll have to use the saveSccsData() function:

+summary(sccsData)
+
## SccsData object summary
+## 
+## Exposure cohort ID(s): 1124300
+## Outcome cohort ID(s): 1
+## 
+## Outcome counts:
+##   Outcome Subjects Outcome Events Outcome Observation Periods
+## 1           441552        3167367                      445224
+## 
+## Eras:
+## Number of era types: 2
+## Number of eras: 3282443
+
+

Saving the data to file +

+

Creating the sccsData file can take considerable +computing time, and it is probably a good idea to save it for future +sessions. Because sccsData uses Andromeda, we +cannot use R’s regular save function. Instead, we’ll have to use the +saveSccsData() function:

 saveSccsData(sccsData, "diclofenacAndGiBleed.zip")
-

We can use the loadSccsData() function to load the data in a future session.

+

We can use the loadSccsData() function to load the data +in a future session.

-
-

-Creating the study population

-

From the data fetched from the server we can now define the population we wish to study. If we retrieved data for multiple outcomes, we should now select only one, and possibly impose further restrictions:

+
+

Creating the study population +

+

From the data fetched from the server we can now define the +population we wish to study. If we retrieved data for multiple outcomes, +we should now select only one, and possibly impose further +restrictions:

 studyPop <- createStudyPopulation(sccsData = sccsData,
                                   outcomeId = 1,
                                   firstOutcomeOnly = FALSE,
                                   naivePeriod = 180)
-

Here we specify we wish to study the outcome with ID 1. Since this was the only outcome for which we fetched the data, we could also have skipped this argument. We furthermore specify that the first 180 days of observation of every person, the so-called ‘naive period’, will be excluded from the analysis. Note that data in the naive period will be used to determine exposure status at the start of follow-up (after the end of the naive period). We also specify we will use all occurrences of the outcome, not just the first one per person.

-

We can find out how many people (if any) were removed by any restrictions we imposed:

+

Here we specify we wish to study the outcome with ID 1. Since this +was the only outcome for which we fetched the data, we could also have +skipped this argument. We furthermore specify that the first 180 days of +observation of every person, the so-called ‘naive period’, will be +excluded from the analysis. Note that data in the naive period will be +used to determine exposure status at the start of follow-up (after the +end of the naive period). We also specify we will use all occurrences of +the outcome, not just the first one per person.

+

We can find out how many people (if any) were removed by any +restrictions we imposed:

-
## # A tibble: 2 x 5
-##   outcomeId description           outcomeSubjects outcomeEvents outcomeObsPerio~
-##       <dbl> <chr>                           <dbl>         <dbl>            <dbl>
-## 1         1 Outcomes                       441552       3167367           445224
-## 2         1 180 days naive period          397221       2873478           399607
-
-
-

-Defining a simple model

+
## # A tibble: 2 x 5
+##   outcomeId description           outcomeSubjects outcomeEvents outcomeObsPerio~
+##       <dbl> <chr>                           <dbl>         <dbl>            <dbl>
+## 1         1 Outcomes                       441552       3167367           445224
+## 2         1 180 days naive period          397221       2873478           399607
+
+
+

Defining a simple model +

Next, we can use the data to define a simple model to fit:

 covarDiclofenac <- createEraCovariateSettings(label = "Exposure of interest",
@@ -288,23 +368,40 @@ 

sccsData = sccsData, eraCovariateSettings = covarDiclofenac) -summary(sccsIntervalData)

-
## SccsIntervalData object summary
-## 
-## Outcome cohort ID: 1
-## 
-## Number of cases (observation periods): 49985
-## Number of eras (spans of time): 99970
-## Number of outcomes: 380313
-## Number of covariates: 1
-## Number of non-zero covariate values: 49985
-

In this example, we use the createEraCovariateSettings to define a single covariate: exposure to diclofenac. We specify that the risk window is from start of exposure to the end by setting start and end to 0, and defining the anchor for the end to be the era end, which for drug eras is the end of exposure.

-

We then use the covariate definition in the createSccsIntervalData function to generate the sccsIntervalData. This represents the data in non-overlapping time intervals, with information on the various covariates and outcomes per interval.

-
-
-

-Power calculations

-

Before we start fitting an outcome model, we might be interested to know whether we have sufficient power to detect a particular effect size. It makes sense to perform these power calculations once the study population has been fully defined, so taking into account loss to the various inclusion and exclusion criteria. This means we will use the sccsIntervalData object we’ve just created as the basis for our power calculations. Since the sample size is fixed in retrospective studies (the data has already been collected), and the true effect size is unknown, the SelfControlledCaseSeries package provides a function to compute the minimum detectable relative risk (MDRR) instead:

+summary(sccsIntervalData)
+
## SccsIntervalData object summary
+## 
+## Outcome cohort ID: 1
+## 
+## Number of cases (observation periods): 49985
+## Number of eras (spans of time): 99970
+## Number of outcomes: 380313
+## Number of covariates: 1
+## Number of non-zero covariate values: 49985
+

In this example, we use the createEraCovariateSettings +to define a single covariate: exposure to diclofenac. We specify that +the risk window is from start of exposure to the end by setting start +and end to 0, and defining the anchor for the end to be the era end, +which for drug eras is the end of exposure.

+

We then use the covariate definition in the +createSccsIntervalData function to generate the +sccsIntervalData. This represents the data in +non-overlapping time intervals, with information on the various +covariates and outcomes per interval.

+
+
+

Power calculations +

+

Before we start fitting an outcome model, we might be interested to +know whether we have sufficient power to detect a particular effect +size. It makes sense to perform these power calculations once the study +population has been fully defined, so taking into account loss to the +various inclusion and exclusion criteria. This means we will use the +sccsIntervalData object we’ve just created as the basis for our power +calculations. Since the sample size is fixed in retrospective studies +(the data has already been collected), and the true effect size is +unknown, the SelfControlledCaseSeries package provides a function to +compute the minimum detectable relative risk (MDRR) instead:

 computeMdrr(sccsIntervalData,
             exposureCovariateId = 1000,
@@ -312,40 +409,54 @@ 

power = 0.8, twoSided = TRUE, method = "binomial")

-
## # A tibble: 1 x 6
-##   timeExposed timeTotal propTimeExposed propPopExposued events  mdrr
-##         <dbl>     <dbl>           <dbl>           <dbl>  <dbl> <dbl>
-## 1     8767035 155897307          0.0562               1 380313  1.02
-

Note that we have to provide the covariate ID of the exposure of interest, which we learned by calling summary on sccsIntervalData earlier. This is because we may have many covariates in our model, but will likely only be interested in the MDRR of one.

-
-
-

-Model fitting

+
## # A tibble: 1 x 6
+##   timeExposed timeTotal propTimeExposed propPopulationExposed events  mdrr
+##         <dbl>     <dbl>           <dbl>                 <dbl>  <dbl> <dbl>
+## 1     8767035 155897307          0.0562                     1 380313  1.02
+

Note that we have to provide the covariate ID of the exposure of +interest, which we learned by calling summary on +sccsIntervalData earlier. This is because we may have many covariates in +our model, but will likely only be interested in the MDRR of one.

+
+
+

Model fitting +

The fitSccsModel function is used to fit the model:

 model <- fitSccsModel(sccsIntervalData)

We can inspect the resulting model:

 model
-
## SccsModel object
-## 
-## Outcome ID: 1
-## 
-## Outcome count:
-##   outcomeSubjects outcomeEvents outcomeObsPeriods
-## 1          397221       2873478            399607
-## 
-## Estimates:
-## # A tibble: 1 x 7
-##   Name                                ID Estimate LB95CI UB95CI LogRr SeLogRr
-##   <chr>                            <dbl>    <dbl>  <dbl>  <dbl> <dbl>   <dbl>
-## 1 Exposure of interest: diclofenac  1000     1.31   1.29   1.33 0.272 0.00711
-

This tells us what the estimated relative risk (the incidence rate ratio) is during exposure to diclofenac compared to non-exposed time.

-
-
-

-Adding a pre-exposure window

-

The fact that NSAIDs like diclofenac can cause GI bleeds is well known to doctors, and this knowledge affects prescribing behavior. For example, a patient who has just had a GI bleed is not likely to be prescribed diclofenac. This may lead to underestimation of the rate during unexposed time, because the unexposed time includes time just prior to exposure where observing of the outcome is unlikely because of this behavior. One solution to this problem that is often used is to introduce a separate ‘risk window’ just prior to exposure, to separate it from the remaining unexposed time. We can add such a ‘pre-exposure window’ to our analysis:

+
## SccsModel object
+## 
+## Outcome ID: 1
+## 
+## Outcome count:
+##   outcomeSubjects outcomeEvents outcomeObsPeriods
+## 1          397221       2873478            399607
+## 
+## Estimates:
+## # A tibble: 1 x 7
+##   Name                                ID Estimate LB95CI UB95CI LogRr SeLogRr
+##   <chr>                            <dbl>    <dbl>  <dbl>  <dbl> <dbl>   <dbl>
+## 1 Exposure of interest: diclofenac  1000     1.31   1.29   1.33 0.272 0.00711
+

This tells us what the estimated relative risk (the incidence rate +ratio) is during exposure to diclofenac compared to non-exposed +time.

+
+
+

Adding a pre-exposure window +

+

The fact that NSAIDs like diclofenac can cause GI bleeds is well +known to doctors, and this knowledge affects prescribing behavior. For +example, a patient who has just had a GI bleed is not likely to be +prescribed diclofenac. This may lead to underestimation of the rate +during unexposed time, because the unexposed time includes time just +prior to exposure where observing of the outcome is unlikely because of +this behavior. One solution to this problem that is often used is to +introduce a separate ‘risk window’ just prior to exposure, to separate +it from the remaining unexposed time. We can add such a ‘pre-exposure +window’ to our analysis:

 covarPreDiclofenac <- createEraCovariateSettings(label = "Pre-exposure",
                                                  includeEraIds = diclofenac,
@@ -355,81 +466,115 @@ 

sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, sccsData = sccsData, - eraCovariateSettings = list(covarDiclofenac, + eraCovariateSettings = list(covarDiclofenac, covarPreDiclofenac)) model <- fitSccsModel(sccsIntervalData)

-

Here we created a new covariate definition in addition to the first one. We define the risk window to start 60 days prior to exposure, and end on the day just prior to exposure. We combine the two covariate settings in a list for the createSccsIntervalData function. Again, we can take a look at the results:

+

Here we created a new covariate definition in addition to the first +one. We define the risk window to start 60 days prior to exposure, and +end on the day just prior to exposure. We combine the two covariate +settings in a list for the createSccsIntervalData function. +Again, we can take a look at the results:

 model
-
## SccsModel object
-## 
-## Outcome ID: 1
-## 
-## Outcome count:
-##   outcomeSubjects outcomeEvents outcomeObsPeriods
-## 1          397221       2873478            399607
-## 
-## Estimates:
-## # A tibble: 2 x 7
-##   Name                                ID Estimate LB95CI UB95CI  LogRr SeLogRr
-##   <chr>                            <dbl>    <dbl>  <dbl>  <dbl>  <dbl>   <dbl>
-## 1 Exposure of interest: diclofenac  1000    1.29   1.27   1.31   0.255 0.00719
-## 2 Pre-exposure: diclofenac          1001    0.797  0.783  0.811 -0.227 0.00883
-

Here we indeed see a lower relative risk in the time preceding the exposure, indicating the outcome might be a contra-indication for the drug of interest.

-
-
-

-Splitting risk windows

-

Often we will want to split the risk windows into smaller parts and compute estimates for each part. This can give us insight into the temporal distribution of the risk. We can add this to the model:

+
## SccsModel object
+## 
+## Outcome ID: 1
+## 
+## Outcome count:
+##   outcomeSubjects outcomeEvents outcomeObsPeriods
+## 1          397221       2873478            399607
+## 
+## Estimates:
+## # A tibble: 2 x 7
+##   Name                                ID Estimate LB95CI UB95CI  LogRr SeLogRr
+##   <chr>                            <dbl>    <dbl>  <dbl>  <dbl>  <dbl>   <dbl>
+## 1 Exposure of interest: diclofenac  1000    1.29   1.27   1.31   0.255 0.00719
+## 2 Pre-exposure: diclofenac          1001    0.797  0.783  0.811 -0.227 0.00883
+

Here we indeed see a lower relative risk in the time preceding the +exposure, indicating the outcome might be a contra-indication for the +drug of interest.

+
+
+

Splitting risk windows +

+

Often we will want to split the risk windows into smaller parts and +compute estimates for each part. This can give us insight into the +temporal distribution of the risk. We can add this to the model:

 covarDiclofenacSplit <- createEraCovariateSettings(label = "Exposure of interest",
                                                    includeEraIds = diclofenac,
                                                    start = 0,
                                                    end = 0,
                                                    endAnchor = "era end",
-                                                   splitPoints = c(7, 14))
+                                                   splitPoints = c(7, 14))
 
 covarPreDiclofenacSplit <- createEraCovariateSettings(label = "Pre-exposure",
                                                       includeEraIds = diclofenac,
                                                       start = -60,
                                                       end = -1,
                                                       endAnchor = "era start",
-                                                      splitPoints = c(-30))
+                                                      splitPoints = c(-30))
 
 sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop,
                                            sccsData = sccsData,
-                                           eraCovariateSettings = list(covarDiclofenacSplit,
+                                           eraCovariateSettings = list(covarDiclofenacSplit,
                                                                        covarPreDiclofenacSplit))
-

Here we’ve redefined out covariate definitions: We kept the same start and end dates, but enforced split points for the main exposure windows at 7 and 14 days. For the pre-exposure window we divided the window into two, at day 30 before the exposure start. Note that the split point dates indicate the end date of the preceding part, so the exposure is now split into day 0 to (and including) day 7, day 8 to (and including) day 14, and day 15 until the end of exposure. The results are:

+

Here we’ve redefined out covariate definitions: We kept the same +start and end dates, but enforced split points for the main exposure +windows at 7 and 14 days. For the pre-exposure window we divided the +window into two, at day 30 before the exposure start. Note that the +split point dates indicate the end date of the preceding part, so the +exposure is now split into day 0 to (and including) day 7, day 8 to (and +including) day 14, and day 15 until the end of exposure. The results +are:

 model
-
## SccsModel object
-## 
-## Outcome ID: 1
-## 
-## Outcome count:
-##   outcomeSubjects outcomeEvents outcomeObsPeriods
-## 1          397221       2873478            399607
-## 
-## Estimates:
-## # A tibble: 5 x 7
-##   Name                                 ID Estimate LB95CI UB95CI   LogRr SeLogRr
-##   <chr>                             <dbl>    <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
-## 1 Exposure of interest: diclofenac~  1000    0.971  0.931  1.01  -0.0292 0.0212 
-## 2 Exposure of interest: diclofenac~  1001    1.48   1.42   1.54   0.392  0.0193 
-## 3 Exposure of interest: diclofenac~  1002    1.31   1.29   1.33   0.271  0.00801
-## 4 Pre-exposure: diclofenac, day -6~  1003    0.884  0.865  0.904 -0.123  0.0113 
-## 5 Pre-exposure: diclofenac, day -2~  1004    0.701  0.683  0.720 -0.355  0.0132
-

We see that the risk for the three exposure windows is more or less the same, suggesting a constant risk. We also see that the period 60 to 30 days prior to exposure does not seem to show a decreased risk, suggesting the effect of the contra-indication does not extend more than 30 days before the exposure.

-
-
-

-Including age, seasonality, and calendar time

-

Often both the rate of exposure and the outcome change with age, and can even depend on the season or calendar time in general (e.g. rates may be higher in 2021 compared to 2020). This may lead to confounding and may bias our estimates. To correct for this we can include age, season, and/or calendar time into the model.

-

For computational reasons we assume the effect of age, season, and calendar time are constant within each calendar month. We assume that the rate from one month to the next can be different, but we also assume that subsequent months have somewhat similar rates. This is implemented by using cubic spline functions.

-

Spline for seasonalityFigure 1. Example of how a spline is used for seasonality: within a month, the risk attributable to seasonality is assumed to be constant, but from month to month the risks are assumed to follow a cyclic cubic spline.

-

Note that the by default all people that have the outcome will be used to estimate the effect of age and seasonality on the outcome, so not just the people exposed to the drug of interest. We can add age, seasonality, and calendar time like this:

+
## SccsModel object
+## 
+## Outcome ID: 1
+## 
+## Outcome count:
+##   outcomeSubjects outcomeEvents outcomeObsPeriods
+## 1          397221       2873478            399607
+## 
+## Estimates:
+## # A tibble: 5 x 7
+##   Name                                 ID Estimate LB95CI UB95CI   LogRr SeLogRr
+##   <chr>                             <dbl>    <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
+## 1 Exposure of interest: diclofenac~  1000    0.971  0.931  1.01  -0.0292 0.0212 
+## 2 Exposure of interest: diclofenac~  1001    1.48   1.42   1.54   0.392  0.0193 
+## 3 Exposure of interest: diclofenac~  1002    1.31   1.29   1.33   0.271  0.00801
+## 4 Pre-exposure: diclofenac, day -6~  1003    0.884  0.865  0.904 -0.123  0.0113 
+## 5 Pre-exposure: diclofenac, day -2~  1004    0.701  0.683  0.720 -0.355  0.0132
+

We see that the risk for the three exposure windows is more or less +the same, suggesting a constant risk. We also see that the period 60 to +30 days prior to exposure does not seem to show a decreased risk, +suggesting the effect of the contra-indication does not extend more than +30 days before the exposure.

+
+
+

Including age, seasonality, and calendar time +

+

Often both the rate of exposure and the outcome change with age, and +can even depend on the season or calendar time in general (e.g. rates +may be higher in 2021 compared to 2020). This may lead to confounding +and may bias our estimates. To correct for this we can include age, +season, and/or calendar time into the model.

+

For computational reasons we assume the effect of age, season, and +calendar time are constant within each calendar month. We assume that +the rate from one month to the next can be different, but we also assume +that subsequent months have somewhat similar rates. This is implemented +by using cubic spline functions.

+

Spline for seasonalityFigure +1. Example of how a spline is used for seasonality: within a month, +the risk attributable to seasonality is assumed to be constant, but from +month to month the risks are assumed to follow a cyclic cubic +spline.

+

Note that the by default all people that have the outcome will be +used to estimate the effect of age and seasonality on the outcome, so +not just the people exposed to the drug of interest. We can add age, +seasonality, and calendar time like this:

 ageCovariateSettings <- createAgeCovariateSettings(ageKnots = 5)
 
@@ -439,7 +584,7 @@ 

sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, sccsData = sccsData, - eraCovariateSettings = list(covarDiclofenacSplit, + eraCovariateSettings = list(covarDiclofenacSplit, covarPreDiclofenacSplit), ageCovariateSettings = ageCovariateSettings, @@ -450,37 +595,39 @@

Again, we can inspect the model:

 model
-
## SccsModel object
-## 
-## Outcome ID: 1
-## 
-## Outcome count:
-##   outcomeSubjects outcomeEvents outcomeObsPeriods
-## 1          397221       2873478            399607
-## 
-## Estimates:
-## # A tibble: 18 x 7
-##    Name                               ID Estimate LB95CI UB95CI   LogRr  SeLogRr
-##    <chr>                           <dbl>    <dbl>  <dbl>  <dbl>   <dbl>    <dbl>
-##  1 Age spline component 1            100    1.75  NA     NA      0.557  NA      
-##  2 Age spline component 2            101    2.08  NA     NA      0.731  NA      
-##  3 Age spline component 3            102    3.62  NA     NA      1.29   NA      
-##  4 Age spline component 4            103    3.44  NA     NA      1.23   NA      
-##  5 Age spline component 5            104    3.72  NA     NA      1.31   NA      
-##  6 Seasonality spline component 1    200    0.909 NA     NA     -0.0952 NA      
-##  7 Seasonality spline component 2    201    1.15  NA     NA      0.137  NA      
-##  8 Seasonality spline component 3    202    0.847 NA     NA     -0.166  NA      
-##  9 Calendar time spline component~   300    2.22  NA     NA      0.798  NA      
-## 10 Calendar time spline component~   301    2.79  NA     NA      1.02   NA      
-## 11 Calendar time spline component~   302    6.70  NA     NA      1.90   NA      
-## 12 Calendar time spline component~   303   13.9   NA     NA      2.63   NA      
-## 13 Calendar time spline component~   304    7.66  NA     NA      2.04   NA      
-## 14 Exposure of interest: diclofen~  1000    0.949  0.910  0.989 -0.0524  0.0212 
-## 15 Exposure of interest: diclofen~  1001    1.46   1.40   1.51   0.377   0.0194 
-## 16 Exposure of interest: diclofen~  1002    1.42   1.40   1.44   0.351   0.00802
-## 17 Pre-exposure: diclofenac, day ~  1003    0.865  0.846  0.884 -0.145   0.0114 
-## 18 Pre-exposure: diclofenac, day ~  1004    0.685  0.667  0.703 -0.378   0.0132
-

We see that our estimates for exposed and pre-exposure time have not changes much. We can plot the spline curves for age, season, and calendar time to learn more:

+
## SccsModel object
+## 
+## Outcome ID: 1
+## 
+## Outcome count:
+##   outcomeSubjects outcomeEvents outcomeObsPeriods
+## 1          397221       2873478            399607
+## 
+## Estimates:
+## # A tibble: 18 x 7
+##    Name                               ID Estimate LB95CI UB95CI   LogRr  SeLogRr
+##    <chr>                           <dbl>    <dbl>  <dbl>  <dbl>   <dbl>    <dbl>
+##  1 Age spline component 1            100    1.75  NA     NA      0.557  NA      
+##  2 Age spline component 2            101    2.08  NA     NA      0.731  NA      
+##  3 Age spline component 3            102    3.62  NA     NA      1.29   NA      
+##  4 Age spline component 4            103    3.44  NA     NA      1.23   NA      
+##  5 Age spline component 5            104    3.72  NA     NA      1.31   NA      
+##  6 Seasonality spline component 1    200    0.909 NA     NA     -0.0952 NA      
+##  7 Seasonality spline component 2    201    1.15  NA     NA      0.137  NA      
+##  8 Seasonality spline component 3    202    0.847 NA     NA     -0.166  NA      
+##  9 Calendar time spline component~   300    2.22  NA     NA      0.798  NA      
+## 10 Calendar time spline component~   301    2.79  NA     NA      1.02   NA      
+## 11 Calendar time spline component~   302    6.70  NA     NA      1.90   NA      
+## 12 Calendar time spline component~   303   13.9   NA     NA      2.63   NA      
+## 13 Calendar time spline component~   304    7.66  NA     NA      2.04   NA      
+## 14 Exposure of interest: diclofen~  1000    0.949  0.910  0.989 -0.0524  0.0212 
+## 15 Exposure of interest: diclofen~  1001    1.46   1.40   1.51   0.377   0.0194 
+## 16 Exposure of interest: diclofen~  1002    1.42   1.40   1.44   0.351   0.00802
+## 17 Pre-exposure: diclofenac, day ~  1003    0.865  0.846  0.884 -0.145   0.0114 
+## 18 Pre-exposure: diclofenac, day ~  1004    0.685  0.667  0.703 -0.378   0.0132
+

We see that our estimates for exposed and pre-exposure time have not +changes much. We can plot the spline curves for age, season, and +calendar time to learn more:

@@ -490,16 +637,31 @@

-

We see a strong effect for age on the outcome, but this effect is spread out over many years and so it less likely to affect the estimates for any individual, since most people are only observed for a few years in the database. We do not see a strong effect for season, but we do see an increasing trend over the years, with a drop near the end of observation time.

+

We see a strong effect for age on the outcome, but this effect is +spread out over many years and so it less likely to affect the estimates +for any individual, since most people are only observed for a few years +in the database. We do not see a strong effect for season, but we do see +an increasing trend over the years, with a drop near the end of +observation time.

-
-

-Considering event-dependent observation time

-

The SCCS method requires that observation periods are independent of outcome times. This requirement is violated when outcomes increase the mortality rate, since censoring of the observation periods is then event-dependent. A modification to the SCCS has been proposed that attempts to correct for this. First, several models are fitted to estimate the amount and shape of the event-dependent censoring, and the best fitting model is selected. Next, this model is used to reweigh various parts of the observation time. This approach is also implemented in this package, and can be turned on using the eventDependentObservation argument of the createSccsIntervalData function:

+
+

Considering event-dependent observation time +

+

The SCCS method requires that observation periods are independent of +outcome times. This requirement is violated when outcomes increase the +mortality rate, since censoring of the observation periods is then +event-dependent. A modification to the SCCS has been proposed that +attempts to correct for this. First, several models are fitted to +estimate the amount and shape of the event-dependent censoring, and the +best fitting model is selected. Next, this model is used to reweigh +various parts of the observation time. This approach is also implemented +in this package, and can be turned on using the +eventDependentObservation argument of the +createSccsIntervalData function:

 sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop,
                                            sccsData = sccsData,
-                                           eraCovariateSettings = list(covarDiclofenacSplit,
+                                           eraCovariateSettings = list(covarDiclofenacSplit,
                                                                        covarPreDiclofenacSplit),
                                            ageCovariateSettings = ageCovariateSettings,
                                            seasonalityCovariateSettings = seasonalityCovariateSettings,
@@ -510,44 +672,51 @@ 

Again, we can inspect the model:

 model
-
## SccsModel object
-## 
-## Outcome ID: 1
-## 
-## Outcome count:
-##   outcomeSubjects outcomeEvents outcomeObsPeriods
-## 1          397221       2873478            399607
-## 
-## Estimates:
-## # A tibble: 13 x 7
-##    Name                               ID Estimate LB95CI UB95CI   LogRr  SeLogRr
-##    <chr>                           <dbl>    <dbl>  <dbl>  <dbl>   <dbl>    <dbl>
-##  1 Seasonality spline component 1    200   0.870  NA     NA     -0.139  NA      
-##  2 Seasonality spline component 2    201   1.25   NA     NA      0.225  NA      
-##  3 Seasonality spline component 3    202   0.868  NA     NA     -0.142  NA      
-##  4 Calendar time spline component~   300   0.478  NA     NA     -0.738  NA      
-##  5 Calendar time spline component~   301   0.266  NA     NA     -1.32   NA      
-##  6 Calendar time spline component~   302   0.128  NA     NA     -2.06   NA      
-##  7 Calendar time spline component~   303   0.0659 NA     NA     -2.72   NA      
-##  8 Calendar time spline component~   304   0.0165 NA     NA     -4.11   NA      
-##  9 Exposure of interest: diclofen~  1000   0.972   0.932  1.01  -0.0284  0.0211 
-## 10 Exposure of interest: diclofen~  1001   1.49    1.44   1.55   0.400   0.0193 
-## 11 Exposure of interest: diclofen~  1002   1.47    1.45   1.49   0.384   0.00790
-## 12 Pre-exposure: diclofenac, day ~  1003   0.916   0.896  0.936 -0.0879  0.0114 
-## 13 Pre-exposure: diclofenac, day ~  1004   0.712   0.693  0.730 -0.340   0.0132
-

-
-
-

-Studies with more than one drug

-

Although we are usually interested in the effect of a single drug or drug class, it could be beneficial to add exposure to other drugs to the analysis if we believe those drugs represent time-varying confounders that we wish to correct for.

-
-

-Adding a class of drugs

-

For example, oftentimes diclofenac is co-prescribed with proton-pump inhibitors (PPIs) to mitigate the risk of GI bleeding. We would like our estimate to represent just the effect of the diclofenac, so we need to keep the effect of the PPIs separate. First we have to retrieve the information on PPI exposure from the database:

+
## SccsModel object
+## 
+## Outcome ID: 1
+## 
+## Outcome count:
+##   outcomeSubjects outcomeEvents outcomeObsPeriods
+## 1          397221       2873478            399607
+## 
+## Estimates:
+## # A tibble: 13 x 7
+##    Name                               ID Estimate LB95CI UB95CI   LogRr  SeLogRr
+##    <chr>                           <dbl>    <dbl>  <dbl>  <dbl>   <dbl>    <dbl>
+##  1 Seasonality spline component 1    200   0.870  NA     NA     -0.139  NA      
+##  2 Seasonality spline component 2    201   1.25   NA     NA      0.225  NA      
+##  3 Seasonality spline component 3    202   0.868  NA     NA     -0.142  NA      
+##  4 Calendar time spline component~   300   0.478  NA     NA     -0.738  NA      
+##  5 Calendar time spline component~   301   0.266  NA     NA     -1.32   NA      
+##  6 Calendar time spline component~   302   0.128  NA     NA     -2.06   NA      
+##  7 Calendar time spline component~   303   0.0659 NA     NA     -2.72   NA      
+##  8 Calendar time spline component~   304   0.0165 NA     NA     -4.11   NA      
+##  9 Exposure of interest: diclofen~  1000   0.972   0.932  1.01  -0.0284  0.0211 
+## 10 Exposure of interest: diclofen~  1001   1.49    1.44   1.55   0.400   0.0193 
+## 11 Exposure of interest: diclofen~  1002   1.47    1.45   1.49   0.384   0.00790
+## 12 Pre-exposure: diclofenac, day ~  1003   0.916   0.896  0.936 -0.0879  0.0114 
+## 13 Pre-exposure: diclofenac, day ~  1004   0.712   0.693  0.730 -0.340   0.0132
+
+
+
+

Studies with more than one drug +

+

Although we are usually interested in the effect of a single drug or +drug class, it could be beneficial to add exposure to other drugs to the +analysis if we believe those drugs represent time-varying confounders +that we wish to correct for.

+
+

Adding a class of drugs +

+

For example, oftentimes diclofenac is co-prescribed with proton-pump +inhibitors (PPIs) to mitigate the risk of GI bleeding. We would like our +estimate to represent just the effect of the diclofenac, so we need to +keep the effect of the PPIs separate. First we have to retrieve the +information on PPI exposure from the database:

 diclofenac <- 1124300
-ppis <- c(911735, 929887, 923645, 904453, 948078, 19039926)
+ppis <- c(911735, 929887, 923645, 904453, 948078, 19039926)
 
 sccsData <- getDbSccsData(connectionDetails = connectionDetails,
                           cdmDatabaseSchema = cdmDatabaseSchema,
@@ -556,22 +725,22 @@ 

outcomeIds = 1, exposureDatabaseSchema = cdmDatabaseSchema, exposureTable = "drug_era", - exposureIds = c(diclofenac, ppis), + exposureIds = c(diclofenac, ppis), cdmVersion = cdmVersion) sccsData

-
## # SccsData object
-## 
-## Exposure cohort ID(s): 1124300,911735,929887,923645,904453,948078,19039926
-## Outcome cohort ID(s): 1
-## 
-## Inherits from Andromeda:
-## # Andromeda object
-## # Physical location:  C:\Users\mschuemi.EU\AppData\Local\Temp\RtmpqGWYIv\file4bc0672a4051.sqlite
-## 
-## Tables:
-## $cases (observationPeriodId, caseId, personId, observationDays, startYear, startMonth, startDay, ageInDays, censoredDays, noninformativeEndCensor)
-## $eraRef (eraType, eraId, eraName)
-## $eras (eraType, caseId, eraId, value, startDay, endDay)
+
## # SccsData object
+## 
+## Exposure cohort ID(s): 1124300,911735,929887,923645,904453,948078,19039926
+## Outcome cohort ID(s): 1
+## 
+## Inherits from Andromeda:
+## # Andromeda object
+## # Physical location:  C:\Users\mschuemi.EU\AppData\Local\Temp\RtmpI7A2d4\filec394208b29e2.sqlite
+## 
+## Tables:
+## $cases (observationPeriodId, caseId, personId, observationDays, startYear, startMonth, startDay, ageInDays, censoredDays, noninformativeEndCensor)
+## $eraRef (eraType, eraId, eraName)
+## $eras (eraType, caseId, eraId, value, startDay, endDay)

Once retrieved, we can use the data to build and fit our model:

 studyPop <- createStudyPopulation(sccsData = sccsData,
@@ -588,7 +757,7 @@ 

sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, sccsData = sccsData, - eraCovariateSettings = list(covarDiclofenacSplit, + eraCovariateSettings = list(covarDiclofenacSplit, covarPreDiclofenacSplit, covarPpis), ageCovariateSettings = ageCovariateSettings, @@ -597,41 +766,54 @@

eventDependentObservation = TRUE) model <- fitSccsModel(sccsIntervalData)

-

Here, we added a new covariate based on the list of concept IDs for the various PPIs. In this example we set stratifyById to FALSE, meaning that we will estimate a single incidence rate ratio for all PPIs, so one estimate for the entire class of drugs. Note that duplicates will be removed: if a person is exposed to two PPIs on the same day, this will be counted only once when fitting the model. Furthermore, we have set the start day to 1 instead of 0. The reason for this is that PPIs will also be used to treat GI bleeds, and are likely to be prescribed on the same day as the event. If we would include day 0, the risk of the outcome would be attributed to the PPI used for treatment, not the other factors that caused the GI bleed such as any exposure to our drug of interest. Again, we can inspect the model:

+

Here, we added a new covariate based on the list of concept IDs for +the various PPIs. In this example we set stratifyById to +FALSE, meaning that we will estimate a single incidence rate ratio for +all PPIs, so one estimate for the entire class of drugs. Note that +duplicates will be removed: if a person is exposed to two PPIs on the +same day, this will be counted only once when fitting the model. +Furthermore, we have set the start day to 1 instead of 0. +The reason for this is that PPIs will also be used to treat GI bleeds, +and are likely to be prescribed on the same day as the event. If we +would include day 0, the risk of the outcome would be attributed to the +PPI used for treatment, not the other factors that caused the GI bleed +such as any exposure to our drug of interest. Again, we can inspect the +model:

 model
-
## SccsModel object
-## 
-## Outcome ID: 1
-## 
-## Outcome count:
-##   outcomeSubjects outcomeEvents outcomeObsPeriods
-## 1          397221       2873478            399607
-## 
-## Estimates:
-## # A tibble: 14 x 7
-##    Name                               ID Estimate LB95CI UB95CI   LogRr  SeLogRr
-##    <chr>                           <dbl>    <dbl>  <dbl>  <dbl>   <dbl>    <dbl>
-##  1 Seasonality spline component 1    200   0.950  NA     NA     -0.0513 NA      
-##  2 Seasonality spline component 2    201   1.25   NA     NA      0.221  NA      
-##  3 Seasonality spline component 3    202   0.943  NA     NA     -0.0588 NA      
-##  4 Calendar time spline component~   300   0.527  NA     NA     -0.641  NA      
-##  5 Calendar time spline component~   301   0.201  NA     NA     -1.60   NA      
-##  6 Calendar time spline component~   302   0.123  NA     NA     -2.09   NA      
-##  7 Calendar time spline component~   303   0.0653 NA     NA     -2.73   NA      
-##  8 Calendar time spline component~   304   0.0157 NA     NA     -4.16   NA      
-##  9 Exposure of interest: diclofen~  1000   0.984   0.944  1.03  -0.0163  0.0212 
-## 10 Exposure of interest: diclofen~  1001   1.51    1.45   1.57   0.412   0.0192 
-## 11 Exposure of interest: diclofen~  1002   1.48    1.45   1.50   0.389   0.00791
-## 12 Pre-exposure: diclofenac, day ~  1003   0.920   0.900  0.941 -0.0830  0.0115 
-## 13 Pre-exposure: diclofenac, day ~  1004   0.717   0.698  0.735 -0.333   0.0132 
-## 14 PPIs                             1005   0.665   0.663  0.668 -0.408   0.00191
+
## SccsModel object
+## 
+## Outcome ID: 1
+## 
+## Outcome count:
+##   outcomeSubjects outcomeEvents outcomeObsPeriods
+## 1          397221       2873478            399607
+## 
+## Estimates:
+## # A tibble: 14 x 7
+##    Name                               ID Estimate LB95CI UB95CI   LogRr  SeLogRr
+##    <chr>                           <dbl>    <dbl>  <dbl>  <dbl>   <dbl>    <dbl>
+##  1 Seasonality spline component 1    200   0.950  NA     NA     -0.0513 NA      
+##  2 Seasonality spline component 2    201   1.25   NA     NA      0.221  NA      
+##  3 Seasonality spline component 3    202   0.943  NA     NA     -0.0588 NA      
+##  4 Calendar time spline component~   300   0.527  NA     NA     -0.641  NA      
+##  5 Calendar time spline component~   301   0.201  NA     NA     -1.60   NA      
+##  6 Calendar time spline component~   302   0.123  NA     NA     -2.09   NA      
+##  7 Calendar time spline component~   303   0.0653 NA     NA     -2.73   NA      
+##  8 Calendar time spline component~   304   0.0157 NA     NA     -4.16   NA      
+##  9 Exposure of interest: diclofen~  1000   0.984   0.944  1.03  -0.0163  0.0212 
+## 10 Exposure of interest: diclofen~  1001   1.51    1.45   1.57   0.412   0.0192 
+## 11 Exposure of interest: diclofen~  1002   1.48    1.45   1.50   0.389   0.00791
+## 12 Pre-exposure: diclofenac, day ~  1003   0.920   0.900  0.941 -0.0830  0.0115 
+## 13 Pre-exposure: diclofenac, day ~  1004   0.717   0.698  0.735 -0.333   0.0132 
+## 14 PPIs                             1005   0.665   0.663  0.668 -0.408   0.00191

We do see a decrease in risk when people are exposed to PPIs.

-
-

-Adding all drugs

-

Another approach could be to add all drugs into the model. Again, the first step is to get all the relevant data from the database:

+
+

Adding all drugs +

+

Another approach could be to add all drugs into the model. Again, the +first step is to get all the relevant data from the database:

 sccsData <- getDbSccsData(connectionDetails = connectionDetails,
                           cdmDatabaseSchema = cdmDatabaseSchema,
@@ -640,9 +822,11 @@ 

outcomeIds = 1, exposureDatabaseSchema = cdmDatabaseSchema, exposureTable = "drug_era", - exposureIds = c(), + exposureIds = c(), cdmVersion = cdmVersion)

-

Note that the exposureIds argument is left empty. This will cause data for all concepts in the exposure table to be retrieved. Next, we simply create a new set of covariates, and fit the model:

+

Note that the exposureIds argument is left empty. This +will cause data for all concepts in the exposure table to be retrieved. +Next, we simply create a new set of covariates, and fit the model:

 studyPop <- createStudyPopulation(sccsData = sccsData,
                                   outcomeId = 1,
@@ -659,7 +843,7 @@ 

sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, sccsData = sccsData, - eraCovariateSettings = list(covarDiclofenacSplit, + eraCovariateSettings = list(covarDiclofenacSplit, covarPreDiclofenacSplit, covarAllDrugs), ageCovariateSettings = ageCovariateSettings, @@ -668,147 +852,203 @@

eventDependentObservation = TRUE) model <- fitSccsModel(sccsIntervalData)

-

The first thing to note is that we have defined the new covariates to be all drugs except diclofenac by not specifying the includeEraIds and setting the excludeEraIds to the concept ID of diclofenac. Furthermore, we have specified that stratifyById is TRUE, meaning an estimate will be produced for each drug.

-

We have set allowRegularization to TRUE, meaning we will use regularization for all estimates in this new covariate set. Regularization means we will impose a prior distribution on the effect size, effectually penalizing large estimates. This helps fit the model, for example when some drugs are rare, and when drugs are almost often prescribed together and their individual effects are difficult to untangle.

-

Because there are now so many estimates, we will export all estimates to a data frame using getModel():

+

The first thing to note is that we have defined the new covariates to +be all drugs except diclofenac by not specifying the +includeEraIds and setting the excludeEraIds to +the concept ID of diclofenac. Furthermore, we have specified that +stratifyById is TRUE, meaning an estimate will be produced +for each drug.

+

We have set allowRegularization to TRUE, meaning we will +use regularization for all estimates in this new covariate set. +Regularization means we will impose a prior distribution on the effect +size, effectually penalizing large estimates. This helps fit the model, +for example when some drugs are rare, and when drugs are almost often +prescribed together and their individual effects are difficult to +untangle.

+

Because there are now so many estimates, we will export all estimates +to a data frame using getModel():

   estimates <- getModel(model)
   estimates[estimates$originalEraId == diclofenac, ]
-
## # A tibble: 5 x 10
-##   name                   id estimate lb95Ci ub95Ci   logRr seLogRr originalEraId
-##   <chr>               <dbl>    <dbl>  <dbl>  <dbl>   <dbl>   <dbl>         <dbl>
-## 1 Exposure of intere~  1000    0.971  0.923  1.02  -0.0295  0.0250       1124300
-## 2 Exposure of intere~  1001    1.50   1.43   1.56   0.403   0.0218       1124300
-## 3 Exposure of intere~  1002    1.50   1.47   1.54   0.408   0.0107       1124300
-## 4 Pre-exposure: dicl~  1003    0.925  0.900  0.951 -0.0779  0.0142       1124300
-## 5 Pre-exposure: dicl~  1004    0.720  0.699  0.741 -0.329   0.0151       1124300
-## # ... with 2 more variables: originalEraType <chr>, originalEraName <chr>
-

Here we see that despite the extensive adjustments that are made in the model, the effect estimates for diclofenac have remained nearly the same.

-

In case we’re interested, we can also look at the effect sizes for the PPIs:

+
## # A tibble: 5 x 10
+##   name                   id estimate lb95Ci ub95Ci   logRr seLogRr originalEraId
+##   <chr>               <dbl>    <dbl>  <dbl>  <dbl>   <dbl>   <dbl>         <dbl>
+## 1 Exposure of intere~  1000    0.971  0.923  1.02  -0.0295  0.0250       1124300
+## 2 Exposure of intere~  1001    1.50   1.43   1.56   0.403   0.0218       1124300
+## 3 Exposure of intere~  1002    1.50   1.47   1.54   0.408   0.0107       1124300
+## 4 Pre-exposure: dicl~  1003    0.925  0.900  0.951 -0.0779  0.0142       1124300
+## 5 Pre-exposure: dicl~  1004    0.720  0.699  0.741 -0.329   0.0151       1124300
+## # ... with 2 more variables: originalEraType <chr>, originalEraName <chr>
+

Here we see that despite the extensive adjustments that are made in +the model, the effect estimates for diclofenac have remained nearly the +same.

+

In case we’re interested, we can also look at the effect sizes for +the PPIs:

-estimates[estimates$originalEraId %in% ppis, ]
-
## # A tibble: 6 x 10
-##   name                    id estimate lb95Ci ub95Ci  logRr seLogRr originalEraId
-##   <chr>                <dbl>    <dbl>  <dbl>  <dbl>  <dbl>   <dbl>         <dbl>
-## 1 Other exposures: la~  1266    0.693     NA     NA -0.367      NA        929887
-## 2 Other exposures: es~  1776    0.688     NA     NA -0.374      NA        904453
-## 3 Other exposures: ra~  1893    0.823     NA     NA -0.194      NA        911735
-## 4 Other exposures: pa~  2280    0.624     NA     NA -0.472      NA        948078
-## 5 Other exposures: de~  2533    0.734     NA     NA -0.309      NA      19039926
-## 6 Other exposures: om~  2734    0.670     NA     NA -0.401      NA        923645
-## # ... with 2 more variables: originalEraType <chr>, originalEraName <chr>
-

Note that because we used regularization, we are not able to compute the confidence intervals for these estimates. We do again see that PPIs all have relative risks lower than 1 as we would expect.

-
-
-
-

-Diagnostics

-

We can perform several diagnostics on the data to verify whether our assumptions underlying the SCCS are met.

-
-

-Time from exposure start to event

-

To gain a better understanding of when the event occurs relative to the start of exposure, we can plot their relationship. Note that we specify the naive period, so this can be applied to the data prior to showing the plot. This will make the plot better in line with the data we ended up fitting:

+estimates[estimates$originalEraId %in% ppis, ]
+
## # A tibble: 6 x 10
+##   name                    id estimate lb95Ci ub95Ci  logRr seLogRr originalEraId
+##   <chr>                <dbl>    <dbl>  <dbl>  <dbl>  <dbl>   <dbl>         <dbl>
+## 1 Other exposures: la~  1266    0.693     NA     NA -0.367      NA        929887
+## 2 Other exposures: es~  1776    0.688     NA     NA -0.374      NA        904453
+## 3 Other exposures: ra~  1893    0.823     NA     NA -0.194      NA        911735
+## 4 Other exposures: pa~  2280    0.624     NA     NA -0.472      NA        948078
+## 5 Other exposures: de~  2533    0.734     NA     NA -0.309      NA      19039926
+## 6 Other exposures: om~  2734    0.670     NA     NA -0.401      NA        923645
+## # ... with 2 more variables: originalEraType <chr>, originalEraName <chr>
+

Note that because we used regularization, we are not able to compute +the confidence intervals for these estimates. We do again see that PPIs +all have relative risks lower than 1 as we would expect.

+
+
+
+

Diagnostics +

+

We can perform several diagnostics on the data to verify whether our +assumptions underlying the SCCS are met.

+
+

Time from exposure start to event +

+

To gain a better understanding of when the event occurs relative to +the start of exposure, we can plot their relationship. Note that we +specify the naive period, so this can be applied to the data prior to +showing the plot. This will make the plot better in line with the data +we ended up fitting:

 plotExposureCentered(studyPop, sccsData, exposureEraId = diclofenac)
-
## Warning: Removed 52 rows containing missing values (geom_rect).
+
## Warning: Removed 52 rows containing missing values (geom_rect).

-

This plot suggests an increased rate of events in the first few weeks following the start of exposure, perhaps because of an acute effect.

+

This plot suggests an increased rate of events in the first few weeks +following the start of exposure, perhaps because of an acute effect.

-
-

-Ages covered per subject

-

We can visualize which age ranges are covered by each subject’s observation time:

+
+

Ages covered per subject +

+

We can visualize which age ranges are covered by each subject’s +observation time:

 plotAgeSpans(studyPop)
-
## Warning in plotAgeSpans(studyPop): There are 399607 cases. Random sampling 10000
-## cases.
+
## Warning in plotAgeSpans(studyPop): There are 399607 cases. Random sampling 10000
+## cases.

-

Here we see that most observation periods span only a small age range, making it unlikely that any within-person age-related effect will be large.

+

Here we see that most observation periods span only a small age +range, making it unlikely that any within-person age-related effect will +be large.

-
-

-Dependency between events and observation end

-

To understand whether censoring is dependent on the event, which would violate one of the assumptions of the SCCS, we can plot the difference in distribution between censored and uncensored events. By ‘censored’ we mean periods that end before we would normally expect. Here, we define periods to be uncensored if they end at either the study end date (if specified), database end date (i.e. the date after which no data is captured in the database), or maximum age (if specified). All other periods are assumed to be censored.

+
+

Dependency between events and observation end +

+

To understand whether censoring is dependent on the event, which +would violate one of the assumptions of the SCCS, we can plot the +difference in distribution between censored and uncensored events. By +‘censored’ we mean periods that end before we would normally expect. +Here, we define periods to be uncensored if they end at either the study +end date (if specified), database end date (i.e. the date after which no +data is captured in the database), or maximum age (if specified). All +other periods are assumed to be censored.

-

Here we see that overall the two distributions are somewhat similar, with little evidence that censoring tends to lead to shorter times to the end of observation.

+

Here we see that overall the two distributions are somewhat similar, +with little evidence that censoring tends to lead to shorter times to +the end of observation.

-
-

-Stability of the outcome over calendar time

-

If the rate of the outcome changes as a function of calendar time, this could introduce bias. For example, if the outcome is more prevalent during winter, and the exposure also tends to occur in winter, this will create an association between the two that likely doesn’t imply causation. We can check for patterns over time:

+
+

Stability of the outcome over calendar time +

+

If the rate of the outcome changes as a function of calendar time, +this could introduce bias. For example, if the outcome is more prevalent +during winter, and the exposure also tends to occur in winter, this will +create an association between the two that likely doesn’t imply +causation. We can check for patterns over time:

-

In the top of the plot, we see the rate of the outcome (within those persons that are observed) does change over time.

-

Earlier, we’ve seen we can adjust for these types of patterns by including splines, for example for age, season, and calendar time. We can use the same plot to evaluate if these adjustments have been sufficient. For example, using the model we fitted earlier adjusting for age, season, and calendar time:

+

In the top of the plot, we see the rate of the outcome (within those +persons that are observed) does change over time.

+

Earlier, we’ve seen we can adjust for these types of patterns by +including splines, for example for age, season, and calendar time. We +can use the same plot to evaluate if these adjustments have been +sufficient. For example, using the model we fitted earlier adjusting for +age, season, and calendar time:

 plotEventToCalendarTime(studyPop, model)

-

Here, the top plot shows the rate of the outcome, after adjusting for the age, season, and calendar time effect. We see that the adjustment wasn’t completely effective, and in fact may have over-adjusted. A more formal way to evaluate this is by using:

+

Here, the top plot shows the rate of the outcome, after adjusting for +the age, season, and calendar time effect. We see that the adjustment +wasn’t completely effective, and in fact may have over-adjusted. A more +formal way to evaluate this is by using:

 diagnostic <- computeTimeStability(studyPop, model)
-head(diagnostic[, c("monthStartDate", "monthEndDate", "p", "alpha", "stable")])
-
## # A tibble: 6 x 5
-##   monthStartDate monthEndDate     p    alpha stable
-##   <date>         <date>       <dbl>    <dbl> <lgl> 
-## 1 2000-06-01     2000-06-30       0 0.000197 FALSE 
-## 2 2000-07-01     2000-07-31       0 0.000197 FALSE 
-## 3 2000-08-01     2000-08-31       0 0.000197 FALSE 
-## 4 2000-09-01     2000-09-30       0 0.000197 FALSE 
-## 5 2000-10-01     2000-10-31       0 0.000197 FALSE 
-## 6 2000-11-01     2000-11-30       0 0.000197 FALSE
-

This shows that, even after adjustment using the splines, there are months where the rate differs significantly from the mean rate, indicating temporal instability. We may want to change the knots in the splines, or possibly restrict our analysis to calendar time that is more stable.

-
-
-
-

-Acknowledgments

-

Considerable work has been dedicated to provide the SelfControlledCaseSeries package.

+head(diagnostic[, c("monthStartDate", "monthEndDate", "p", "alpha", "stable")])
+
## # A tibble: 6 x 5
+##   monthStartDate monthEndDate     p    alpha stable
+##   <date>         <date>       <dbl>    <dbl> <lgl> 
+## 1 2000-06-01     2000-06-30       0 0.000197 FALSE 
+## 2 2000-07-01     2000-07-31       0 0.000197 FALSE 
+## 3 2000-08-01     2000-08-31       0 0.000197 FALSE 
+## 4 2000-09-01     2000-09-30       0 0.000197 FALSE 
+## 5 2000-10-01     2000-10-31       0 0.000197 FALSE 
+## 6 2000-11-01     2000-11-30       0 0.000197 FALSE
+

This shows that, even after adjustment using the splines, there are +months where the rate differs significantly from the mean rate, +indicating temporal instability. We may want to change the knots in the +splines, or possibly restrict our analysis to calendar time that is more +stable.

+
+
+
+

Acknowledgments +

+

Considerable work has been dedicated to provide the +SelfControlledCaseSeries package.

-citation("SelfControlledCaseSeries")
-
## 
-## To cite package 'SelfControlledCaseSeries' in publications use:
-## 
-##   Martijn Schuemie, Patrick Ryan, Trevor Shaddox and Marc Suchard
-##   (2022). SelfControlledCaseSeries: Self-Controlled Case Series. R
-##   package version 3.2.1.
-##   https://github.com/OHDSI/SelfControlledCaseSeries
-## 
-## A BibTeX entry for LaTeX users is
-## 
-##   @Manual{,
-##     title = {SelfControlledCaseSeries: Self-Controlled Case Series},
-##     author = {Martijn Schuemie and Patrick Ryan and Trevor Shaddox and Marc Suchard},
-##     year = {2022},
-##     note = {R package version 3.2.1},
-##     url = {https://github.com/OHDSI/SelfControlledCaseSeries},
-##   }
-

Furthermore, SelfControlledCaseSeries makes extensive use of the Cyclops package.

+citation("SelfControlledCaseSeries")
+
## 
+## To cite package 'SelfControlledCaseSeries' in publications use:
+## 
+##   Schuemie M, Ryan P, Shaddox T, Suchard M (2022).
+##   _SelfControlledCaseSeries: Self-Controlled Case Series_. R package
+##   version 3.3.0, <https://github.com/OHDSI/SelfControlledCaseSeries>.
+## 
+## A BibTeX entry for LaTeX users is
+## 
+##   @Manual{,
+##     title = {SelfControlledCaseSeries: Self-Controlled Case Series},
+##     author = {Martijn Schuemie and Patrick Ryan and Trevor Shaddox and Marc Suchard},
+##     year = {2022},
+##     note = {R package version 3.3.0},
+##     url = {https://github.com/OHDSI/SelfControlledCaseSeries},
+##   }
+

Furthermore, SelfControlledCaseSeries makes extensive +use of the Cyclops package.

-citation("Cyclops")
-
## 
-## To cite Cyclops in publications use:
-## 
-## Suchard MA, Simpson SE, Zorych I, Ryan P, Madigan D (2013). "Massive
-## parallelization of serial inference algorithms for complex generalized
-## linear models." _ACM Transactions on Modeling and Computer Simulation_,
-## *23*, 10. <URL: https://dl.acm.org/doi/10.1145/2414416.2414791>.
-## 
-## A BibTeX entry for LaTeX users is
-## 
-##   @Article{,
-##     author = {M. A. Suchard and S. E. Simpson and I. Zorych and P. Ryan and D. Madigan},
-##     title = {Massive parallelization of serial inference algorithms for complex generalized linear models},
-##     journal = {ACM Transactions on Modeling and Computer Simulation},
-##     volume = {23},
-##     pages = {10},
-##     year = {2013},
-##     url = {https://dl.acm.org/doi/10.1145/2414416.2414791},
-##   }
-

Part of the code (related to event-dependent observation periods) is based on the SCCS package by Yonas Ghebremichael-Weldeselassie, Heather Whitaker, and Paddy Farrington.

-

This work is supported in part through the National Science Foundation grant IIS 1251151.

+citation("Cyclops")
+
## 
+## To cite Cyclops in publications use:
+## 
+##   Suchard MA, Simpson SE, Zorych I, Ryan P, Madigan D (2013). "Massive
+##   parallelization of serial inference algorithms for complex
+##   generalized linear models." _ACM Transactions on Modeling and
+##   Computer Simulation_, *23*, 10.
+##   <https://dl.acm.org/doi/10.1145/2414416.2414791>.
+## 
+## A BibTeX entry for LaTeX users is
+## 
+##   @Article{,
+##     author = {M. A. Suchard and S. E. Simpson and I. Zorych and P. Ryan and D. Madigan},
+##     title = {Massive parallelization of serial inference algorithms for complex generalized linear models},
+##     journal = {ACM Transactions on Modeling and Computer Simulation},
+##     volume = {23},
+##     pages = {10},
+##     year = {2013},
+##     url = {https://dl.acm.org/doi/10.1145/2414416.2414791},
+##   }
+

Part of the code (related to event-dependent observation periods) is +based on the SCCS package by Yonas Ghebremichael-Weldeselassie, Heather +Whitaker, and Paddy Farrington.

+

This work is supported in part through the National Science +Foundation grant IIS 1251151.

@@ -823,11 +1063,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.3.

@@ -836,5 +1078,7 @@

+ + diff --git a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-56-1.png b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-56-1.png index 713f35a..ff9d8d6 100644 Binary files a/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-56-1.png and b/docs/articles/SingleStudies_files/figure-html/unnamed-chunk-56-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 516bc20..c4da514 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,66 +1,12 @@ - - - - - - - -Articles • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Articles • SelfControlledCaseSeries - - + + - - -
-
- -
- - -
- +
- - + + diff --git a/docs/authors.html b/docs/authors.html index f83188e..ffc55ab 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,66 +1,12 @@ - - - - - - - -Authors • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Authors and Citation • SelfControlledCaseSeries - - - - + + - -
-
-
- -
+
- @@ -156,22 +107,20 @@

Authors

-
- +
- - + + diff --git a/docs/index.html b/docs/index.html index 81b23b8..fb190ad 100644 --- a/docs/index.html +++ b/docs/index.html @@ -26,6 +26,8 @@ + +
-
- +
+ -

SelfControlledCaseSeries is part of HADES.

+

SelfControlledCaseSeries is part of HADES.

-
-

-Introduction

+
+

Introduction +

SelfControlledCaseSeries is an R package for performing Self-Controlled Case Series (SCCS) analyses in an observational database in the OMOP Common Data Model.

-
-

-Features

+
+

Features +

  • Extracts the necessary data from a database in OMOP Common Data Model format.
  • Optionally add seasonality using a spline function.
  • @@ -120,9 +116,9 @@

  • Also provides the self-controlled risk interval design as a special case of the SCCS.
-
-

-Example

+
+

Example +

 sccsData <- getDbSccsData(connectionDetails = connectionDetails,
                           cdmDatabaseSchema = cdmDatabaseSchema,
@@ -159,73 +155,65 @@ 

# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> # 1 Exposure of interest: Diclofenac 1000 1.18 1.13 1.24 0.167 0.0230

-
-

-Technology

+
+

Technology +

SelfControlledCaseSeries is an R package, with some functions implemented in C++.

-
-

-System Requirements

-

Requires R (version 3.2.2 or higher). Installation on Windows requires RTools. Libraries used in SelfControlledCaseSeries require Java.

+
+

System Requirements +

+

Requires R (version 3.2.2 or higher). Installation on Windows requires RTools. Libraries used in SelfControlledCaseSeries require Java.

-
-

-Installation

-
    -
  1. See the instructions here for configuring your R environment, including Java.

  2. +
    +

    Installation +

    +
      +
    1. See the instructions here for configuring your R environment, including Java.

    2. In R, use the following commands to download and install MethodEvaluation:

    -install.packages("remotes")
    -remotes::install_github("ohdsi/SelfControlledCaseSeries")
    +install.packages("remotes") +remotes::install_github("ohdsi/SelfControlledCaseSeries")
-
-

-User Documentation

-

Documentation can be found on the package website.

-

PDF versions of the documentation are also available:

- +
+

User Documentation +

+

Documentation can be found on the package website.

+

PDF versions of the documentation are also available: * Vignette: Single studies using the SelfControlledCaseSeries package * Vignette: Running multiple analyses at once using the SelfControlledCaseSeries package * Package manual: SelfControlledCaseSeries.pdf

-
-

-Support

+
+

Support +

-
-

-Contributing

-

Read here how you can contribute to this package.

+
+

Contributing +

+

Read here how you can contribute to this package.

-
-

-License

+
+

License +

SelfControlledCaseSeries is licensed under Apache License 2.0

-
-

-Development

+
+

Development +

SelfControlledCaseSeries is being developed in R Studio.

-
-

-Development status

+
+

Development status +

Beta

-
-

-Acknowledgements

+
+

Acknowledgements +

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.3.

@@ -287,5 +285,7 @@

Dev status

+ + diff --git a/docs/news/index.html b/docs/news/index.html index 135eb3e..051bea1 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -1,66 +1,12 @@ - - - - - - - -Changelog • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Changelog • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
-
-

-SelfControlledCaseSeries 3.2.1

+
+ +

Changes

+
  1. Setting the default Cyclops control object to use resetCoefficients = TRUE to ensure we always get the exact same model, irrespective of the number of threads used.

  2. +
  3. Added maxRatio argument to computeTimeStability().

  4. +

Bug fixes

+
  1. Fixing missing months in plotEventToCalendarTime() when there are no observation period starts and ends in those months.

  2. +
  3. Now limiting the number of unexposed cases when including only a calendar time spline (as set by minCasesForTimeCovariates).

  4. +
  5. Fix error when calling computeMdrr() on an sccsIntervalData object that does not contain the exposure of interest.

  6. +
  7. Fixed typo in computeMdrr(), renaming propPopExposued to propPopulationExposed.

  8. +
+
+

BugFixes

-
    -
  1. Remove dependency on develop branch of SqlRender.
  2. -
-
-
-

-SelfControlledCaseSeries 3.2.0

+
  1. Remove dependency on develop branch of SqlRender.
  2. +
+
+

Changes

-
    -
  1. Adding optional calendar time covariate.

  2. +
    1. Adding optional calendar time covariate.

    2. Added the analysesToExclude argument to runSccsAnalyses(), allowing the users to specify exposure-outcome-analysis combinations to exclude from execution.

    3. Fixing seed for regularization cross-validation to improve reproducibility.

    4. Added the computeTimeStability() function.

    5. -
    -
-
-

-SelfControlledCaseSeries 3.1.0

+
+
+

Changes

-
    -
  1. Adding likelihood profile to SCCS model objects if profileLikelihood argument is set to TRUE when calling createEraCovariateSettings().

  2. +
    1. Adding likelihood profile to SCCS model objects if profileLikelihood argument is set to TRUE when calling createEraCovariateSettings().

    2. Deprecating oracleTempSchema argument in favor of tempEmulationSchema in accordance with new SqlRender convention.

    3. Adding optional title argument to all plotting functions.

    4. Adding highlightExposedEvents argument to plotExposureCentered function.

    5. Switching power calculation default method to signed root likelihood ratio as recommended by Musonda et al. (2005).

    6. -
    -

    BugFixes

    -
      -
    1. Prevent error when excluding variable not in data from regularization.

    2. +

    BugFixes

    +
    1. Prevent error when excluding variable not in data from regularization.

    2. Removing unexposed subjects when computing power to avoid overestimating statistical power.

    3. -
    -
-
-

-SelfControlledCaseSeries 3.0.0

+
+
+

Changes

-
    -
  1. Adding the self-controlled risk interval design.

  2. +
    1. Adding the self-controlled risk interval design.

    2. Downloading person and observation period IDs as strings to avoid issues with 64-bit integers. (These IDs are not used by SCCS, and are used for reference only).

    3. Outputting log likelihood ratio as part of estimates.

    4. Computing meta-data on covariates.

    5. -
    -

    BugFixes

    -
      -
    1. Fixed syntax error in SQL when using a nesting cohort.

    2. +

    BugFixes

    +
    1. Fixed syntax error in SQL when using a nesting cohort.

    2. Fixing error when sampled cohort is empty.

    3. Fixing nesting.

    4. Attrition table now also includes lines where remaining count is zero.

    5. Fixing custom covariates download.

    6. Fixing error on Oracle due to long temp table name.

    7. Fixing computation of confidence intervals (CIs) when not all estimates for which CIs are computed have data.

    8. -
    -
-
-

-SelfControlledCaseSeries 2.0.0

+
+
+

Changes

-
    -
  1. Switching from ff to Andromeda for storing large data objects.

  2. +
    1. Switching from ff to Andromeda for storing large data objects.

    2. Making creation of the study population a separate step, with the new createStudyPopulation function.

    3. The data on cohorts, exposure, and outcome eras retrieved from the database is now consistently referred to as ‘eras’. Data transformed to non-overlapping intervals is now referred to as ‘sccsIntervalData’.

    4. Adding tracking of attrition.

    5. Automatically removing age spline if selected censoring model already adjusts for age.

    6. -
    -

    BugFixes

    -
      -
    1. Generating sequential case IDs instead of observation period IDs to avoid collisions due to loss of precision when converting BIGINT to R’s numeric.

    2. +

    BugFixes

    +
    1. Generating sequential case IDs instead of observation period IDs to avoid collisions due to loss of precision when converting BIGINT to R’s numeric.

    2. Added more heuristics to detect ill-behaving censoring functions when adjusting for event-dependent censoring.

    3. -
    -
-
-

-SelfControlledCaseSeries 1.4.2

+
+
+

Bugfixes

-
    -
  1. Fixed errors introduced by R 4.0.0.

  2. +
    1. Fixed errors introduced by R 4.0.0.

    2. Gracefully handling when fitting the outcome model hits the max number of iterations.

    3. -
    -
-
-

-SelfControlledCaseSeries 1.4.1

+
+
+

Bugfixes

-
    -
  1. Several workaround for issues with the ff package.

  2. +
    1. Several workaround for issues with the ff package.

    2. Fixed bug causing age to be read incorrectly when creating eras.

    3. -
    -
+
+
-
- +
- - + + diff --git a/docs/pkgdown.css b/docs/pkgdown.css index 1273238..80ea5b8 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -56,8 +56,10 @@ img.icon { float: right; } -img { +/* Ensure in-page images don't run outside their container */ +.contents img { max-width: 100%; + height: auto; } /* Fix bug in bootstrap (only seen in firefox) */ @@ -78,11 +80,10 @@ dd { /* Section anchors ---------------------------------*/ a.anchor { - margin-left: -30px; - display:inline-block; - width: 30px; - height: 30px; - visibility: hidden; + display: none; + margin-left: 5px; + width: 20px; + height: 20px; background-image: url(./link.svg); background-repeat: no-repeat; @@ -90,17 +91,15 @@ a.anchor { background-position: center center; } -.hasAnchor:hover a.anchor { - visibility: visible; -} - -@media (max-width: 767px) { - .hasAnchor:hover a.anchor { - visibility: hidden; - } +h1:hover .anchor, +h2:hover .anchor, +h3:hover .anchor, +h4:hover .anchor, +h5:hover .anchor, +h6:hover .anchor { + display: inline-block; } - /* Fixes for fixed navbar --------------------------*/ .contents h1, .contents h2, .contents h3, .contents h4 { @@ -264,31 +263,26 @@ table { /* Syntax highlighting ---------------------------------------------------- */ -pre { - word-wrap: normal; - word-break: normal; - border: 1px solid #eee; -} - -pre, code { +pre, code, pre code { background-color: #f8f8f8; color: #333; } +pre, pre code { + white-space: pre-wrap; + word-break: break-all; + overflow-wrap: break-word; +} -pre code { - overflow: auto; - word-wrap: normal; - white-space: pre; +pre { + border: 1px solid #eee; } -pre .img { +pre .img, pre .r-plt { margin: 5px 0; } -pre .img img { +pre .img img, pre .r-plt img { background-color: #fff; - display: block; - height: auto; } code a, pre a { @@ -305,9 +299,8 @@ a.sourceLine:hover { .kw {color: #264D66;} /* keyword */ .co {color: #888888;} /* comment */ -.message { color: black; font-weight: bolder;} -.error { color: orange; font-weight: bolder;} -.warning { color: #6A0366; font-weight: bolder;} +.error {font-weight: bolder;} +.warning {font-weight: bolder;} /* Clipboard --------------------------*/ @@ -365,3 +358,27 @@ mark { content: ""; } } + +/* Section anchors --------------------------------- + Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 +*/ + +div.csl-bib-body { } +div.csl-entry { + clear: both; +} +.hanging-indent div.csl-entry { + margin-left:2em; + text-indent:-2em; +} +div.csl-left-margin { + min-width:2em; + float:left; +} +div.csl-right-inline { + margin-left:2em; + padding-left:1em; +} +div.csl-indent { + margin-left: 2em; +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js index 7e7048f..6f0eee4 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -80,7 +80,7 @@ $(document).ready(function() { var copyButton = ""; - $(".examples, div.sourceCode").addClass("hasCopyButton"); + $("div.sourceCode").addClass("hasCopyButton"); // Insert copy buttons: $(copyButton).prependTo(".hasCopyButton"); @@ -91,7 +91,7 @@ // Initialize clipboard: var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { text: function(trigger) { - return trigger.parentNode.textContent; + return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); } }); diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 2301d03..fb05f1b 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,8 +1,8 @@ -pandoc: 2.16.2 -pkgdown: 1.6.1 +pandoc: 2.17.1.1 +pkgdown: 2.0.3 pkgdown_sha: ~ articles: MultipleAnalyses: MultipleAnalyses.html SingleStudies: SingleStudies.html -last_built: 2022-01-10T08:45Z +last_built: 2022-05-25T06:52Z diff --git a/docs/pull_request_template.html b/docs/pull_request_template.html index 0ea2756..c11d2fa 100644 --- a/docs/pull_request_template.html +++ b/docs/pull_request_template.html @@ -1,66 +1,12 @@ - - - - - - - -NA • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NA • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
+
+
-
- +
- - + + diff --git a/docs/reference/SccsData-class.html b/docs/reference/SccsData-class.html index a896515..aabc8a5 100644 --- a/docs/reference/SccsData-class.html +++ b/docs/reference/SccsData-class.html @@ -1,69 +1,14 @@ - - - - - - - -SCCS Data — SccsData-class • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -SCCS Data — SccsData-class • SelfControlledCaseSeries - - - - - - - - - + + - - - - -
-
- -
- -
+
-

SccsData is an S4 class that inherits from Andromeda. It contains information on the cases and their covariates.

-

A SccsData is typically created using getDbSccsData(), can only be saved using -saveSccsData(), and loaded using loadSccsData().

+

SccsData is an S4 class that inherits from Andromeda. It contains information on the cases and their covariates.

+

A SccsData is typically created using getDbSccsData(), can only be saved using +saveSccsData(), and loaded using loadSccsData().

-
# S4 method for SccsData
-show(object)
+    
+
# S4 method for SccsData
+show(object)
 
-# S4 method for SccsData
-summary(object)
- -

Arguments

- - - - - - -
object

An object of type SccsData.

+# S4 method for SccsData +summary(object)
+
+
+

Arguments

+
object
+

An object of type SccsData.

+
+
-
- +
- - + + diff --git a/docs/reference/SccsIntervalData-class.html b/docs/reference/SccsIntervalData-class.html index 55b9b57..564c51b 100644 --- a/docs/reference/SccsIntervalData-class.html +++ b/docs/reference/SccsIntervalData-class.html @@ -1,70 +1,15 @@ - - - - - - - -SCCS Interval Data — SccsIntervalData-class • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -SCCS Interval Data — SccsIntervalData-class • SelfControlledCaseSeries - - - - - - - - - + + - - - - -
-
- -
- -
+
-

SccsIntervalData` is an S4 class that inherits from Andromeda. It contains +

SccsIntervalData` is an S4 class that inherits from Andromeda. It contains information on the cases and their covariates, divided in non-overlapping time intervals.

-

A SccsIntervalData is typically created using createSccsIntervalData(), can only be saved using -saveSccsIntervalData(), and loaded using loadSccsIntervalData().

+

A SccsIntervalData is typically created using createSccsIntervalData(), can only be saved using +saveSccsIntervalData(), and loaded using loadSccsIntervalData().

-
# S4 method for SccsIntervalData
-show(object)
+    
+
# S4 method for SccsIntervalData
+show(object)
 
-# S4 method for SccsIntervalData
-summary(object)
- -

Arguments

- - - - - - -
object

An object of type SccsIntervalData.

+# S4 method for SccsIntervalData +summary(object)
+
+
+

Arguments

+
object
+

An object of type SccsIntervalData.

+
+
-
- +
- - + + diff --git a/docs/reference/SelfControlledCaseSeries-package.html b/docs/reference/SelfControlledCaseSeries-package.html index 698cce7..4dca16e 100644 --- a/docs/reference/SelfControlledCaseSeries-package.html +++ b/docs/reference/SelfControlledCaseSeries-package.html @@ -1,67 +1,12 @@ - - - - - - - -SelfControlledCaseSeries: Self-Controlled Case Series — SelfControlledCaseSeries-package • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -SelfControlledCaseSeries: Self-Controlled Case Series — SelfControlledCaseSeries-package • SelfControlledCaseSeries - - - - + + -
-
- -
- -
+
@@ -139,51 +69,41 @@

SelfControlledCaseSeries: Self-Controlled Case Series

- -

See also

- - -

Author

- -

Maintainer: Martijn Schuemie schuemie@ohdsi.org

-

Authors:

+
-
- +
- - + + diff --git a/docs/reference/computeMdrr.html b/docs/reference/computeMdrr.html index c311b2c..e83a99d 100644 --- a/docs/reference/computeMdrr.html +++ b/docs/reference/computeMdrr.html @@ -1,67 +1,12 @@ - - - - - - - -Compute the minimum detectable relative risk — computeMdrr • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Compute the minimum detectable relative risk — computeMdrr • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
@@ -138,86 +68,73 @@

Compute the minimum detectable relative risk

Compute the minimum detectable relative risk

-
computeMdrr(
-  sccsIntervalData,
-  exposureCovariateId,
-  alpha = 0.05,
-  power = 0.8,
-  twoSided = TRUE,
-  method = "SRL1"
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - -
sccsIntervalData

An object of type SccsIntervalData as created using the -createSccsIntervalData function.

exposureCovariateId

Covariate Id for the health exposure of interest.

alpha

Type I error.

power

1 - beta, where beta is the type II error.

twoSided

Consider a two-sided test?

method

The type of sample size formula that will be used. Allowable values are -"proportion", "binomial", "SRL1", "SRL2", or "ageEffects". Currently "ageEffects" -is not supported.

- -

Value

+
+
computeMdrr(
+  sccsIntervalData,
+  exposureCovariateId,
+  alpha = 0.05,
+  power = 0.8,
+  twoSided = TRUE,
+  method = "SRL1"
+)
+
+
+

Arguments

+
sccsIntervalData
+

An object of type SccsIntervalData as created using the +createSccsIntervalData function.

+
exposureCovariateId
+

Covariate Id for the health exposure of interest.

+
alpha
+

Type I error.

+
power
+

1 - beta, where beta is the type II error.

+
twoSided
+

Consider a two-sided test?

+
method
+

The type of sample size formula that will be used. Allowable values are +"proportion", "binomial", "SRL1", "SRL2", or "ageEffects". Currently "ageEffects" +is not supported.

+
+
+

Value

A data frame with the MDRR, number of events, time at risk, and total time.

-

Details

- +
+
+

Details

Compute the minimum detectable relative risk (MDRR) for a given study population, using the observed time at risk and total time in days and number of events. Five sample size formulas are implemented: sampling proportion, binomial proportion, 2 signed root likelihood ratio methods, and likelihood extension for age effects. The expressions by Musonda (2006) are used.

-

References

- +
+
+

References

Musonda P, Farrington CP, Whitaker HJ (2006) Samples sizes for self-controlled case series studies, Statistics in Medicine, 15;25(15):2618-31

+
+
-
- +
- - + + diff --git a/docs/reference/computeTimeStability.html b/docs/reference/computeTimeStability.html index a08ccb6..8ffbe21 100644 --- a/docs/reference/computeTimeStability.html +++ b/docs/reference/computeTimeStability.html @@ -1,67 +1,12 @@ - - - - - - - -Compute stability of outcome rate over time — computeTimeStability • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Compute stability of outcome rate over time — computeTimeStability • SelfControlledCaseSeries - - - - + + -
-
- -
- -
+
@@ -138,64 +68,65 @@

Compute stability of outcome rate over time

Compute stability of outcome rate over time

-
computeTimeStability(studyPopulation, sccsModel = NULL, alpha = 0.05)
- -

Arguments

- - - - - - - - - - - - - - -
studyPopulation

An object created using the createStudyPopulation() function.

sccsModel

Optional: A fitted SCCS model as created using fitSccsModel(). If the -model contains splines for seasonality and or calendar time these will be adjusted -for before computing stability.

alpha

The alpha (type 1 error) used to test for stability. A Bonferroni correction will -be applied for the number of months tested.

- -

Value

+
+
computeTimeStability(
+  studyPopulation,
+  sccsModel = NULL,
+  maxRatio = 1.1,
+  alpha = 0.05
+)
+
+
+

Arguments

+
studyPopulation
+

An object created using the createStudyPopulation() function.

+
sccsModel
+

Optional: A fitted SCCS model as created using fitSccsModel(). If the +model contains splines for seasonality and or calendar time these will be adjusted +for before computing stability.

+
maxRatio
+

The maximum ratio between the (adjusted) rate in a month, and the mean (adjusted) rate that +we would consider to be irrelevant.

+
alpha
+

The alpha (type 1 error) used to test for stability. A Bonferroni correction will +be applied for the number of months tested.

+
+
+

Value

A tibble with information on the temporal stability per month. The column stable indicates whether the rate of the outcome is within the expected range for that month, assuming the rate is constant over time.

-

Details

- +
+
+

Details

Computes for each calendar month the rate of the outcome, and evaluates whether that rate is constant over time. If splines are used to adjust for seasonality and/or calendar time, these adjustments are taken into consideration. For each -month a two-sided p-value is computed against the null hypothesis that the rate in that month equals the mean rate. This -p-value is compared to an alpha value, using a Bonferroni correction to adjust for the multiple testing across months.

+month a two-sided p-value is computed against the null hypothesis that the rate in that month deviates from the mean rate +no more than maxRatio. This p-value is compared to an alpha value, using a Bonferroni correction to adjust for the +multiple testing across months.

+
+
-
- +
- - + + diff --git a/docs/reference/createAgeCovariateSettings.html b/docs/reference/createAgeCovariateSettings.html index 0c40bf2..c7125a8 100644 --- a/docs/reference/createAgeCovariateSettings.html +++ b/docs/reference/createAgeCovariateSettings.html @@ -1,67 +1,12 @@ - - - - - - - -Create age covariate settings — createAgeCovariateSettings • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create age covariate settings — createAgeCovariateSettings • SelfControlledCaseSeries - - - - + + -
-
- -
- -
+
@@ -138,72 +68,64 @@

Create age covariate settings

Create age covariate settings

-
createAgeCovariateSettings(
-  ageKnots = 5,
-  allowRegularization = FALSE,
-  computeConfidenceIntervals = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - -
ageKnots

If a single number is provided this is assumed to indicate the +

+
createAgeCovariateSettings(
+  ageKnots = 5,
+  allowRegularization = FALSE,
+  computeConfidenceIntervals = FALSE
+)
+
+ +
+

Arguments

+
ageKnots
+

If a single number is provided this is assumed to indicate the number of knots to use for the spline, and the knots are automatically spaced according to equal percentiles of the data. If more than one number is provided these are assumed to be the -exact location of the knots in age-days

allowRegularization

When fitting the model, should the covariates defined here be -allowed to be regularized?

computeConfidenceIntervals

Should confidence intervals be computed for the covariates +exact location of the knots in age-days

+
allowRegularization
+

When fitting the model, should the covariates defined here be +allowed to be regularized?

+
computeConfidenceIntervals
+

Should confidence intervals be computed for the covariates defined here? Setting this to FALSE might save computing time when fitting the model. Will be turned to FALSE automatically -when allowRegularization = TRUE.

- -

Value

- +when allowRegularization = TRUE.

+
+
+

Value

An object of type AgeCovariateSettings.

-

Details

- +
+
+

Details

Create an object specifying whether and how age should be included in the model. Age can be included by splitting patient time into calendar months. During a month, the relative risk attributed to age is assumed to be constant, and the risk from month to month is modeled using a cubic spline.

+
+
-
- +
- - + + diff --git a/docs/reference/createCalendarTimeCovariateSettings.html b/docs/reference/createCalendarTimeCovariateSettings.html index ee844b8..ec31014 100644 --- a/docs/reference/createCalendarTimeCovariateSettings.html +++ b/docs/reference/createCalendarTimeCovariateSettings.html @@ -1,67 +1,12 @@ - - - - - - - -Create calendar time settings — createCalendarTimeCovariateSettings • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create calendar time settings — createCalendarTimeCovariateSettings • SelfControlledCaseSeries - - - - + + -
-
- -
- -
+
@@ -138,74 +68,66 @@

Create calendar time settings

Create calendar time settings

-
createCalendarTimeCovariateSettings(
-  calendarTimeKnots = 5,
-  allowRegularization = FALSE,
-  computeConfidenceIntervals = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - -
calendarTimeKnots

If a single number is provided this is assumed to indicate the +

+
createCalendarTimeCovariateSettings(
+  calendarTimeKnots = 5,
+  allowRegularization = FALSE,
+  computeConfidenceIntervals = FALSE
+)
+
+ +
+

Arguments

+
calendarTimeKnots
+

If a single number is provided this is assumed to indicate the number of knots to use for the spline, and the knots are automatically spaced according to equal percentiles of the data. If a series of dates is provided these are assumed to be the exact location of -the knots.

allowRegularization

When fitting the model, should the covariates defined here be -allowed to be regularized?

computeConfidenceIntervals

Should confidence intervals be computed for the covariates +the knots.

+
allowRegularization
+

When fitting the model, should the covariates defined here be +allowed to be regularized?

+
computeConfidenceIntervals
+

Should confidence intervals be computed for the covariates defined here? Setting this to FALSE might save computing time when fitting the model. Will be turned to FALSE automatically -when allowRegularization = TRUE.

- -

Value

- +when allowRegularization = TRUE.

+
+
+

Value

An object of type seasonalitySettings.

-

Details

- +
+
+

Details

Create an object specifying whether and how calendar time should be included in the model. Calendar time can be included by splitting patient time into calendar months. During a month, the relative risk attributed to calendar time is assumed to be constant, and the risk from month to month is modeled using a cubic spline.

Whereas the seasonality covariate uses a cyclic spline, repeating every year, this calendar time covariate can model trends over years.

+
+
-
- +
- - + + diff --git a/docs/reference/createControlIntervalSettings.html b/docs/reference/createControlIntervalSettings.html index 5706a2e..6555014 100644 --- a/docs/reference/createControlIntervalSettings.html +++ b/docs/reference/createControlIntervalSettings.html @@ -1,67 +1,12 @@ - - - - - - - -Create control interval settings — createControlIntervalSettings • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create control interval settings — createControlIntervalSettings • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
@@ -138,87 +68,71 @@

Create control interval settings

Create control interval settings

-
createControlIntervalSettings(
-  includeEraIds = NULL,
-  excludeEraIds = NULL,
-  start = 0,
-  startAnchor = "era start",
-  end = 0,
-  endAnchor = "era end",
-  firstOccurrenceOnly = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
includeEraIds

One or more IDs of variables in the SccsData object that should be -used to construct this covariate. If no IDs are specified, all -variables will be used.

excludeEraIds

One or more IDs of variables in the [SccsData] object that should not -be used to construct this covariate.

start

The start of the control interval (in days) relative to the startAnchor.

startAnchor

The anchor point for the start of the control interval. Can be "era start" -or "era end".

end

The end of the control interval (in days) relative to the endAnchor.

endAnchor

The anchor point for the end of the control interval. Can be "era start" -or "era end".

firstOccurrenceOnly

Should only the first occurrence of the exposure be used?

- -

Value

+
+
createControlIntervalSettings(
+  includeEraIds = NULL,
+  excludeEraIds = NULL,
+  start = 0,
+  startAnchor = "era start",
+  end = 0,
+  endAnchor = "era end",
+  firstOccurrenceOnly = FALSE
+)
+
+
+

Arguments

+
includeEraIds
+

One or more IDs of variables in the SccsData object that should be +used to construct this covariate. If no IDs are specified, all +variables will be used.

+
excludeEraIds
+

One or more IDs of variables in the [SccsData] object that should not +be used to construct this covariate.

+
start
+

The start of the control interval (in days) relative to the startAnchor.

+
startAnchor
+

The anchor point for the start of the control interval. Can be "era start" +or "era end".

+
end
+

The end of the control interval (in days) relative to the endAnchor.

+
endAnchor
+

The anchor point for the end of the control interval. Can be "era start" +or "era end".

+
firstOccurrenceOnly
+

Should only the first occurrence of the exposure be used?

+
+
+

Value

An object of type ControlSettings.

-

Details

- +
+
+

Details

Create an object specifying how to create a control interval for the self-controlled risk interval (SCRI) design.

+
+
-
- +
- - + + diff --git a/docs/reference/createCreateSccsIntervalDataArgs.html b/docs/reference/createCreateSccsIntervalDataArgs.html index 6c6b50d..393a235 100644 --- a/docs/reference/createCreateSccsIntervalDataArgs.html +++ b/docs/reference/createCreateSccsIntervalDataArgs.html @@ -1,67 +1,12 @@ - - - - - - - -Create a parameter object for the function createSccsIntervalData — createCreateSccsIntervalDataArgs • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a parameter object for the function createSccsIntervalData — createCreateSccsIntervalDataArgs • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
@@ -138,78 +68,61 @@

Create a parameter object for the function createSccsIntervalData

Create a parameter object for the function createSccsIntervalData

-
createCreateSccsIntervalDataArgs(
-  eraCovariateSettings,
-  ageCovariateSettings = NULL,
-  seasonalityCovariateSettings = NULL,
-  calendarTimeCovariateSettings = NULL,
-  minCasesForAgeSeason = NULL,
-  minCasesForTimeCovariates = 10000,
-  eventDependentObservation = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
eraCovariateSettings

Either an object of type EraCovariateSettings as created using the createEraCovariateSettings() function, or a list of such objects.

ageCovariateSettings

An object of type ageCovariateSettings as created using the createAgeCovariateSettings() function.

seasonalityCovariateSettings

An object of type seasonalityCovariateSettings as created using the createSeasonalityCovariateSettings() function.

calendarTimeCovariateSettings

An object of type calendarTimeCovariateSettings as created using the createCalendarTimeCovariateSettings() function.

minCasesForAgeSeason

DEPRECATED: Use minCasesForTimeCovariates instead.

minCasesForTimeCovariates

Minimum number of cases to use to fit age, season and calendar time splines. If needed (and available), cases that are not exposed will be included.

eventDependentObservation

Should the extension proposed by Farrington et al. be used to adjust for event-dependent observation time?

- -

Details

+
+
createCreateSccsIntervalDataArgs(
+  eraCovariateSettings,
+  ageCovariateSettings = NULL,
+  seasonalityCovariateSettings = NULL,
+  calendarTimeCovariateSettings = NULL,
+  minCasesForAgeSeason = NULL,
+  minCasesForTimeCovariates = 10000,
+  eventDependentObservation = FALSE
+)
+
+
+

Arguments

+
eraCovariateSettings
+

Either an object of type EraCovariateSettings as created using the createEraCovariateSettings() function, or a list of such objects.

+
ageCovariateSettings
+

An object of type ageCovariateSettings as created using the createAgeCovariateSettings() function.

+
seasonalityCovariateSettings
+

An object of type seasonalityCovariateSettings as created using the createSeasonalityCovariateSettings() function.

+
calendarTimeCovariateSettings
+

An object of type calendarTimeCovariateSettings as created using the createCalendarTimeCovariateSettings() function.

+
minCasesForAgeSeason
+

DEPRECATED: Use minCasesForTimeCovariates instead.

+
minCasesForTimeCovariates
+

Minimum number of cases to use to fit age, season and calendar time splines. If needed (and available), cases that are not exposed will be included.

+
eventDependentObservation
+

Should the extension proposed by Farrington et al. be used to adjust for event-dependent observation time?

+
+
+

Details

Create an object defining the parameter values.

+
+
-
- +
- - + + diff --git a/docs/reference/createCreateScriIntervalDataArgs.html b/docs/reference/createCreateScriIntervalDataArgs.html index 8e7e0a1..905702c 100644 --- a/docs/reference/createCreateScriIntervalDataArgs.html +++ b/docs/reference/createCreateScriIntervalDataArgs.html @@ -1,67 +1,12 @@ - - - - - - - -Create a parameter object for the function createScriIntervalData — createCreateScriIntervalDataArgs • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a parameter object for the function createScriIntervalData — createCreateScriIntervalDataArgs • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,50 +68,43 @@

Create a parameter object for the function createScriIntervalData

Create a parameter object for the function createScriIntervalData

-
createCreateScriIntervalDataArgs(eraCovariateSettings, controlIntervalSettings)
- -

Arguments

- - - - - - - - - - -
eraCovariateSettings

Either an object of type EraCovariateSettings as created using the createEraCovariateSettings() function, or a list of such objects.

controlIntervalSettings

An object of type ControlIntervalSettings as created using the createControlIntervalSettings() function.

- -

Details

+
+
createCreateScriIntervalDataArgs(eraCovariateSettings, controlIntervalSettings)
+
+
+

Arguments

+
eraCovariateSettings
+

Either an object of type EraCovariateSettings as created using the createEraCovariateSettings() function, or a list of such objects.

+
controlIntervalSettings
+

An object of type ControlIntervalSettings as created using the createControlIntervalSettings() function.

+
+
+

Details

Create an object defining the parameter values.

+
+
-
- +
- - + + diff --git a/docs/reference/createCreateStudyPopulationArgs.html b/docs/reference/createCreateStudyPopulationArgs.html index a04de3c..772ee02 100644 --- a/docs/reference/createCreateStudyPopulationArgs.html +++ b/docs/reference/createCreateStudyPopulationArgs.html @@ -1,67 +1,12 @@ - - - - - - - -Create a parameter object for the function createStudyPopulation — createCreateStudyPopulationArgs • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a parameter object for the function createStudyPopulation — createCreateStudyPopulationArgs • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
@@ -138,63 +68,52 @@

Create a parameter object for the function createStudyPopulation

Create a parameter object for the function createStudyPopulation

-
createCreateStudyPopulationArgs(
-  firstOutcomeOnly = FALSE,
-  naivePeriod = 0,
-  minAge = NULL,
-  maxAge = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
firstOutcomeOnly

Whether only the first occurrence of an outcome should be considered.

naivePeriod

The number of days at the start of a patient's observation period that should not be included in the risk calculations. Note that the naive period can be used to determine current covariate status right after the naive period, and whether an outcome is the first one.

minAge

Minimum age at which patient time will be included in the analysis. Note that information prior to the min age is still used to determine exposure status after the minimum age (e.g. when a prescription was started just prior to reaching the minimum age). Also, outcomes occurring before the minimum age is reached will be considered as prior outcomes when using first outcomes only. Age should be specified in years, but non-integer values are allowed. If not specified, no age restriction will be applied.

maxAge

Maximum age at which patient time will be included in the analysis. Age should be specified in years, but non-integer values are allowed. If not specified, no age restriction will be applied.

- -

Details

+
+
createCreateStudyPopulationArgs(
+  firstOutcomeOnly = FALSE,
+  naivePeriod = 0,
+  minAge = NULL,
+  maxAge = NULL
+)
+
+
+

Arguments

+
firstOutcomeOnly
+

Whether only the first occurrence of an outcome should be considered.

+
naivePeriod
+

The number of days at the start of a patient's observation period that should not be included in the risk calculations. Note that the naive period can be used to determine current covariate status right after the naive period, and whether an outcome is the first one.

+
minAge
+

Minimum age at which patient time will be included in the analysis. Note that information prior to the min age is still used to determine exposure status after the minimum age (e.g. when a prescription was started just prior to reaching the minimum age). Also, outcomes occurring before the minimum age is reached will be considered as prior outcomes when using first outcomes only. Age should be specified in years, but non-integer values are allowed. If not specified, no age restriction will be applied.

+
maxAge
+

Maximum age at which patient time will be included in the analysis. Age should be specified in years, but non-integer values are allowed. If not specified, no age restriction will be applied.

+
+
+

Details

Create an object defining the parameter values.

+
+
-
- +
- - + + diff --git a/docs/reference/createEraCovariateSettings.html b/docs/reference/createEraCovariateSettings.html index 8e46ec3..c372298 100644 --- a/docs/reference/createEraCovariateSettings.html +++ b/docs/reference/createEraCovariateSettings.html @@ -1,67 +1,12 @@ - - - - - - - -Create era covariate settings — createEraCovariateSettings • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create era covariate settings — createEraCovariateSettings • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
@@ -138,123 +68,97 @@

Create era covariate settings

Create era covariate settings

-
createEraCovariateSettings(
-  includeEraIds = NULL,
-  excludeEraIds = NULL,
-  label = "Covariates",
-  stratifyById = TRUE,
-  start = 0,
-  startAnchor = "era start",
-  end = 0,
-  endAnchor = "era end",
-  firstOccurrenceOnly = FALSE,
-  splitPoints = c(),
-  allowRegularization = FALSE,
-  profileLikelihood = FALSE
-)
+
+
createEraCovariateSettings(
+  includeEraIds = NULL,
+  excludeEraIds = NULL,
+  label = "Covariates",
+  stratifyById = TRUE,
+  start = 0,
+  startAnchor = "era start",
+  end = 0,
+  endAnchor = "era end",
+  firstOccurrenceOnly = FALSE,
+  splitPoints = c(),
+  allowRegularization = FALSE,
+  profileLikelihood = FALSE
+)
+
-

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
includeEraIds

One or more IDs of variables in the SccsData object that should be +

+

Arguments

+
includeEraIds
+

One or more IDs of variables in the SccsData object that should be used to construct this covariate. If no IDs are specified, all -variables will be used.

excludeEraIds

One or more IDs of variables in the [SccsData] object that should not -be used to construct this covariate.

label

A label used to identify the covariates created using these settings.

stratifyById

Should a single covariate be created for every ID in the SccsData +variables will be used.

+
excludeEraIds
+

One or more IDs of variables in the [SccsData] object that should not +be used to construct this covariate.

+
label
+

A label used to identify the covariates created using these settings.

+
stratifyById
+

Should a single covariate be created for every ID in the SccsData object, or should a single covariate be constructed? For example, if the IDs identify exposures to different drugs, should a covariate be constructed for every drug, or a single covariate for exposure to any -of these drugs. Note that overlap will be considered a single exposure.

start

The start of the risk window (in days) relative to the startAnchor.

startAnchor

The anchor point for the start of the risk window. Can be "era start" -or "era end".

end

The end of the risk window (in days) relative to the endAnchor.

endAnchor

The anchor point for the end of the risk window. Can be "era start" -or "era end".

firstOccurrenceOnly

Should only the first occurrence of the exposure be used?

splitPoints

To split the risk window into several smaller windows, specify the end +of these drugs. Note that overlap will be considered a single exposure.

+
start
+

The start of the risk window (in days) relative to the startAnchor.

+
startAnchor
+

The anchor point for the start of the risk window. Can be "era start" +or "era end".

+
end
+

The end of the risk window (in days) relative to the endAnchor.

+
endAnchor
+

The anchor point for the end of the risk window. Can be "era start" +or "era end".

+
firstOccurrenceOnly
+

Should only the first occurrence of the exposure be used?

+
splitPoints
+

To split the risk window into several smaller windows, specify the end of each sub- window relative to the start of the main risk window. If add ExposedDaysToStart is TRUE, the split points will be considered to -be relative to the end of the main risk window instead.

allowRegularization

When fitting the model, should the covariates defined here be allowed -to be regularized?

profileLikelihood

When fitting the model, should the likelihood profile be computed for +be relative to the end of the main risk window instead.

+
allowRegularization
+

When fitting the model, should the covariates defined here be allowed +to be regularized?

+
profileLikelihood
+

When fitting the model, should the likelihood profile be computed for the covariate defined here? The likelihood profile can be used to avoid making normal approximations on the likelihood and can be used in methods specifically designed to make use of the profile, but may take a -while to compute.

- -

Value

- +while to compute.

+
+
+

Value

An object of type EraCovariateSettings.

-

Details

- +
+
+

Details

Create an object specifying how to create a (set of) era-based covariates.

+
+
-
- +
- - + + diff --git a/docs/reference/createExposureOutcome.html b/docs/reference/createExposureOutcome.html index 87cc4ca..5fe0cde 100644 --- a/docs/reference/createExposureOutcome.html +++ b/docs/reference/createExposureOutcome.html @@ -1,67 +1,12 @@ - - - - - - - -Create a exposure-outcome combination. — createExposureOutcome • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a exposure-outcome combination. — createExposureOutcome • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
@@ -138,58 +68,49 @@

Create a exposure-outcome combination.

Create a exposure-outcome combination.

-
createExposureOutcome(exposureId, outcomeId, ...)
+
+
createExposureOutcome(exposureId, outcomeId, ...)
+
-

Arguments

- - - - - - - - - - - - - - -
exposureId

A concept ID identifying the target drug in the exposure table. If multiple +

+

Arguments

+
exposureId
+

A concept ID identifying the target drug in the exposure table. If multiple strategies for picking the exposure will be tested in the analysis, a named list of numbers can be provided instead. In the analysis, the name of the number to be used can be specified using the exposureType parameter in the -createSccsAnalysis function.

outcomeId

A concept ID identifying the outcome in the outcome table.

...

Custom variables, to be used in the analyses.

- -

Details

- -

Create a set of hypotheses of interest, to be used with the runSccsAnalyses function.

+createSccsAnalysis function.

+
outcomeId
+

A concept ID identifying the outcome in the outcome table.

+
...
+

Custom variables, to be used in the analyses.

+
+
+

Details

+

Create a set of hypotheses of interest, to be used with the runSccsAnalyses function.

+
+
-
- +

- - + + diff --git a/docs/reference/createFitSccsModelArgs.html b/docs/reference/createFitSccsModelArgs.html index 07a8b4c..b8df4e4 100644 --- a/docs/reference/createFitSccsModelArgs.html +++ b/docs/reference/createFitSccsModelArgs.html @@ -1,67 +1,12 @@ - - - - - - - -Create a parameter object for the function fitSccsModel — createFitSccsModelArgs • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a parameter object for the function fitSccsModel — createFitSccsModelArgs • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
@@ -138,64 +68,53 @@

Create a parameter object for the function fitSccsModel

Create a parameter object for the function fitSccsModel

-
createFitSccsModelArgs(
-  prior = createPrior("laplace", useCrossValidation = TRUE),
-  control = createControl(cvType = "auto", selectorType = "byPid", startingVariance =
-    0.1, seed = 1, noiseLevel = "quiet"),
-  profileGrid = NULL,
-  profileBounds = c(log(0.1), log(10))
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
prior

The prior used to fit the model. See Cyclops::createPrior for details.

control

The control object used to control the cross-validation used to determine the hyperparameters of the prior (if applicable). See Cyclops::createControl for details.

profileGrid

A one-dimensional grid of points on the log(relative risk) scale where the likelihood for coefficient of variables is sampled. See details.

profileBounds

The bounds (on the log relative risk scale) for the adaptive sampling of the likelihood function.

- -

Details

+
+
createFitSccsModelArgs(
+  prior = createPrior("laplace", useCrossValidation = TRUE),
+  control = createControl(cvType = "auto", selectorType = "byPid", startingVariance =
+    0.1, seed = 1, resetCoefficients = TRUE, noiseLevel = "quiet"),
+  profileGrid = NULL,
+  profileBounds = c(log(0.1), log(10))
+)
+
+
+

Arguments

+
prior
+

The prior used to fit the model. See Cyclops::createPrior for details.

+
control
+

The control object used to control the cross-validation used to determine the hyperparameters of the prior (if applicable). See Cyclops::createControl for details.

+
profileGrid
+

A one-dimensional grid of points on the log(relative risk) scale where the likelihood for coefficient of variables is sampled. See details.

+
profileBounds
+

The bounds (on the log relative risk scale) for the adaptive sampling of the likelihood function.

+
+
+

Details

Create an object defining the parameter values.

+
+
-
- +
- - + + diff --git a/docs/reference/createGetDbSccsDataArgs.html b/docs/reference/createGetDbSccsDataArgs.html index 872b071..880c0d9 100644 --- a/docs/reference/createGetDbSccsDataArgs.html +++ b/docs/reference/createGetDbSccsDataArgs.html @@ -1,67 +1,12 @@ - - - - - - - -Create a parameter object for the function getDbSccsData — createGetDbSccsDataArgs • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a parameter object for the function getDbSccsData — createGetDbSccsDataArgs • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
@@ -138,88 +68,67 @@

Create a parameter object for the function getDbSccsData

Create a parameter object for the function getDbSccsData

-
createGetDbSccsDataArgs(
-  useCustomCovariates = FALSE,
-  useNestingCohort = FALSE,
-  nestingCohortId = NULL,
-  deleteCovariatesSmallCount = 100,
-  studyStartDate = "",
-  studyEndDate = "",
-  maxCasesPerOutcome = 0,
-  exposureIds = "exposureId",
-  customCovariateIds = ""
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
useCustomCovariates

Create covariates from a custom table?

useNestingCohort

Should the study be nested in a cohort (e.g. people with a specific indication)? If not, the study will be nested in the general population.

nestingCohortId

A cohort definition ID identifying the records in the nestingCohortTable to use as nesting cohort.

deleteCovariatesSmallCount

The minimum count for a covariate to appear in the data to be kept.

studyStartDate

A calendar date specifying the minimum date where data is used. Date format is 'yyyymmdd'.

studyEndDate

A calendar date specifying the maximum date where data is used. Date format is 'yyyymmdd'.

maxCasesPerOutcome

If there are more than this number of cases for a single outcome cases will be sampled to this size. maxCasesPerOutcome = 0 indicates no maximum size.

exposureIds

A list of identifiers to define the exposures of interest. If exposureTable = DRUG_ERA, exposureIds should be CONCEPT_ID. If exposureTable <> DRUG_ERA, exposureIds is used to select the cohort_concept_id in the cohort-like table. If no exposureIds are provided, all drugs or cohorts in the exposureTable are included as exposures.

customCovariateIds

A list of cohort definition IDS identifying the records in the customCovariateTable to use for building custom covariates.

- -

Details

+
+
createGetDbSccsDataArgs(
+  useCustomCovariates = FALSE,
+  useNestingCohort = FALSE,
+  nestingCohortId = NULL,
+  deleteCovariatesSmallCount = 100,
+  studyStartDate = "",
+  studyEndDate = "",
+  maxCasesPerOutcome = 0,
+  exposureIds = "exposureId",
+  customCovariateIds = ""
+)
+
+
+

Arguments

+
useCustomCovariates
+

Create covariates from a custom table?

+
useNestingCohort
+

Should the study be nested in a cohort (e.g. people with a specific indication)? If not, the study will be nested in the general population.

+
nestingCohortId
+

A cohort definition ID identifying the records in the nestingCohortTable to use as nesting cohort.

+
deleteCovariatesSmallCount
+

The minimum count for a covariate to appear in the data to be kept.

+
studyStartDate
+

A calendar date specifying the minimum date where data is used. Date format is 'yyyymmdd'.

+
studyEndDate
+

A calendar date specifying the maximum date where data is used. Date format is 'yyyymmdd'.

+
maxCasesPerOutcome
+

If there are more than this number of cases for a single outcome cases will be sampled to this size. maxCasesPerOutcome = 0 indicates no maximum size.

+
exposureIds
+

A list of identifiers to define the exposures of interest. If exposureTable = DRUG_ERA, exposureIds should be CONCEPT_ID. If exposureTable <> DRUG_ERA, exposureIds is used to select the cohort_concept_id in the cohort-like table. If no exposureIds are provided, all drugs or cohorts in the exposureTable are included as exposures.

+
customCovariateIds
+

A list of cohort definition IDS identifying the records in the customCovariateTable to use for building custom covariates.

+
+
+

Details

Create an object defining the parameter values.

+
+
-
- +
- - + + diff --git a/docs/reference/createSccsAnalysis.html b/docs/reference/createSccsAnalysis.html index 4e71d1a..b761d28 100644 --- a/docs/reference/createSccsAnalysis.html +++ b/docs/reference/createSccsAnalysis.html @@ -1,67 +1,12 @@ - - - - - - - -Create a SelfControlledCaseSeries analysis specification — createSccsAnalysis • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a SelfControlledCaseSeries analysis specification — createSccsAnalysis • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,104 +68,81 @@

Create a SelfControlledCaseSeries analysis specification

Create a SelfControlledCaseSeries analysis specification

-
createSccsAnalysis(
-  analysisId = 1,
-  description = "",
-  exposureType = NULL,
-  outcomeType = NULL,
-  getDbSccsDataArgs,
-  createStudyPopulationArgs,
-  design = "SCCS",
-  createSccsIntervalDataArgs = NULL,
-  createScriIntervalDataArgs = NULL,
-  fitSccsModelArgs
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
analysisId

An integer that will be used later to refer to this specific set -of analysis choices.

description

A short description of the analysis.

exposureType

If more than one exposure is provided for each +

+
createSccsAnalysis(
+  analysisId = 1,
+  description = "",
+  exposureType = NULL,
+  outcomeType = NULL,
+  getDbSccsDataArgs,
+  createStudyPopulationArgs,
+  design = "SCCS",
+  createSccsIntervalDataArgs = NULL,
+  createScriIntervalDataArgs = NULL,
+  fitSccsModelArgs
+)
+
+ +
+

Arguments

+
analysisId
+

An integer that will be used later to refer to this specific set +of analysis choices.

+
description
+

A short description of the analysis.

+
exposureType
+

If more than one exposure is provided for each exposureOutcome, this field should be used to select the -specific exposure to use in this analysis.

outcomeType

If more than one outcome is provided for each exposureOutcome, +specific exposure to use in this analysis.

+
outcomeType
+

If more than one outcome is provided for each exposureOutcome, this field should be used to select the specific outcome to use -in this analysis.

getDbSccsDataArgs

An object representing the arguments to be used when calling the -getDbSccsData function.

createStudyPopulationArgs

An object representing the arguments to be used when calling the -getDbSccsData function.

design

Either "SCCS" for the general self-controlled case series design, -or "SCRI" for the self-controlled risk interval design.

createSccsIntervalDataArgs

An object representing the arguments to be used when calling the -createSccsIntervalData function. Ignored when design = "SCRI".

createScriIntervalDataArgs

An object representing the arguments to be used when calling the -createScriIntervalData function. Ignored when design = "SCCS".

fitSccsModelArgs

An object representing the arguments to be used when calling the -fitSccsModel function.

- -

Details

- -

Create a set of analysis choices, to be used with the runSccsAnalyses function.

+in this analysis.

+
getDbSccsDataArgs
+

An object representing the arguments to be used when calling the +getDbSccsData function.

+
createStudyPopulationArgs
+

An object representing the arguments to be used when calling the +getDbSccsData function.

+
design
+

Either "SCCS" for the general self-controlled case series design, +or "SCRI" for the self-controlled risk interval design.

+
createSccsIntervalDataArgs
+

An object representing the arguments to be used when calling the +createSccsIntervalData function. Ignored when design = "SCRI".

+
createScriIntervalDataArgs
+

An object representing the arguments to be used when calling the +createScriIntervalData function. Ignored when design = "SCCS".

+
fitSccsModelArgs
+

An object representing the arguments to be used when calling the +fitSccsModel function.

+
+
+

Details

+

Create a set of analysis choices, to be used with the runSccsAnalyses function.

+
+
-
- +
- - + + diff --git a/docs/reference/createSccsIntervalData.html b/docs/reference/createSccsIntervalData.html index faeebc4..7583e92 100644 --- a/docs/reference/createSccsIntervalData.html +++ b/docs/reference/createSccsIntervalData.html @@ -1,67 +1,12 @@ - - - - - - - -Create SCCS era data — createSccsIntervalData • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create SCCS era data — createSccsIntervalData • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
@@ -138,107 +68,88 @@

Create SCCS era data

Create SCCS era data

-
createSccsIntervalData(
-  studyPopulation,
-  sccsData,
-  eraCovariateSettings,
-  ageCovariateSettings = NULL,
-  seasonalityCovariateSettings = NULL,
-  calendarTimeCovariateSettings = NULL,
-  minCasesForAgeSeason = NULL,
-  minCasesForTimeCovariates = 10000,
-  eventDependentObservation = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
studyPopulation

An object created using the createStudyPopulation() function.

sccsData

An object of type SccsData as created using the -getDbSccsData function.

eraCovariateSettings

Either an object of type EraCovariateSettings as created -using the createEraCovariateSettings() function, or a -list of such objects.

ageCovariateSettings

An object of type ageCovariateSettings as created using the -createAgeCovariateSettings() function.

seasonalityCovariateSettings

An object of type seasonalityCovariateSettings as created using the -createSeasonalityCovariateSettings() function.

calendarTimeCovariateSettings

An object of type calendarTimeCovariateSettings as created using the -createCalendarTimeCovariateSettings() function.

minCasesForAgeSeason

DEPRECATED: Use minCasesForTimeCovariates instead.

minCasesForTimeCovariates

Minimum number of cases to use to fit age, season and calendar time splines. If -needed (and available), cases that are not exposed will be included.

eventDependentObservation

Should the extension proposed by Farrington et al. be used to -adjust for event-dependent observation time?

- -

Value

- -

An object of type SccsIntervalData.

-

Details

+
+
createSccsIntervalData(
+  studyPopulation,
+  sccsData,
+  eraCovariateSettings,
+  ageCovariateSettings = NULL,
+  seasonalityCovariateSettings = NULL,
+  calendarTimeCovariateSettings = NULL,
+  minCasesForAgeSeason = NULL,
+  minCasesForTimeCovariates = 10000,
+  eventDependentObservation = FALSE
+)
+
+
+

Arguments

+
studyPopulation
+

An object created using the createStudyPopulation() function.

+
sccsData
+

An object of type SccsData as created using the +getDbSccsData function.

+
eraCovariateSettings
+

Either an object of type EraCovariateSettings as created +using the createEraCovariateSettings() function, or a +list of such objects.

+
ageCovariateSettings
+

An object of type ageCovariateSettings as created using the +createAgeCovariateSettings() function.

+
seasonalityCovariateSettings
+

An object of type seasonalityCovariateSettings as created using the +createSeasonalityCovariateSettings() function.

+
calendarTimeCovariateSettings
+

An object of type calendarTimeCovariateSettings as created using the +createCalendarTimeCovariateSettings() function.

+
minCasesForAgeSeason
+

DEPRECATED: Use minCasesForTimeCovariates instead.

+
minCasesForTimeCovariates
+

Minimum number of cases to use to fit age, season and calendar time splines. If +needed (and available), cases that are not exposed will be included.

+
eventDependentObservation
+

Should the extension proposed by Farrington et al. be used to +adjust for event-dependent observation time?

+
+
+

Value

+

An object of type SccsIntervalData.

+
+
+

Details

This function creates covariates based on the data in the sccsData argument, according to the provided settings. It chops patient time into periods during which all covariates remain constant. The output details these periods, their durations, and a sparse representation of the covariate values.

-

References

- +
+
+

References

Farrington, C. P., Anaya-Izquierdo, A., Whitaker, H. J., Hocine, M.N., Douglas, I., and Smeeth, L. (2011). Self-Controlled case series analysis with event-dependent observation periods. Journal of the American Statistical Association 106 (494), 417-426

+
+
-
- +
- - + + diff --git a/docs/reference/createSccsSimulationSettings.html b/docs/reference/createSccsSimulationSettings.html index d9c6266..5832f0e 100644 --- a/docs/reference/createSccsSimulationSettings.html +++ b/docs/reference/createSccsSimulationSettings.html @@ -1,67 +1,12 @@ - - - - - - - -Create SCCS simulation settings — createSccsSimulationSettings • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create SCCS simulation settings — createSccsSimulationSettings • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
@@ -138,155 +68,111 @@

Create SCCS simulation settings

Create SCCS simulation settings

-
createSccsSimulationSettings(
-  meanPatientTime = 4 * 365,
-  sdPatientTime = 2 * 365,
-  minAge = 18 * 365,
-  maxAge = 65 * 365,
-  minBaselineRate = 0.001,
-  maxBaselineRate = 0.01,
-  minCalendarTime = as.Date("2000-01-01"),
-  maxCalendarTime = as.Date("2010-01-01"),
-  eraIds = c(1, 2),
-  patientUsages = c(0.2, 0.1),
-  usageRate = c(0.01, 0.01),
-  meanPrescriptionDurations = c(14, 30),
-  sdPrescriptionDurations = c(7, 14),
-  simulationRiskWindows = list(createSimulationRiskWindow(relativeRisks = 1),
-    createSimulationRiskWindow(relativeRisks = 1.5)),
-  includeAgeEffect = TRUE,
-  ageKnots = 5,
-  includeSeasonality = TRUE,
-  seasonKnots = 5,
-  includeCalendarTimeEffect = TRUE,
-  calendarTimeKnots = 5,
-  outcomeId = 10
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
meanPatientTime

Mean number of observation days per patient.

sdPatientTime

Standard deviation of the observation days per patient.

minAge

The minimum age in days.

maxAge

The maximum age in days.

minBaselineRate

The minimum baseline rate (per day).

maxBaselineRate

The maximum baseline rate (per day).

minCalendarTime

The minimum date patients are to be observed.

maxCalendarTime

The maximum date patients are to be observed.

eraIds

The IDs for the covariates to be generated.

patientUsages

The fraction of patients that use the drugs.

usageRate

The rate of prescriptions per person that uses the drug.

meanPrescriptionDurations

The mean duration of a prescription, per drug.

sdPrescriptionDurations

The standard deviation of the duration of a prescription, per -drug.

simulationRiskWindows

One or a list of objects of type simulationRiskWindow as -created using the createSimulationRiskWindow -function.

includeAgeEffect

Include an age effect for the outcome?

ageKnots

Number of knots in the age spline.

includeSeasonality

Include seasonality for the outcome?

seasonKnots

Number of knots in the seasonality spline.

includeCalendarTimeEffect

Include a calendar time effect for the outcome?

calendarTimeKnots

Number of knots in the calendar time spline.

outcomeId

The ID to be used for the outcome.

- -

Value

+
+
createSccsSimulationSettings(
+  meanPatientTime = 4 * 365,
+  sdPatientTime = 2 * 365,
+  minAge = 18 * 365,
+  maxAge = 65 * 365,
+  minBaselineRate = 0.001,
+  maxBaselineRate = 0.01,
+  minCalendarTime = as.Date("2000-01-01"),
+  maxCalendarTime = as.Date("2010-01-01"),
+  eraIds = c(1, 2),
+  patientUsages = c(0.2, 0.1),
+  usageRate = c(0.01, 0.01),
+  meanPrescriptionDurations = c(14, 30),
+  sdPrescriptionDurations = c(7, 14),
+  simulationRiskWindows = list(createSimulationRiskWindow(relativeRisks = 1),
+    createSimulationRiskWindow(relativeRisks = 1.5)),
+  includeAgeEffect = TRUE,
+  ageKnots = 5,
+  includeSeasonality = TRUE,
+  seasonKnots = 5,
+  includeCalendarTimeEffect = TRUE,
+  calendarTimeKnots = 5,
+  outcomeId = 10
+)
+
+
+

Arguments

+
meanPatientTime
+

Mean number of observation days per patient.

+
sdPatientTime
+

Standard deviation of the observation days per patient.

+
minAge
+

The minimum age in days.

+
maxAge
+

The maximum age in days.

+
minBaselineRate
+

The minimum baseline rate (per day).

+
maxBaselineRate
+

The maximum baseline rate (per day).

+
minCalendarTime
+

The minimum date patients are to be observed.

+
maxCalendarTime
+

The maximum date patients are to be observed.

+
eraIds
+

The IDs for the covariates to be generated.

+
patientUsages
+

The fraction of patients that use the drugs.

+
usageRate
+

The rate of prescriptions per person that uses the drug.

+
meanPrescriptionDurations
+

The mean duration of a prescription, per drug.

+
sdPrescriptionDurations
+

The standard deviation of the duration of a prescription, per +drug.

+
simulationRiskWindows
+

One or a list of objects of type simulationRiskWindow as +created using the createSimulationRiskWindow +function.

+
includeAgeEffect
+

Include an age effect for the outcome?

+
ageKnots
+

Number of knots in the age spline.

+
includeSeasonality
+

Include seasonality for the outcome?

+
seasonKnots
+

Number of knots in the seasonality spline.

+
includeCalendarTimeEffect
+

Include a calendar time effect for the outcome?

+
calendarTimeKnots
+

Number of knots in the calendar time spline.

+
outcomeId
+

The ID to be used for the outcome.

+
+
+

Value

An object of type sccsSimulationSettings.

-

Details

- +
+
+

Details

Create an object of settings for an SCCS simulation.

+
+
-
- +
- - + + diff --git a/docs/reference/createScriIntervalData.html b/docs/reference/createScriIntervalData.html index c920cc6..db5e9a2 100644 --- a/docs/reference/createScriIntervalData.html +++ b/docs/reference/createScriIntervalData.html @@ -1,67 +1,12 @@ - - - - - - - -Create Self-Controlled Risk Interval (SCRI) era data — createScriIntervalData • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create Self-Controlled Risk Interval (SCRI) era data — createScriIntervalData • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
@@ -138,80 +68,71 @@

Create Self-Controlled Risk Interval (SCRI) era data

Create Self-Controlled Risk Interval (SCRI) era data

-
createScriIntervalData(
-  studyPopulation,
-  sccsData,
-  eraCovariateSettings,
-  controlIntervalSettings
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
studyPopulation

An object created using the createStudyPopulation() function.

sccsData

An object of type SccsData as created using the -getDbSccsData function.

eraCovariateSettings

Either an object of type EraCovariateSettings as created -using the createEraCovariateSettings() function, or a -list of such objects.

controlIntervalSettings

An object of type ControlIntervalSettings as created -using the createControlIntervalSettings() function.

- -

Value

- -

An object of type SccsIntervalData.

-

Details

+
+
createScriIntervalData(
+  studyPopulation,
+  sccsData,
+  eraCovariateSettings,
+  controlIntervalSettings
+)
+
+
+

Arguments

+
studyPopulation
+

An object created using the createStudyPopulation() function.

+
sccsData
+

An object of type SccsData as created using the +getDbSccsData function.

+
eraCovariateSettings
+

Either an object of type EraCovariateSettings as created +using the createEraCovariateSettings() function, or a +list of such objects.

+
controlIntervalSettings
+

An object of type ControlIntervalSettings as created +using the createControlIntervalSettings() function.

+
+
+

Value

+

An object of type SccsIntervalData.

+
+
+

Details

This function creates interval data according to the elf-Controlled Risk Interval (SCRI) design. Unlike the generic SCCS design, where all patient time is used to establish a background rate, in the SCRI design a specific control interval (relative to the exposure) needs to be defined. The final model will only include time that is either part of the risk interval (defined using the eraCovariateSettings argument, or the control interval (defined using controlIntervalSettings).

-

References

- +
+
+

References

Greene SK, Kulldorff M, Lewis EM, Li R, Yin R, Weintraub ES, Fireman BH, Lieu TA, Nordin JD, Glanz JM, Baxter R, Jacobsen SJ, Broder KR, Lee GM. Near real-time surveillance for influenza vaccine safety: proof-of-concept in the Vaccine Safety Datalink Project. Am J Epidemiol. 2010 Jan 15;171(2):177-88. doi: 10.1093/aje/kwp345.

+
+
-
- +
- - + + diff --git a/docs/reference/createSeasonalityCovariateSettings.html b/docs/reference/createSeasonalityCovariateSettings.html index aa19ca3..6cb3c25 100644 --- a/docs/reference/createSeasonalityCovariateSettings.html +++ b/docs/reference/createSeasonalityCovariateSettings.html @@ -1,67 +1,12 @@ - - - - - - - -Create seasonality settings — createSeasonalityCovariateSettings • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create seasonality settings — createSeasonalityCovariateSettings • SelfControlledCaseSeries - - - - + + -
-
- -
- -
+
@@ -138,72 +68,64 @@

Create seasonality settings

Create seasonality settings

-
createSeasonalityCovariateSettings(
-  seasonKnots = 5,
-  allowRegularization = FALSE,
-  computeConfidenceIntervals = FALSE
-)
- -

Arguments

- - - - - - - - - - - - - - -
seasonKnots

If a single number is provided this is assumed to indicate the +

+
createSeasonalityCovariateSettings(
+  seasonKnots = 5,
+  allowRegularization = FALSE,
+  computeConfidenceIntervals = FALSE
+)
+
+ +
+

Arguments

+
seasonKnots
+

If a single number is provided this is assumed to indicate the number of knots to use for the spline, and the knots are automatically equally spaced across the year. If more than one number is provided these are assumed to be the exact location of -the knots in days relative to the start of the year.

allowRegularization

When fitting the model, should the covariates defined here be -allowed to be regularized?

computeConfidenceIntervals

Should confidence intervals be computed for the covariates +the knots in days relative to the start of the year.

+
allowRegularization
+

When fitting the model, should the covariates defined here be +allowed to be regularized?

+
computeConfidenceIntervals
+

Should confidence intervals be computed for the covariates defined here? Setting this to FALSE might save computing time when fitting the model. Will be turned to FALSE automatically -when allowRegularization = TRUE.

- -

Value

- +when allowRegularization = TRUE.

+
+
+

Value

An object of type seasonalitySettings.

-

Details

- +
+
+

Details

Create an object specifying whether and how seasonality should be included in the model. Seasonality can be included by splitting patient time into calendar months. During a month, the relative risk attributed to season is assumed to be constant, and the risk from month to month is modeled using a cyclic cubic spline.

+
+

-
- +
- - + + diff --git a/docs/reference/createSimulationRiskWindow.html b/docs/reference/createSimulationRiskWindow.html index 37eb4ee..3c12325 100644 --- a/docs/reference/createSimulationRiskWindow.html +++ b/docs/reference/createSimulationRiskWindow.html @@ -1,67 +1,12 @@ - - - - - - - -Create a risk window definition for simulation — createSimulationRiskWindow • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a risk window definition for simulation — createSimulationRiskWindow • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
@@ -138,71 +68,58 @@

Create a risk window definition for simulation

Create a risk window definition for simulation

-
createSimulationRiskWindow(
-  start = 0,
-  end = 0,
-  endAnchor = "era end",
-  splitPoints = c(),
-  relativeRisks = c(0)
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - -
start

Start of the risk window relative to exposure start.

end

The end of the risk window (in days) relative to the endAnchor.

endAnchor

The anchor point for the end of the risk window. Can be "era start" -or "era end".

splitPoints

Subdivision of the risk window in to smaller sub-windows.

relativeRisks

Either a single number representing the relative risk in the risk -window, or when splitPoints have been defined a vector of relative -risks, one for each sub-window.

- -

Value

+
+
createSimulationRiskWindow(
+  start = 0,
+  end = 0,
+  endAnchor = "era end",
+  splitPoints = c(),
+  relativeRisks = c(0)
+)
+
+
+

Arguments

+
start
+

Start of the risk window relative to exposure start.

+
end
+

The end of the risk window (in days) relative to the endAnchor.

+
endAnchor
+

The anchor point for the end of the risk window. Can be "era start" +or "era end".

+
splitPoints
+

Subdivision of the risk window in to smaller sub-windows.

+
relativeRisks
+

Either a single number representing the relative risk in the risk +window, or when splitPoints have been defined a vector of relative +risks, one for each sub-window.

+
+
+

Value

An object of type simulationRiskWindow.

+
+
-
- +
- - + + diff --git a/docs/reference/createStudyPopulation.html b/docs/reference/createStudyPopulation.html index a99e5fd..aceadaf 100644 --- a/docs/reference/createStudyPopulation.html +++ b/docs/reference/createStudyPopulation.html @@ -1,67 +1,12 @@ - - - - - - - -Create a study population — createStudyPopulation • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a study population — createStudyPopulation • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,86 +68,70 @@

Create a study population

Create a study population

-
createStudyPopulation(
-  sccsData,
-  outcomeId = NULL,
-  firstOutcomeOnly = FALSE,
-  naivePeriod = 0,
-  minAge = NULL,
-  maxAge = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - -
sccsData

An object of type SccsData as created using the -getDbSccsData function.

outcomeId

The outcome to create the era data for. If not specified it is +

+
createStudyPopulation(
+  sccsData,
+  outcomeId = NULL,
+  firstOutcomeOnly = FALSE,
+  naivePeriod = 0,
+  minAge = NULL,
+  maxAge = NULL
+)
+
+ +
+

Arguments

+
sccsData
+

An object of type SccsData as created using the +getDbSccsData function.

+
outcomeId
+

The outcome to create the era data for. If not specified it is assumed to be the one outcome for which the data was loaded from -the database.

firstOutcomeOnly

Whether only the first occurrence of an outcome should be -considered.

naivePeriod

The number of days at the start of a patient's observation period +the database.

+
firstOutcomeOnly
+

Whether only the first occurrence of an outcome should be +considered.

+
naivePeriod
+

The number of days at the start of a patient's observation period that should not be included in the risk calculations. Note that the naive period can be used to determine current covariate status right after the naive period, and whether an outcome is -the first one.

minAge

Minimum age at which patient time will be included in the analysis. Note +the first one.

+
minAge
+

Minimum age at which patient time will be included in the analysis. Note that information prior to the min age is still used to determine exposure status after the minimum age (e.g. when a prescription was started just prior to reaching the minimum age). Also, outcomes occurring before the minimum age is reached will be considered as prior outcomes when using first outcomes only. Age should be specified in years, but non-integer values are allowed. If not -specified, no age restriction will be applied.

maxAge

Maximum age at which patient time will be included in the analysis. Age should +specified, no age restriction will be applied.

+
maxAge
+

Maximum age at which patient time will be included in the analysis. Age should be specified in years, but non-integer values are allowed. If not -specified, no age restriction will be applied.

- +specified, no age restriction will be applied.

+
+
-
- +
- - + + diff --git a/docs/reference/cyclicSplineDesign.html b/docs/reference/cyclicSplineDesign.html index d1caa41..554ce49 100644 --- a/docs/reference/cyclicSplineDesign.html +++ b/docs/reference/cyclicSplineDesign.html @@ -1,67 +1,12 @@ - - - - - - - -Create a design matrix for a cyclic spline — cyclicSplineDesign • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a design matrix for a cyclic spline — cyclicSplineDesign • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
@@ -138,54 +68,45 @@

Create a design matrix for a cyclic spline

Create a design matrix for a cyclic spline

-
cyclicSplineDesign(x, knots, ord = 4)
- -

Arguments

- - - - - - - - - - - - - - -
x

Vector of coordinates of the points to be interpolated.

knots

Location of the knots.

ord

Order of the spline function.

- -

Details

+
+
cyclicSplineDesign(x, knots, ord = 4)
+
+
+

Arguments

+
x
+

Vector of coordinates of the points to be interpolated.

+
knots
+

Location of the knots.

+
ord
+

Order of the spline function.

+
+
+

Details

This function is used by other functions in this package.

+
+
-
- +
- - + + diff --git a/docs/reference/fitSccsModel.html b/docs/reference/fitSccsModel.html index 45c5937..26296bf 100644 --- a/docs/reference/fitSccsModel.html +++ b/docs/reference/fitSccsModel.html @@ -1,67 +1,12 @@ - - - - - - - -Fit the SCCS model — fitSccsModel • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Fit the SCCS model — fitSccsModel • SelfControlledCaseSeries - - - - + + -
-
- -
- -
+
@@ -138,89 +68,78 @@

Fit the SCCS model

Fit the SCCS model

-
fitSccsModel(
-  sccsIntervalData,
-  prior = createPrior("laplace", useCrossValidation = TRUE),
-  control = createControl(cvType = "auto", selectorType = "byPid", startingVariance =
-    0.1, seed = 1, noiseLevel = "quiet"),
-  profileGrid = NULL,
-  profileBounds = c(log(0.1), log(10))
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - -
sccsIntervalData

An object of type SccsIntervalData as created using the -createSccsIntervalData function.

prior

The prior used to fit the model. See Cyclops::createPrior for -details.

control

The control object used to control the cross-validation used to determine the -hyperparameters of the prior (if applicable). See -Cyclops::createControl for details.

profileGrid

A one-dimensional grid of points on the log(relative risk) scale where -the likelihood for coefficient of variables is sampled. See details.

profileBounds

The bounds (on the log relative risk scale) for the adaptive sampling -of the likelihood function.

- -

Value

+
+
fitSccsModel(
+  sccsIntervalData,
+  prior = createPrior("laplace", useCrossValidation = TRUE),
+  control = createControl(cvType = "auto", selectorType = "byPid", startingVariance =
+    0.1, seed = 1, resetCoefficients = TRUE, noiseLevel = "quiet"),
+  profileGrid = NULL,
+  profileBounds = c(log(0.1), log(10))
+)
+
+
+

Arguments

+
sccsIntervalData
+

An object of type SccsIntervalData as created using the +createSccsIntervalData function.

+
prior
+

The prior used to fit the model. See Cyclops::createPrior for +details.

+
control
+

The control object used to control the cross-validation used to determine the +hyperparameters of the prior (if applicable). See +Cyclops::createControl for details.

+
profileGrid
+

A one-dimensional grid of points on the log(relative risk) scale where +the likelihood for coefficient of variables is sampled. See details.

+
profileBounds
+

The bounds (on the log relative risk scale) for the adaptive sampling +of the likelihood function.

+
+
+

Value

An object of type SccsModel. Generic functions print, coef, and confint are available.

-

Details

- +
+
+

Details

Fits the SCCS model as a conditional Poisson regression. When allowed, coefficients for some or all covariates can be regularized.

Likelihood profiling is only done for variables for which profileLikelihood is set to TRUE when -calling createEraCovariateSettings(). Either specify the profileGrid for a completely user- +calling createEraCovariateSettings(). Either specify the profileGrid for a completely user- defined grid, or profileBounds for an adaptive grid. Both should be defined on the log IRR scale. When both profileGrid and profileGrid are NULL likelihood profiling is disabled.

-

References

- +
+
+

References

Suchard, M.A., Simpson, S.E., Zorych, I., Ryan, P., and Madigan, D. (2013). Massive parallelization of serial inference algorithms for complex generalized linear models. ACM Transactions on Modeling and Computer Simulation 23, 10

+
+
-
- +
- - + + diff --git a/docs/reference/getAttritionTable.html b/docs/reference/getAttritionTable.html index 34d71d8..f7551c1 100644 --- a/docs/reference/getAttritionTable.html +++ b/docs/reference/getAttritionTable.html @@ -1,67 +1,12 @@ - - - - - - - -Get the attrition table for a population — getAttritionTable • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get the attrition table for a population — getAttritionTable • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,49 +68,44 @@

Get the attrition table for a population

Get the attrition table for a population

-
getAttritionTable(object)
- -

Arguments

- - - - - - -
object

Either an object of type SccsData, a population object generated by -functions like createStudyPopulation, or an object of type -outcomeModel.

- -

Value

+
+
getAttritionTable(object)
+
+
+

Arguments

+
object
+

Either an object of type SccsData, a population object generated by +functions like createStudyPopulation, or an object of type +outcomeModel.

+
+
+

Value

A tibble specifying the number of people and exposures in the population after specific steps of filtering.

+
+
-
- +
- - + + diff --git a/docs/reference/getDbSccsData.html b/docs/reference/getDbSccsData.html index 5270a23..44df4d2 100644 --- a/docs/reference/getDbSccsData.html +++ b/docs/reference/getDbSccsData.html @@ -1,67 +1,12 @@ - - - - - - - -Load data for SCCS from the database — getDbSccsData • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Load data for SCCS from the database — getDbSccsData • SelfControlledCaseSeries - - - - + + -
-
- -
- -
+
@@ -138,224 +68,170 @@

Load data for SCCS from the database

Load all data needed to perform an SCCS analysis from the database.

-
getDbSccsData(
-  connectionDetails,
-  cdmDatabaseSchema,
-  oracleTempSchema = NULL,
-  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
-  outcomeDatabaseSchema = cdmDatabaseSchema,
-  outcomeTable = "condition_era",
-  outcomeIds,
-  exposureDatabaseSchema = cdmDatabaseSchema,
-  exposureTable = "drug_era",
-  exposureIds = c(),
-  useCustomCovariates = FALSE,
-  customCovariateDatabaseSchema = cdmDatabaseSchema,
-  customCovariateTable = "cohort",
-  customCovariateIds = c(),
-  useNestingCohort = FALSE,
-  nestingCohortDatabaseSchema = cdmDatabaseSchema,
-  nestingCohortTable = "cohort",
-  nestingCohortId = NULL,
-  deleteCovariatesSmallCount = 100,
-  studyStartDate = "",
-  studyEndDate = "",
-  cdmVersion = "5",
-  maxCasesPerOutcome = 0
-)
+
+
getDbSccsData(
+  connectionDetails,
+  cdmDatabaseSchema,
+  oracleTempSchema = NULL,
+  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
+  outcomeDatabaseSchema = cdmDatabaseSchema,
+  outcomeTable = "condition_era",
+  outcomeIds,
+  exposureDatabaseSchema = cdmDatabaseSchema,
+  exposureTable = "drug_era",
+  exposureIds = c(),
+  useCustomCovariates = FALSE,
+  customCovariateDatabaseSchema = cdmDatabaseSchema,
+  customCovariateTable = "cohort",
+  customCovariateIds = c(),
+  useNestingCohort = FALSE,
+  nestingCohortDatabaseSchema = cdmDatabaseSchema,
+  nestingCohortTable = "cohort",
+  nestingCohortId = NULL,
+  deleteCovariatesSmallCount = 100,
+  studyStartDate = "",
+  studyEndDate = "",
+  cdmVersion = "5",
+  maxCasesPerOutcome = 0
+)
+
-

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
connectionDetails

An R object of type ConnectionDetails created using -the function DatabaseConnector::createConnectionDetails() function.

cdmDatabaseSchema

The name of the database schema that contains the OMOP CDM +

+

Arguments

+
connectionDetails
+

An R object of type ConnectionDetails created using +the function DatabaseConnector::createConnectionDetails() function.

+
cdmDatabaseSchema
+

The name of the database schema that contains the OMOP CDM instance. Requires read permissions to this database. On SQL Server, this should specify both the database and the -schema, so for example 'cdm_instance.dbo'.

oracleTempSchema

DEPRECATED: use tempEmulationSchema instead.

tempEmulationSchema

Some database platforms like Oracle and Impala do not truly support temp tables. To +schema, so for example 'cdm_instance.dbo'.

+
oracleTempSchema
+

DEPRECATED: use tempEmulationSchema instead.

+
tempEmulationSchema
+

Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables -can be created.

outcomeDatabaseSchema

The name of the database schema that is the location where +can be created.

+
outcomeDatabaseSchema
+

The name of the database schema that is the location where the data used to define the outcome cohorts is available. If outcomeTable = CONDITION_ERA, outcomeDatabaseSchema is not -used. Requires read permissions to this database.

outcomeTable

The tablename that contains the outcome cohorts. If +used. Requires read permissions to this database.

+
outcomeTable
+

The tablename that contains the outcome cohorts. If outcomeTable is not CONDITION_OCCURRENCE or CONDITION_ERA, then expectation is outcomeTable has format of COHORT table: COHORT_DEFINITION_ID, SUBJECT_ID, COHORT_START_DATE, -COHORT_END_DATE.

outcomeIds

A list of ids used to define outcomes. If outcomeTable = +COHORT_END_DATE.

+
outcomeIds
+

A list of ids used to define outcomes. If outcomeTable = CONDITION_OCCURRENCE, the list is a set of ancestor CONCEPT_IDs, and all occurrences of all descendant concepts will be selected. If outcomeTable <> CONDITION_OCCURRENCE, the list contains records found in COHORT_DEFINITION_ID -field.

exposureDatabaseSchema

The name of the database schema that is the location where +field.

+
exposureDatabaseSchema
+

The name of the database schema that is the location where the exposure data used to define the exposure cohorts is available. If exposureTable = DRUG_ERA, exposureDatabaseSchema is not used but assumed to be -cdmSchema. Requires read permissions to this database.

exposureTable

The tablename that contains the exposure cohorts. If +cdmSchema. Requires read permissions to this database.

+
exposureTable
+

The tablename that contains the exposure cohorts. If exposureTable <> DRUG_ERA, then expectation is exposureTable has format of COHORT table: cohort_concept_id, SUBJECT_ID, -COHORT_START_DATE, COHORT_END_DATE.

exposureIds

A list of identifiers to define the exposures of interest. If +COHORT_START_DATE, COHORT_END_DATE.

+
exposureIds
+

A list of identifiers to define the exposures of interest. If exposureTable = DRUG_ERA, exposureIds should be CONCEPT_ID. If exposureTable <> DRUG_ERA, exposureIds is used to select the cohort_concept_id in the cohort-like table. If no exposureIds are provided, all drugs or cohorts in the -exposureTable are included as exposures.

useCustomCovariates

Create covariates from a custom table?

customCovariateDatabaseSchema

The name of the database schema that is the location where -the custom covariate data is available.

customCovariateTable

Name of the table holding the custom covariates. This table -should have the same structure as the cohort table.

customCovariateIds

A list of cohort definition IDS identifying the records in +exposureTable are included as exposures.

+
useCustomCovariates
+

Create covariates from a custom table?

+
customCovariateDatabaseSchema
+

The name of the database schema that is the location where +the custom covariate data is available.

+
customCovariateTable
+

Name of the table holding the custom covariates. This table +should have the same structure as the cohort table.

+
customCovariateIds
+

A list of cohort definition IDS identifying the records in the customCovariateTable to use for building custom -covariates.

useNestingCohort

Should the study be nested in a cohort (e.g. people with +covariates.

+
useNestingCohort
+

Should the study be nested in a cohort (e.g. people with a specific indication)? If not, the study will be nested -in the general population.

nestingCohortDatabaseSchema

The name of the database schema that is the location -where the nesting cohort is defined.

nestingCohortTable

Name of the table holding the nesting cohort. This table -should have the same structure as the cohort table.

nestingCohortId

A cohort definition ID identifying the records in the -nestingCohortTable to use as nesting cohort.

deleteCovariatesSmallCount

The minimum count for a covariate to appear in the data to be -kept.

studyStartDate

A calendar date specifying the minimum date where data is -used. Date format is 'yyyymmdd'.

studyEndDate

A calendar date specifying the maximum date where data is -used. Date format is 'yyyymmdd'.

cdmVersion

Define the OMOP CDM version used: currently support "4" and -"5".

maxCasesPerOutcome

If there are more than this number of cases for a single +in the general population.

+
nestingCohortDatabaseSchema
+

The name of the database schema that is the location +where the nesting cohort is defined.

+
nestingCohortTable
+

Name of the table holding the nesting cohort. This table +should have the same structure as the cohort table.

+
nestingCohortId
+

A cohort definition ID identifying the records in the +nestingCohortTable to use as nesting cohort.

+
deleteCovariatesSmallCount
+

The minimum count for a covariate to appear in the data to be +kept.

+
studyStartDate
+

A calendar date specifying the minimum date where data is +used. Date format is 'yyyymmdd'.

+
studyEndDate
+

A calendar date specifying the maximum date where data is +used. Date format is 'yyyymmdd'.

+
cdmVersion
+

Define the OMOP CDM version used: currently support "4" and +"5".

+
maxCasesPerOutcome
+

If there are more than this number of cases for a single outcome cases will be sampled to this size. maxCasesPerOutcome = 0 -indicates no maximum size.

- -

Value

- -

An SccsData object.

-

Details

- -

This function downloads several types of information:

    -
  • Information on the occurrences of the outcome(s) of interest. Note that information for +indicates no maximum size.

    +
+
+

Value

+

An SccsData object.

+
+
+

Details

+

This function downloads several types of information:

  • Information on the occurrences of the outcome(s) of interest. Note that information for multiple outcomes can be fetched in one go, and later the specific outcome can be specified for which we want to build a model.

  • Information on the observation time and age for the people with the outcomes.

  • Information on exposures of interest which we want to include in the model.

  • -
- -

Five different database schemas can be specified, for five different types of information: The

    -
  • cdmDatabaseSchema is used to extract patient age and observation period. The

  • +

Five different database schemas can be specified, for five different types of information: The

  • cdmDatabaseSchema is used to extract patient age and observation period. The

  • outcomeDatabaseSchema is used to extract information about the outcomes, the

  • exposureDatabaseSchema is used to retrieve information on exposures, and the

  • customCovariateDatabaseSchema is optionally used to find additional, user-defined covariates. All four locations could point to the same database schema.

  • nestingCohortDatabaseSchema is optionally used to define a cohort in which the analysis is nested, for example a cohort of diabetics patients.

  • -
- -

All five locations could point to the same database schema.

+

All five locations could point to the same database schema.

+
+
- - - + + diff --git a/docs/reference/getModel.html b/docs/reference/getModel.html index e7cca52..ecf5a22 100644 --- a/docs/reference/getModel.html +++ b/docs/reference/getModel.html @@ -1,67 +1,12 @@ - - - - - - - -Output the full model — getModel • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Output the full model — getModel • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,48 +68,43 @@

Output the full model

Output the full model

-
getModel(sccsModel)
- -

Arguments

- - - - - - -
sccsModel

An object of type sccsModel as created using the -fitSccsModel function.

- -

Value

+
+
getModel(sccsModel)
+
+
+

Arguments

+
sccsModel
+

An object of type sccsModel as created using the +fitSccsModel function.

+
+
+

Value

A data frame with the coefficients and confidence intervals (when not-regularized) for all covariates in the model.

+
+
- - - + + diff --git a/docs/reference/hasAgeEffect.html b/docs/reference/hasAgeEffect.html index f9e1610..2943ec9 100644 --- a/docs/reference/hasAgeEffect.html +++ b/docs/reference/hasAgeEffect.html @@ -1,67 +1,12 @@ - - - - - - - -Does the model contain an age effect? — hasAgeEffect • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Does the model contain an age effect? — hasAgeEffect • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,47 +68,42 @@

Does the model contain an age effect?

Does the model contain an age effect?

-
hasAgeEffect(sccsModel)
- -

Arguments

- - - - - - -
sccsModel

An object of type sccsModel as created using the -fitSccsModel function.

- -

Value

+
+
hasAgeEffect(sccsModel)
+
+
+

Arguments

+
sccsModel
+

An object of type sccsModel as created using the +fitSccsModel function.

+
+
+

Value

TRUE if the model contains an age effect, otherwise FALSE.

+
+
- - - + + diff --git a/docs/reference/hasCalendarTimeEffect.html b/docs/reference/hasCalendarTimeEffect.html index 64b086f..a6ee0fa 100644 --- a/docs/reference/hasCalendarTimeEffect.html +++ b/docs/reference/hasCalendarTimeEffect.html @@ -1,67 +1,12 @@ - - - - - - - -Does the model contain an age effect? — hasCalendarTimeEffect • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Does the model contain an age effect? — hasCalendarTimeEffect • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,47 +68,42 @@

Does the model contain an age effect?

Does the model contain an age effect?

-
hasCalendarTimeEffect(sccsModel)
- -

Arguments

- - - - - - -
sccsModel

An object of type sccsModel as created using the -fitSccsModel function.

- -

Value

+
+
hasCalendarTimeEffect(sccsModel)
+
+
+

Arguments

+
sccsModel
+

An object of type sccsModel as created using the +fitSccsModel function.

+
+
+

Value

TRUE if the model contains an age effect, otherwise FALSE.

+
+
- - - + + diff --git a/docs/reference/hasSeasonality.html b/docs/reference/hasSeasonality.html index a98a01b..599d6f9 100644 --- a/docs/reference/hasSeasonality.html +++ b/docs/reference/hasSeasonality.html @@ -1,67 +1,12 @@ - - - - - - - -Does the model contain an age effect? — hasSeasonality • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Does the model contain an age effect? — hasSeasonality • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,47 +68,42 @@

Does the model contain an age effect?

Does the model contain an age effect?

-
hasSeasonality(sccsModel)
- -

Arguments

- - - - - - -
sccsModel

An object of type sccsModel as created using the -fitSccsModel function.

- -

Value

+
+
hasSeasonality(sccsModel)
+
+
+

Arguments

+
sccsModel
+

An object of type sccsModel as created using the +fitSccsModel function.

+
+
+

Value

TRUE if the model contains an age effect, otherwise FALSE.

+
+
- - - + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 10f0ded..fcd5462 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,66 +1,12 @@ - - - - - - - -Function reference • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function reference • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
- - - - - - - - - - -
-

All functions

+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+

All functions

+

computeMdrr()

Compute the minimum detectable relative risk

+

computeTimeStability()

Compute stability of outcome rate over time

+

createAgeCovariateSettings()

Create age covariate settings

+

createCalendarTimeCovariateSettings()

Create calendar time settings

+

createControlIntervalSettings()

Create control interval settings

+

createCreateSccsIntervalDataArgs()

Create a parameter object for the function createSccsIntervalData

+

createCreateScriIntervalDataArgs()

Create a parameter object for the function createScriIntervalData

+

createCreateStudyPopulationArgs()

Create a parameter object for the function createStudyPopulation

+

createEraCovariateSettings()

Create era covariate settings

+

createExposureOutcome()

Create a exposure-outcome combination.

+

createFitSccsModelArgs()

Create a parameter object for the function fitSccsModel

+

createGetDbSccsDataArgs()

Create a parameter object for the function getDbSccsData

+

createSccsAnalysis()

Create a SelfControlledCaseSeries analysis specification

+

createSccsIntervalData()

Create SCCS era data

+

createSccsSimulationSettings()

Create SCCS simulation settings

+

createScriIntervalData()

Create Self-Controlled Risk Interval (SCRI) era data

+

createSeasonalityCovariateSettings()

Create seasonality settings

+

createSimulationRiskWindow()

Create a risk window definition for simulation

+

createStudyPopulation()

Create a study population

+

cyclicSplineDesign()

Create a design matrix for a cyclic spline

+

fitSccsModel()

Fit the SCCS model

+

getAttritionTable()

Get the attrition table for a population

+

getDbSccsData()

Load data for SCCS from the database

+

getModel()

Output the full model

+

hasAgeEffect()

Does the model contain an age effect?

+

hasCalendarTimeEffect()

Does the model contain an age effect?

+

hasSeasonality()

Does the model contain an age effect?

+

isSccsData()

Check whether an object is a SccsData object

+

isSccsIntervalData()

Check whether an object is a SccsIntervalData object

+

loadExposureOutcomeList()

Load a list of exposureOutcome from file

+

loadSccsAnalysisList()

Load a list of sccsAnalysis from file

+

loadSccsData()

Load the cohort method data from a file

+

loadSccsIntervalData()

Load the cohort method data from a file

+

plotAgeEffect()

Plot the age effect

+

plotAgeSpans()

Plot the age ranges spanned by each observation period.

+

plotCalendarTimeEffect()

Plot the calendar time effect

+

plotCalendarTimeSpans()

Plot the calendar time ranges spanned by each observation period.

+

plotEventObservationDependence()

Plot time from event to observation end for censored and uncensored time.

+

plotEventToCalendarTime()

Plot the count of events over calendar time.

+

plotExposureCentered()

Plot information centered around the start of exposure

+

plotSeasonality()

Plot the seasonality effect

+

runSccsAnalyses()

Run a list of analyses

+

saveExposureOutcomeList()

Save a list of exposureOutcome to file

+

saveSccsAnalysisList()

Save a list of sccsAnalysis to file

+

saveSccsData()

Save the cohort method data to file

+

saveSccsIntervalData()

Save the cohort method data to file

+

show(<SccsData>) summary(<SccsData>)

SCCS Data

+

show(<SccsIntervalData>) summary(<SccsIntervalData>)

SCCS Interval Data

+

simulateSccsData()

Simulate SCCS data

+

summarizeSccsAnalyses()

Create a summary report of the analyses

- +
+
-
- +
- - + + diff --git a/docs/reference/isSccsData.html b/docs/reference/isSccsData.html index 9ddcab2..8ac7695 100644 --- a/docs/reference/isSccsData.html +++ b/docs/reference/isSccsData.html @@ -1,67 +1,12 @@ - - - - - - - -Check whether an object is a SccsData object — isSccsData • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Check whether an object is a SccsData object — isSccsData • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,46 +68,41 @@

Check whether an object is a SccsData object

Check whether an object is a SccsData object

-
isSccsData(x)
- -

Arguments

- - - - - - -
x

The object to check.

- -

Value

+
+
isSccsData(x)
+
+
+

Arguments

+
x
+

The object to check.

+
+
+

Value

A logical value.

+
+
- - - + + diff --git a/docs/reference/isSccsIntervalData.html b/docs/reference/isSccsIntervalData.html index 52422f5..e167ff3 100644 --- a/docs/reference/isSccsIntervalData.html +++ b/docs/reference/isSccsIntervalData.html @@ -1,67 +1,12 @@ - - - - - - - -Check whether an object is a SccsIntervalData object — isSccsIntervalData • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Check whether an object is a SccsIntervalData object — isSccsIntervalData • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,46 +68,41 @@

Check whether an object is a SccsIntervalData object

Check whether an object is a SccsIntervalData object

-
isSccsIntervalData(x)
- -

Arguments

- - - - - - -
x

The object to check.

- -

Value

+
+
isSccsIntervalData(x)
+
+
+

Arguments

+
x
+

The object to check.

+
+
+

Value

A logical value.

+
+
- - - + + diff --git a/docs/reference/loadExposureOutcomeList.html b/docs/reference/loadExposureOutcomeList.html index e87d5ae..3eed187 100644 --- a/docs/reference/loadExposureOutcomeList.html +++ b/docs/reference/loadExposureOutcomeList.html @@ -1,67 +1,12 @@ - - - - - - - -Load a list of exposureOutcome from file — loadExposureOutcomeList • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Load a list of exposureOutcome from file — loadExposureOutcomeList • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,46 +68,41 @@

Load a list of exposureOutcome from file

Load a list of objects of type exposureOutcome from file. The file is in JSON format.

-
loadExposureOutcomeList(file)
- -

Arguments

- - - - - - -
file

The name of the file

- -

Value

+
+
loadExposureOutcomeList(file)
+
+
+

Arguments

+
file
+

The name of the file

+
+
+

Value

A list of objects of type exposureOutcome.

+
+
- - - + + diff --git a/docs/reference/loadSccsAnalysisList.html b/docs/reference/loadSccsAnalysisList.html index c3f314a..f1f998d 100644 --- a/docs/reference/loadSccsAnalysisList.html +++ b/docs/reference/loadSccsAnalysisList.html @@ -1,67 +1,12 @@ - - - - - - - -Load a list of sccsAnalysis from file — loadSccsAnalysisList • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Load a list of sccsAnalysis from file — loadSccsAnalysisList • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,46 +68,41 @@

Load a list of sccsAnalysis from file

Load a list of objects of type sccsAnalysis from file. The file is in JSON format.

-
loadSccsAnalysisList(file)
- -

Arguments

- - - - - - -
file

The name of the file

- -

Value

+
+
loadSccsAnalysisList(file)
+
+
+

Arguments

+
file
+

The name of the file

+
+
+

Value

A list of objects of type sccsAnalysis.

+
+
- - - + + diff --git a/docs/reference/loadSccsData.html b/docs/reference/loadSccsData.html index 5ffb0ee..fec9d62 100644 --- a/docs/reference/loadSccsData.html +++ b/docs/reference/loadSccsData.html @@ -1,67 +1,12 @@ - - - - - - - -Load the cohort method data from a file — loadSccsData • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Load the cohort method data from a file — loadSccsData • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
-

Loads an object of type SccsData from a file in the file system.

+

Loads an object of type SccsData from a file in the file system.

-
loadSccsData(file)
- -

Arguments

- - - - - - -
file

The name of the file containing the data.

- -

Value

+
+
loadSccsData(file)
+
-

An object of class SccsData.

+
+

Arguments

+
file
+

The name of the file containing the data.

+
+
+

Value

+

An object of class SccsData.

+
+
-
- +
- - + + diff --git a/docs/reference/loadSccsIntervalData.html b/docs/reference/loadSccsIntervalData.html index 8cbbaba..facd9dc 100644 --- a/docs/reference/loadSccsIntervalData.html +++ b/docs/reference/loadSccsIntervalData.html @@ -1,67 +1,12 @@ - - - - - - - -Load the cohort method data from a file — loadSccsIntervalData • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Load the cohort method data from a file — loadSccsIntervalData • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
-

Loads an object of type SccsIntervalData from a file in the file system.

+

Loads an object of type SccsIntervalData from a file in the file system.

-
loadSccsIntervalData(file)
- -

Arguments

- - - - - - -
file

The name of the file containing the data.

- -

Value

+
+
loadSccsIntervalData(file)
+
-

An object of class SccsIntervalData.

+
+

Arguments

+
file
+

The name of the file containing the data.

+
+
+

Value

+

An object of class SccsIntervalData.

+
+
-
- +
- - + + diff --git a/docs/reference/plotAgeEffect.html b/docs/reference/plotAgeEffect.html index d6c79fa..d344231 100644 --- a/docs/reference/plotAgeEffect.html +++ b/docs/reference/plotAgeEffect.html @@ -1,67 +1,12 @@ - - - - - - - -Plot the age effect — plotAgeEffect • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot the age effect — plotAgeEffect • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,63 +68,53 @@

Plot the age effect

Plot the age effect

-
plotAgeEffect(sccsModel, rrLim = c(0.1, 10), title = NULL, fileName = NULL)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
sccsModel

An object of type sccsModel as created using the -fitSccsModel function.

rrLim

The limits on the incidence rate ratio scale in the plot.

title

Optional: the main title for the plot

fileName

Name of the file where the plot should be saved, for example 'plot.png'. See the -function ggsave in the ggplot2 package for supported file formats.

- -

Value

+
+
plotAgeEffect(sccsModel, rrLim = c(0.1, 10), title = NULL, fileName = NULL)
+
+
+

Arguments

+
sccsModel
+

An object of type sccsModel as created using the +fitSccsModel function.

+
rrLim
+

The limits on the incidence rate ratio scale in the plot.

+
title
+

Optional: the main title for the plot

+
fileName
+

Name of the file where the plot should be saved, for example 'plot.png'. See the +function ggsave in the ggplot2 package for supported file formats.

+
+
+

Value

A Ggplot object. Use the ggsave function to save to file.

-

Details

- +
+
+

Details

Plot the spline curve of the age effect.

+
+
- - - + + diff --git a/docs/reference/plotAgeSpans.html b/docs/reference/plotAgeSpans.html index 3c2f740..8cf2873 100644 --- a/docs/reference/plotAgeSpans.html +++ b/docs/reference/plotAgeSpans.html @@ -1,67 +1,12 @@ - - - - - - - -Plot the age ranges spanned by each observation period. — plotAgeSpans • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot the age ranges spanned by each observation period. — plotAgeSpans • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
@@ -138,69 +68,59 @@

Plot the age ranges spanned by each observation period.

Plot the age ranges spanned by each observation period.

-
plotAgeSpans(
-  studyPopulation,
-  maxPersons = 10000,
-  title = NULL,
-  fileName = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
studyPopulation

An object created using the createStudyPopulation() function.

maxPersons

The maximum number of persons to plot. If there are more than this number of persons -a random sample will be taken to avoid visual clutter.

title

Optional: the main title for the plot

fileName

Name of the file where the plot should be saved, for example 'plot.png'. -See the function ggplot2::ggsave() for supported file formats.

- -

Value

- -

A ggplot object. Use the ggplot2::ggsave() function to save to file in a different -format.

-

Details

+
+
plotAgeSpans(
+  studyPopulation,
+  maxPersons = 10000,
+  title = NULL,
+  fileName = NULL
+)
+
+
+

Arguments

+
studyPopulation
+

An object created using the createStudyPopulation() function.

+
maxPersons
+

The maximum number of persons to plot. If there are more than this number of persons +a random sample will be taken to avoid visual clutter.

+
title
+

Optional: the main title for the plot

+
fileName
+

Name of the file where the plot should be saved, for example 'plot.png'. +See the function ggplot2::ggsave() for supported file formats.

+
+
+

Value

+

A ggplot object. Use the ggplot2::ggsave() function to save to file in a different +format.

+
+
+

Details

Plots a line per patient from their age at observation start to their age at observation end.

+
+
- - - + + diff --git a/docs/reference/plotCalendarTimeEffect.html b/docs/reference/plotCalendarTimeEffect.html index d7985f3..0ca9396 100644 --- a/docs/reference/plotCalendarTimeEffect.html +++ b/docs/reference/plotCalendarTimeEffect.html @@ -1,67 +1,12 @@ - - - - - - - -Plot the calendar time effect — plotCalendarTimeEffect • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot the calendar time effect — plotCalendarTimeEffect • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,68 +68,58 @@

Plot the calendar time effect

Plot the calendar time effect

-
plotCalendarTimeEffect(
-  sccsModel,
-  rrLim = c(0.1, 10),
-  title = NULL,
-  fileName = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
sccsModel

An object of type sccsModel as created using the -fitSccsModel function.

rrLim

The limits on the incidence rate ratio scale in the plot.

title

Optional: the main title for the plot

fileName

Name of the file where the plot should be saved, for example 'plot.png'. See the -function ggsave in the ggplot2 package for supported file formats.

- -

Value

+
+
plotCalendarTimeEffect(
+  sccsModel,
+  rrLim = c(0.1, 10),
+  title = NULL,
+  fileName = NULL
+)
+
+
+

Arguments

+
sccsModel
+

An object of type sccsModel as created using the +fitSccsModel function.

+
rrLim
+

The limits on the incidence rate ratio scale in the plot.

+
title
+

Optional: the main title for the plot

+
fileName
+

Name of the file where the plot should be saved, for example 'plot.png'. See the +function ggsave in the ggplot2 package for supported file formats.

+
+
+

Value

A Ggplot object. Use the ggsave function to save to file.

-

Details

- +
+
+

Details

Plot the spline curve of the calendar time effect.

+
+
- - - + + diff --git a/docs/reference/plotCalendarTimeSpans.html b/docs/reference/plotCalendarTimeSpans.html index 10fde1e..9729c5a 100644 --- a/docs/reference/plotCalendarTimeSpans.html +++ b/docs/reference/plotCalendarTimeSpans.html @@ -1,67 +1,12 @@ - - - - - - - -Plot the calendar time ranges spanned by each observation period. — plotCalendarTimeSpans • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot the calendar time ranges spanned by each observation period. — plotCalendarTimeSpans • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
@@ -138,69 +68,59 @@

Plot the calendar time ranges spanned by each observation period.

Plot the calendar time ranges spanned by each observation period.

-
plotCalendarTimeSpans(
-  studyPopulation,
-  maxPersons = 10000,
-  title = NULL,
-  fileName = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
studyPopulation

An object created using the createStudyPopulation() function.

maxPersons

The maximum number of persons to plot. If there are more than this number of persons -a random sample will be taken to avoid visual clutter.

title

Optional: the main title for the plot

fileName

Name of the file where the plot should be saved, for example 'plot.png'. -See the function ggplot2::ggsave() for supported file formats.

- -

Value

- -

A ggplot object. Use the ggplot2::ggsave() function to save to file in a different -format.

-

Details

+
+
plotCalendarTimeSpans(
+  studyPopulation,
+  maxPersons = 10000,
+  title = NULL,
+  fileName = NULL
+)
+
+
+

Arguments

+
studyPopulation
+

An object created using the createStudyPopulation() function.

+
maxPersons
+

The maximum number of persons to plot. If there are more than this number of persons +a random sample will be taken to avoid visual clutter.

+
title
+

Optional: the main title for the plot

+
fileName
+

Name of the file where the plot should be saved, for example 'plot.png'. +See the function ggplot2::ggsave() for supported file formats.

+
+
+

Value

+

A ggplot object. Use the ggplot2::ggsave() function to save to file in a different +format.

+
+
+

Details

Plots a line per patient from their observation start to their observation end.

+
+
- - - + + diff --git a/docs/reference/plotEventObservationDependence.html b/docs/reference/plotEventObservationDependence.html index 3923eca..b012cc8 100644 --- a/docs/reference/plotEventObservationDependence.html +++ b/docs/reference/plotEventObservationDependence.html @@ -1,67 +1,12 @@ - - - - - - - -Plot time from event to observation end for censored and uncensored time. — plotEventObservationDependence • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot time from event to observation end for censored and uncensored time. — plotEventObservationDependence • SelfControlledCaseSeries - - - - + + -
-
- -
- -
+
@@ -138,68 +68,61 @@

Plot time from event to observation end for censored and uncensored time.Plot time from event to observation end for censored and uncensored time.

-
plotEventObservationDependence(studyPopulation, title = NULL, fileName = NULL)
- -

Arguments

- - - - - - - - - - - - - - -
studyPopulation

An object created using the createStudyPopulation() function.

title

Optional: the main title for the plot

fileName

Name of the file where the plot should be saved, for example 'plot.png'. -See the function ggplot2::ggsave() for supported file formats.

- -

Value

- -

A ggplot object. Use the ggplot2::ggsave() function to save to file in a different -format.

-

Details

+
+
plotEventObservationDependence(studyPopulation, title = NULL, fileName = NULL)
+
+
+

Arguments

+
studyPopulation
+

An object created using the createStudyPopulation() function.

+
title
+

Optional: the main title for the plot

+
fileName
+

Name of the file where the plot should be saved, for example 'plot.png'. +See the function ggplot2::ggsave() for supported file formats.

+
+
+

Value

+

A ggplot object. Use the ggplot2::ggsave() function to save to file in a different +format.

+
+
+

Details

This plot shows whether there is a difference in time between (first) event and the observation period end for periods that are ' censored' and those that are 'uncensored'. By 'censored' we mean periods that end before we would normally expect. Here, we define periods to be uncensored if they end at either the study end date (if specified), database end date (i.e. the date after which no data is captured in the database), or maximum age (if specified). All other periods are assumed to be censored.

As proposed by Farrington et al., by comparing the two plots, we can gain some insight into whether the censoring is dependent on the occurrence of the event.

-

References

- +
+
+

References

Farrington P, Whitaker H, Ghebremichael Weldeselassie Y (2018), Self-controlled case series studies: A modelling guide with R, Taylor & Francis

+
+
- - - + + diff --git a/docs/reference/plotEventToCalendarTime.html b/docs/reference/plotEventToCalendarTime.html index 3190f33..985b49c 100644 --- a/docs/reference/plotEventToCalendarTime.html +++ b/docs/reference/plotEventToCalendarTime.html @@ -1,67 +1,12 @@ - - - - - - - -Plot the count of events over calendar time. — plotEventToCalendarTime • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot the count of events over calendar time. — plotEventToCalendarTime • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,67 +68,56 @@

Plot the count of events over calendar time.

Plot the count of events over calendar time.

-
plotEventToCalendarTime(
-  studyPopulation,
-  sccsModel = NULL,
-  title = NULL,
-  fileName = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
studyPopulation

An object created using the createStudyPopulation() function.

sccsModel

Optional: A fitted SCCS model as created using fitSccsModel(). If the +

+
plotEventToCalendarTime(
+  studyPopulation,
+  sccsModel = NULL,
+  title = NULL,
+  fileName = NULL
+)
+
+ +
+

Arguments

+
studyPopulation
+

An object created using the createStudyPopulation() function.

+
sccsModel
+

Optional: A fitted SCCS model as created using fitSccsModel(). If the model contains splines for seasonality and or calendar time a panel will -be added with outcome counts adjusted for these splines.

title

Optional: the main title for the plot

fileName

Name of the file where the plot should be saved, for example 'plot.png'. -See the function ggplot2::ggsave() for supported file formats.

- -

Value

- -

A ggplot object. Use the ggplot2::ggsave() function to save to file in a different +be added with outcome counts adjusted for these splines.

+
title
+

Optional: the main title for the plot

+
fileName
+

Name of the file where the plot should be saved, for example 'plot.png'. +See the function ggplot2::ggsave() for supported file formats.

+
+
+

Value

+

A ggplot object. Use the ggplot2::ggsave() function to save to file in a different format.

+
+
- - - + + diff --git a/docs/reference/plotExposureCentered.html b/docs/reference/plotExposureCentered.html index 612eabb..5c02387 100644 --- a/docs/reference/plotExposureCentered.html +++ b/docs/reference/plotExposureCentered.html @@ -1,67 +1,12 @@ - - - - - - - -Plot information centered around the start of exposure — plotExposureCentered • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot information centered around the start of exposure — plotExposureCentered • SelfControlledCaseSeries - - + + - - -
-
- -
- -
+
@@ -138,82 +68,68 @@

Plot information centered around the start of exposure

Plot information centered around the start of exposure

-
plotExposureCentered(
-  studyPopulation,
-  sccsData,
-  exposureEraId = NULL,
-  highlightExposedEvents = TRUE,
-  title = NULL,
-  fileName = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - -
studyPopulation

An object created using the createStudyPopulation() function.

sccsData

An object of type SccsData as created using the -getDbSccsData function.

exposureEraId

The exposure to create the era data for. If not specified it is +

+
plotExposureCentered(
+  studyPopulation,
+  sccsData,
+  exposureEraId = NULL,
+  highlightExposedEvents = TRUE,
+  title = NULL,
+  fileName = NULL
+)
+
+ +
+

Arguments

+
studyPopulation
+

An object created using the createStudyPopulation() function.

+
sccsData
+

An object of type SccsData as created using the +getDbSccsData function.

+
exposureEraId
+

The exposure to create the era data for. If not specified it is assumed to be the one exposure for which the data was loaded from -the database.

highlightExposedEvents

Highlight events that occurred during the exposure era using a different color?

title

Optional: the main title for the plot

fileName

Name of the file where the plot should be saved, for example 'plot.png'. -See the function ggplot2::ggsave() for supported file formats.

- -

Value

- -

A ggplot object. Use the ggplot2::ggsave() function to save to file in a different +the database.

+
highlightExposedEvents
+

Highlight events that occurred during the exposure era using a different color?

+
title
+

Optional: the main title for the plot

+
fileName
+

Name of the file where the plot should be saved, for example 'plot.png'. +See the function ggplot2::ggsave() for supported file formats.

+
+
+

Value

+

A ggplot object. Use the ggplot2::ggsave() function to save to file in a different format.

-

Details

- +
+
+

Details

This plot shows the number of events and the number of subjects under observation in week-sized intervals relative to the start of the first exposure.

+
+
- - - + + diff --git a/docs/reference/plotSeasonality.html b/docs/reference/plotSeasonality.html index 08bb6be..5139fef 100644 --- a/docs/reference/plotSeasonality.html +++ b/docs/reference/plotSeasonality.html @@ -1,67 +1,12 @@ - - - - - - - -Plot the seasonality effect — plotSeasonality • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot the seasonality effect — plotSeasonality • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,63 +68,53 @@

Plot the seasonality effect

Plot the seasonality effect

-
plotSeasonality(sccsModel, rrLim = c(0.1, 10), title = NULL, fileName = NULL)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
sccsModel

An object of type sccsModel as created using the -fitSccsModel function.

rrLim

The limits on the incidence rate ratio scale in the plot.

title

Optional: the main title for the plot

fileName

Name of the file where the plot should be saved, for example 'plot.png'. See the -function ggsave in the ggplot2 package for supported file formats.

- -

Value

+
+
plotSeasonality(sccsModel, rrLim = c(0.1, 10), title = NULL, fileName = NULL)
+
+
+

Arguments

+
sccsModel
+

An object of type sccsModel as created using the +fitSccsModel function.

+
rrLim
+

The limits on the incidence rate ratio scale in the plot.

+
title
+

Optional: the main title for the plot

+
fileName
+

Name of the file where the plot should be saved, for example 'plot.png'. See the +function ggsave in the ggplot2 package for supported file formats.

+
+
+

Value

A Ggplot object. Use the ggsave function to save to file.

-

Details

- +
+
+

Details

Plot the spline curve of the seasonality effect.

+
+
- - - + + diff --git a/docs/reference/runSccsAnalyses.html b/docs/reference/runSccsAnalyses.html index 0dad74b..215cd92 100644 --- a/docs/reference/runSccsAnalyses.html +++ b/docs/reference/runSccsAnalyses.html @@ -1,67 +1,12 @@ - - - - - - - -Run a list of analyses — runSccsAnalyses • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Run a list of analyses — runSccsAnalyses • SelfControlledCaseSeries - - - - + + -
-
- -
- -
+
@@ -138,173 +68,128 @@

Run a list of analyses

Run a list of analyses

-
runSccsAnalyses(
-  connectionDetails,
-  cdmDatabaseSchema,
-  oracleTempSchema = NULL,
-  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
-  exposureDatabaseSchema = cdmDatabaseSchema,
-  exposureTable = "drug_era",
-  outcomeDatabaseSchema = cdmDatabaseSchema,
-  outcomeTable = "condition_era",
-  customCovariateDatabaseSchema = cdmDatabaseSchema,
-  customCovariateTable = "cohort",
-  nestingCohortDatabaseSchema = cdmDatabaseSchema,
-  nestingCohortTable = "cohort",
-  cdmVersion = 5,
-  outputFolder = "./SccsOutput",
-  sccsAnalysisList,
-  exposureOutcomeList,
-  combineDataFetchAcrossOutcomes = TRUE,
-  getDbSccsDataThreads = 1,
-  createStudyPopulationThreads = 1,
-  createSccsIntervalDataThreads = 1,
-  fitSccsModelThreads = 1,
-  cvThreads = 1,
-  analysesToExclude = NULL
-)
+
+
runSccsAnalyses(
+  connectionDetails,
+  cdmDatabaseSchema,
+  oracleTempSchema = NULL,
+  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
+  exposureDatabaseSchema = cdmDatabaseSchema,
+  exposureTable = "drug_era",
+  outcomeDatabaseSchema = cdmDatabaseSchema,
+  outcomeTable = "condition_era",
+  customCovariateDatabaseSchema = cdmDatabaseSchema,
+  customCovariateTable = "cohort",
+  nestingCohortDatabaseSchema = cdmDatabaseSchema,
+  nestingCohortTable = "cohort",
+  cdmVersion = 5,
+  outputFolder = "./SccsOutput",
+  sccsAnalysisList,
+  exposureOutcomeList,
+  combineDataFetchAcrossOutcomes = TRUE,
+  getDbSccsDataThreads = 1,
+  createStudyPopulationThreads = 1,
+  createSccsIntervalDataThreads = 1,
+  fitSccsModelThreads = 1,
+  cvThreads = 1,
+  analysesToExclude = NULL
+)
+
-

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
connectionDetails

An R object of type ConnectionDetails created using -the function DatabaseConnector::createConnectionDetails().

cdmDatabaseSchema

The name of the database schema that contains the OMOP CDM +

+

Arguments

+
connectionDetails
+

An R object of type ConnectionDetails created using +the function DatabaseConnector::createConnectionDetails().

+
cdmDatabaseSchema
+

The name of the database schema that contains the OMOP CDM instance. Requires read permissions to this database. On SQL Server, this should specify both the database and the -schema, so for example 'cdm_instance.dbo'.

oracleTempSchema

DEPRECATED: use tempEmulationSchema instead.

tempEmulationSchema

Some database platforms like Oracle and Impala do not truly support temp tables. To +schema, so for example 'cdm_instance.dbo'.

+
oracleTempSchema
+

DEPRECATED: use tempEmulationSchema instead.

+
tempEmulationSchema
+

Some database platforms like Oracle and Impala do not truly support temp tables. To emulate temp tables, provide a schema with write privileges where temp tables -can be created.

exposureDatabaseSchema

The name of the database schema that is the location where +can be created.

+
exposureDatabaseSchema
+

The name of the database schema that is the location where the exposure data used to define the exposure cohorts is available. If exposureTable = "DRUG_ERA", exposureDatabaseSchema is not used but assumed to be -cdmDatabaseSchema. Requires read permissions to this database.

exposureTable

The table name that contains the exposure cohorts. If +cdmDatabaseSchema. Requires read permissions to this database.

+
exposureTable
+

The table name that contains the exposure cohorts. If exposureTable <> "DRUG_ERA", then expectation is exposureTable has format of COHORT table: cohort_concept_id, SUBJECT_ID, -COHORT_START_DATE, COHORT_END_DATE.

outcomeDatabaseSchema

The name of the database schema that is the location where +COHORT_START_DATE, COHORT_END_DATE.

+
outcomeDatabaseSchema
+

The name of the database schema that is the location where the data used to define the outcome cohorts is available. If outcomeTable = "CONDITION_ERA", outcomeDatabaseSchema is not -used. Requires read permissions to this database.

outcomeTable

The table name that contains the outcome cohorts. If +used. Requires read permissions to this database.

+
outcomeTable
+

The table name that contains the outcome cohorts. If outcomeTable is not CONDITION_OCCURRENCE or CONDITION_ERA, then expectation is outcomeTable has format of COHORT table: COHORT_DEFINITION_ID, SUBJECT_ID, COHORT_START_DATE, -COHORT_END_DATE.

customCovariateDatabaseSchema

The name of the database schema that is the location where -the custom covariate data is available.

customCovariateTable

Name of the table holding the custom covariates. This table -should have the same structure as the cohort table.

nestingCohortDatabaseSchema

The name of the database schema that is the location -where the nesting cohort is defined.

nestingCohortTable

Name of the table holding the nesting cohort. This table -should have the same structure as the cohort table.

cdmVersion

Define the OMOP CDM version used: currently support "4" and -"5".

outputFolder

Name of the folder where all the outputs will written to.

sccsAnalysisList

A list of objects of sccsAnalysis as created -using the createSccsAnalysis() function.

exposureOutcomeList

A list of objects of type exposureOutcome as created -using the createExposureOutcome() function.

combineDataFetchAcrossOutcomes

Should fetching data from the database be done one outcome +COHORT_END_DATE.

+
customCovariateDatabaseSchema
+

The name of the database schema that is the location where +the custom covariate data is available.

+
customCovariateTable
+

Name of the table holding the custom covariates. This table +should have the same structure as the cohort table.

+
nestingCohortDatabaseSchema
+

The name of the database schema that is the location +where the nesting cohort is defined.

+
nestingCohortTable
+

Name of the table holding the nesting cohort. This table +should have the same structure as the cohort table.

+
cdmVersion
+

Define the OMOP CDM version used: currently support "4" and +"5".

+
outputFolder
+

Name of the folder where all the outputs will written to.

+
sccsAnalysisList
+

A list of objects of sccsAnalysis as created +using the createSccsAnalysis() function.

+
exposureOutcomeList
+

A list of objects of type exposureOutcome as created +using the createExposureOutcome() function.

+
combineDataFetchAcrossOutcomes
+

Should fetching data from the database be done one outcome at a time, or for all outcomes in one fetch? Combining fetches will be more efficient if there is large overlap in -the subjects that have the different outcomes.

getDbSccsDataThreads

The number of parallel threads to use for building the -SccsData objects.

createStudyPopulationThreads

The number of parallel threads to use for building the -studyPopulation objects.

createSccsIntervalDataThreads

The number of parallel threads to use for building the -SccsIntervalData objects.

fitSccsModelThreads

The number of parallel threads to use for fitting the -models.

cvThreads

The number of parallel threads to use for the cross- +the subjects that have the different outcomes.

+
getDbSccsDataThreads
+

The number of parallel threads to use for building the +SccsData objects.

+
createStudyPopulationThreads
+

The number of parallel threads to use for building the +studyPopulation objects.

+
createSccsIntervalDataThreads
+

The number of parallel threads to use for building the +SccsIntervalData objects.

+
fitSccsModelThreads
+

The number of parallel threads to use for fitting the +models.

+
cvThreads
+

The number of parallel threads to use for the cross- validation when estimating the hyperparameter for the outcome model. Note that the total number of CV threads at -one time could be fitSccsModelThreads * cvThreads.

analysesToExclude

Analyses to exclude. See the Analyses to Exclude section for -details.

- -

Value

- +one time could be fitSccsModelThreads * cvThreads.

+
analysesToExclude
+

Analyses to exclude. See the Analyses to Exclude section for +details.

+
+
+

Value

A tibble describing for each exposure-outcome-analysisId combination where the intermediary and outcome model files can be found, relative to the outputFolder.

-

Details

- +
+
+

Details

Run a list of analyses for the drug-comparator-outcomes of interest. This function will run all specified analyses against all hypotheses of interest, meaning that the total number of outcome models is length(cmAnalysisList) * length(drugComparatorOutcomesList) (if all analyses specify an @@ -312,48 +197,44 @@

Details the analyses have anything in common, and will take advantage of this fact. For example, if we specify several analyses that only differ in the way the outcome model is fitted, then this function will extract the data and fit the propensity model only once, and re-use this in all the -analysis.

Analyses to Exclude

+analysis.

+

Analyses to Exclude

Normally, runSccsAnalyses will run all combinations of exposure-outcome-analyses settings. However, sometimes we may not need all those combinations. Using the analysesToExclude argument, we can remove certain items from the full matrix. This argument should be a data frame with at least -one of the following columns:

    -
  • exposureId

  • +one of the following columns:

    • exposureId

    • outcomeId

    • analysisId

    • -
    - -

    This data frame will be joined to the outcome model reference table before executing, and matching rows +

This data frame will be joined to the outcome model reference table before executing, and matching rows will be removed. For example, if one specifies only one exposure ID and analysis ID, then any analyses with that exposure and that analysis ID will be skipped.

+
+
+
- - - + + diff --git a/docs/reference/saveExposureOutcomeList.html b/docs/reference/saveExposureOutcomeList.html index 07120a4..1f24bf2 100644 --- a/docs/reference/saveExposureOutcomeList.html +++ b/docs/reference/saveExposureOutcomeList.html @@ -1,67 +1,12 @@ - - - - - - - -Save a list of exposureOutcome to file — saveExposureOutcomeList • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Save a list of exposureOutcome to file — saveExposureOutcomeList • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
@@ -138,47 +68,39 @@

Save a list of exposureOutcome to file

Write a list of objects of type exposureOutcome to file. The file is in JSON format.

-
saveExposureOutcomeList(exposureOutcomeList, file)
- -

Arguments

- - - - - - - - - - -
exposureOutcomeList

The exposureOutcome list to be written to file

file

The name of the file where the results will be written

+
+
saveExposureOutcomeList(exposureOutcomeList, file)
+
+
+

Arguments

+
exposureOutcomeList
+

The exposureOutcome list to be written to file

+
file
+

The name of the file where the results will be written

+
+
- - - + + diff --git a/docs/reference/saveSccsAnalysisList.html b/docs/reference/saveSccsAnalysisList.html index 779fe6d..2bb5ab1 100644 --- a/docs/reference/saveSccsAnalysisList.html +++ b/docs/reference/saveSccsAnalysisList.html @@ -1,67 +1,12 @@ - - - - - - - -Save a list of sccsAnalysis to file — saveSccsAnalysisList • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Save a list of sccsAnalysis to file — saveSccsAnalysisList • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
@@ -138,47 +68,39 @@

Save a list of sccsAnalysis to file

Write a list of objects of type sccsAnalysis to file. The file is in JSON format.

-
saveSccsAnalysisList(sccsAnalysisList, file)
- -

Arguments

- - - - - - - - - - -
sccsAnalysisList

The sccsAnalysis list to be written to file

file

The name of the file where the results will be written

+
+
saveSccsAnalysisList(sccsAnalysisList, file)
+
+
+

Arguments

+
sccsAnalysisList
+

The sccsAnalysis list to be written to file

+
file
+

The name of the file where the results will be written

+
+
- - - + + diff --git a/docs/reference/saveSccsData.html b/docs/reference/saveSccsData.html index e15166c..5f825dd 100644 --- a/docs/reference/saveSccsData.html +++ b/docs/reference/saveSccsData.html @@ -1,67 +1,12 @@ - - - - - - - -Save the cohort method data to file — saveSccsData • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Save the cohort method data to file — saveSccsData • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
-

Saves an object of type SccsData to a file.

+

Saves an object of type SccsData to a file.

-
saveSccsData(SccsData, file)
- -

Arguments

- - - - - - - - - - -
SccsData

An object of type SccsData as generated using -getDbSccsData().

file

The name of the file where the data will be written. If the file already -exists it will be overwritten.

- -

Value

+
+
saveSccsData(SccsData, file)
+
+
+

Arguments

+
SccsData
+

An object of type SccsData as generated using +getDbSccsData().

+
file
+

The name of the file where the data will be written. If the file already +exists it will be overwritten.

+
+
+

Value

Returns no output.

+
+
-
- +
- - + + diff --git a/docs/reference/saveSccsIntervalData.html b/docs/reference/saveSccsIntervalData.html index bf9cdc3..d9d0ddf 100644 --- a/docs/reference/saveSccsIntervalData.html +++ b/docs/reference/saveSccsIntervalData.html @@ -1,67 +1,12 @@ - - - - - - - -Save the cohort method data to file — saveSccsIntervalData • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Save the cohort method data to file — saveSccsIntervalData • SelfControlledCaseSeries + + - - - - -
-
- -
- -
+
-

Saves an object of type SccsIntervalData to a file.

+

Saves an object of type SccsIntervalData to a file.

-
saveSccsIntervalData(SccsIntervalData, file)
- -

Arguments

- - - - - - - - - - -
SccsIntervalData

An object of type SccsIntervalData as generated using -createSccsIntervalData().

file

The name of the file where the data will be written. If the file already -exists it will be overwritten.

- -

Value

+
+
saveSccsIntervalData(SccsIntervalData, file)
+
+
+

Arguments

+
SccsIntervalData
+

An object of type SccsIntervalData as generated using +createSccsIntervalData().

+
file
+

The name of the file where the data will be written. If the file already +exists it will be overwritten.

+
+
+

Value

Returns no output.

+
+
-
- +
- - + + diff --git a/docs/reference/simulateSccsData.html b/docs/reference/simulateSccsData.html index e50d5a6..ac3d2cf 100644 --- a/docs/reference/simulateSccsData.html +++ b/docs/reference/simulateSccsData.html @@ -1,67 +1,12 @@ - - - - - - - -Simulate SCCS data — simulateSccsData • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Simulate SCCS data — simulateSccsData • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,51 +68,44 @@

Simulate SCCS data

Simulate SCCS data

-
simulateSccsData(nCases, settings)
- -

Arguments

- - - - - - - - - - -
nCases

The number of cases to simulate.

settings

An object of type sccsSimulationSettings as created using the -createSccsSimulationSettings.

- -

Value

+
+
simulateSccsData(nCases, settings)
+
+
+

Arguments

+
nCases
+

The number of cases to simulate.

+
settings
+

An object of type sccsSimulationSettings as created using the +createSccsSimulationSettings.

+
+
+

Value

An object of type sccsData.

+
+
- - - + + diff --git a/docs/reference/summarizeSccsAnalyses.html b/docs/reference/summarizeSccsAnalyses.html index a2e0acb..10685bb 100644 --- a/docs/reference/summarizeSccsAnalyses.html +++ b/docs/reference/summarizeSccsAnalyses.html @@ -1,67 +1,12 @@ - - - - - - - -Create a summary report of the analyses — summarizeSccsAnalyses • SelfControlledCaseSeries - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a summary report of the analyses — summarizeSccsAnalyses • SelfControlledCaseSeries - + + - - - -
-
- -
- -
+
@@ -138,50 +68,43 @@

Create a summary report of the analyses

Create a summary report of the analyses

-
summarizeSccsAnalyses(referenceTable, outputFolder)
- -

Arguments

- - - - - - - - - - -
referenceTable

A tibble as created by the runSccsAnalyses function.

outputFolder

Name of the folder where all the outputs have been written to.

- -

Value

+
+
summarizeSccsAnalyses(referenceTable, outputFolder)
+
+
+

Arguments

+
referenceTable
+

A tibble as created by the runSccsAnalyses function.

+
outputFolder
+

Name of the folder where all the outputs have been written to.

+
+
+

Value

A tibble containing summary statistics for each exposure-outcome-analysis combination.

+
+
- - - + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml new file mode 100644 index 0000000..dbc464c --- /dev/null +++ b/docs/sitemap.xml @@ -0,0 +1,213 @@ + + + + /404.html + + + /articles/index.html + + + /articles/MultipleAnalyses.html + + + /articles/SingleStudies.html + + + /authors.html + + + /index.html + + + /news/index.html + + + /pull_request_template.html + + + /reference/computeMdrr.html + + + /reference/computeTimeStability.html + + + /reference/createAgeCovariateSettings.html + + + /reference/createAgeSettings.html + + + /reference/createCalendarTimeCovariateSettings.html + + + /reference/createControlIntervalSettings.html + + + /reference/createCovariateSettings.html + + + /reference/createCreateSccsEraDataArgs.html + + + /reference/createCreateSccsIntervalDataArgs.html + + + /reference/createCreateScriIntervalDataArgs.html + + + /reference/createCreateStudyPopulationArgs.html + + + /reference/createEraCovariateSettings.html + + + /reference/createExposureOutcome.html + + + /reference/createFitSccsModelArgs.html + + + /reference/createGetDbSccsDataArgs.html + + + /reference/createSccsAnalysis.html + + + /reference/createSccsEraData.html + + + /reference/createSccsIntervalData.html + + + /reference/createSccsSimulationSettings.html + + + /reference/createScriIntervalData.html + + + /reference/createSeasonalityCovariateSettings.html + + + /reference/createSeasonalitySettings.html + + + /reference/createSimulationRiskWindow.html + + + /reference/createStudyPopulation.html + + + /reference/cyclicSplineDesign.html + + + /reference/fitSccsModel.html + + + /reference/forceSccsEraDataIntoRam.html + + + /reference/getAttritionTable.html + + + /reference/getDbSccsData.html + + + /reference/getModel.html + + + /reference/hasAgeEffect.html + + + /reference/hasCalendarTimeEffect.html + + + /reference/hasSeasonality.html + + + /reference/index.html + + + /reference/isSccsData.html + + + /reference/isSccsIntervalData.html + + + /reference/loadExposureOutcomeList.html + + + /reference/loadSccsAnalysisList.html + + + /reference/loadSccsData.html + + + /reference/loadSccsEraData.html + + + /reference/loadSccsIntervalData.html + + + /reference/plotAgeEffect.html + + + /reference/plotAgeSpans.html + + + /reference/plotCalendarTimeEffect.html + + + /reference/plotCalendarTimeSpans.html + + + /reference/plotEventObservationDependence.html + + + /reference/plotEventToCalendarTime.html + + + /reference/plotExposureCentered.html + + + /reference/plotPerPersonData.html + + + /reference/plotSeasonality.html + + + /reference/runSccsAnalyses.html + + + /reference/saveExposureOutcomeList.html + + + /reference/saveSccsAnalysisList.html + + + /reference/saveSccsData.html + + + /reference/saveSccsEraData.html + + + /reference/saveSccsIntervalData.html + + + /reference/SccsData-class.html + + + /reference/SccsIntervalData-class.html + + + /reference/SelfControlledCaseSeries-package.html + + + /reference/SelfControlledCaseSeries.html + + + /reference/simulateSccsData.html + + + /reference/summarizeSccsAnalyses.html + + diff --git a/extras/AgeAndSeasonSimulations.R b/extras/AgeAndSeasonSimulations.R index e8cb0ef..5f49114 100644 --- a/extras/AgeAndSeasonSimulations.R +++ b/extras/AgeAndSeasonSimulations.R @@ -1,9 +1,9 @@ library(SelfControlledCaseSeries) options(andromedaTempFolder = "s:/andromedaTemp") settings <- createSccsSimulationSettings(includeAgeEffect = TRUE, - includeCalendarTimeEffect = TRUE, + includeCalendarTimeEffect = FALSE, includeSeasonality = TRUE) - +set.seed(123) sccsData <- simulateSccsData(5000, settings) # summary(sccsData) ageSettings <- createAgeCovariateSettings(ageKnots = 5, @@ -23,21 +23,15 @@ studyPop <- createStudyPopulation(sccsData = sccsData, firstOutcomeOnly = FALSE, naivePeriod = 0) -# plotAgeSpans(studyPop) -# plotCalendarTimeSpans(studyPop) -# plotEventToCalendarTime(studyPop) sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, sccsData = sccsData, eraCovariateSettings = covarSettings, ageCovariateSettings = ageSettings, seasonalityCovariateSettings = seasonalitySettings, - calendarTimeCovariateSettings = calendarTimeSettings, + # calendarTimeCovariateSettings = calendarTimeSettings, minCasesForTimeCovariates = 10000) -model <- fitSccsModel(sccsIntervalData, prior = createPrior("none")) -# # Use weak prior on age, season, and calendar time because otherwise may be ill-defined: -# model <- fitSccsModel(sccsIntervalData, prior = createPrior(priorType = "laplace", variance = 1)) - +model <- fitSccsModel(sccsIntervalData, prior = createPrior("none"), control = createControl(threads = 4)) estimate1 <- model$estimates[model$estimates$originalEraId == 1, ] estimate2 <- model$estimates[model$estimates$originalEraId == 2, ] @@ -57,10 +51,10 @@ writeLines(sprintf("True RR: %0.2f, estimate: %0.2f (%0.2f-%0.2f)", # model # plotSeasonality(model) # plotAgeEffect(model) -plotCalendarTimeEffect(model) -plotEventToCalendarTime(studyPop, model) -computeTimeStability(studyPop)$stable -computeTimeStability(studyPop, model)$stable +# plotCalendarTimeEffect(model) +# plotEventToCalendarTime(studyPop, model) +# computeTimeStability(studyPop)$stable +# computeTimeStability(studyPop, model)$stable ### Plot simulated seasonality ### estimates <- model$estimates @@ -108,7 +102,6 @@ plot <- ggplot2::ggplot(data, ggplot2::aes(x = x, y = y, group = type, color = t legend.position = "top") print(plot) -# ggplot2::ggsave("s:/temp/season.png", plot, width = 5, height = 4, dpi = 300) ### Plot simulated age effect ### @@ -162,51 +155,50 @@ print(plot) ### Plot simulated calendar time effect ### -estimates <- model$estimates -estimates <- estimates[estimates$covariateId >= 300 & estimates$covariateId < 400, ] -splineCoefs <- c(0, estimates$logRr) - -calendarTime <- seq(settings$minCalendarTime, settings$maxCalendarTime, length.out = 100) -calendarMonth <- as.numeric(format(calendarTime,'%Y')) * 12 + as.numeric(format(calendarTime,'%m')) - 1 -calendarTimeKnots <- model$metaData$calendarTime$calendarTimeKnots -calendarTimeDesignMatrix <- splines::bs(calendarMonth, - knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], - Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))]) -logRr <- apply(calendarTimeDesignMatrix %*% splineCoefs, 1, sum) -logRr <- logRr - mean(logRr) -rr <- exp(logRr) -data <- data.frame(x = calendarTime, y = rr, type = "estimated") - -x <- calendarTime -y <- attr(sccsData, "metaData")$calendarTimeFun(x) -y <- y - mean(y) -y <- exp(y) -data <- rbind(data, data.frame(x = x, y = y, type = "simulated")) -breaks <- c(0.1, 0.25, 0.5, 1, 2, 4, 6, 8, 10) -rrLim <- c(0.1, 10) -theme <- ggplot2::element_text(colour = "#000000", size = 12) -themeRA <- ggplot2::element_text(colour = "#000000", size = 12, hjust = 1) -plot <- ggplot2::ggplot(data, ggplot2::aes(x = x, y = y, group = type, color = type)) + - ggplot2::geom_hline(yintercept = breaks, colour = "#AAAAAA", lty = 1, size = 0.2) + - ggplot2::geom_line(lwd = 1) + - ggplot2::geom_vline(xintercept = SelfControlledCaseSeries:::convertMonthToStartDate(calendarTimeKnots)) + - ggplot2::scale_x_date("calendarTime") + - ggplot2::scale_y_continuous("Relative risk", - limits = rrLim, - trans = "log10", - breaks = breaks, - labels = breaks) + - ggplot2::scale_color_manual(values = c(rgb(0.8, 0, 0), - rgb(0, 0, 0.8))) + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - legend.title = ggplot2::element_blank(), - legend.position = "top") -print(plot) -# SelfControlledCaseSeries:::convertMonthToStartDate(calendarTimeKnots) +# estimates <- model$estimates +# estimates <- estimates[estimates$covariateId >= 300 & estimates$covariateId < 400, ] +# splineCoefs <- c(0, estimates$logRr) +# +# calendarTime <- seq(settings$minCalendarTime, settings$maxCalendarTime, length.out = 100) +# calendarMonth <- as.numeric(format(calendarTime,'%Y')) * 12 + as.numeric(format(calendarTime,'%m')) - 1 +# calendarTimeKnots <- model$metaData$calendarTime$calendarTimeKnots +# calendarTimeDesignMatrix <- splines::bs(calendarMonth, +# knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], +# Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))]) +# logRr <- apply(calendarTimeDesignMatrix %*% splineCoefs, 1, sum) +# logRr <- logRr - mean(logRr) +# rr <- exp(logRr) +# data <- data.frame(x = calendarTime, y = rr, type = "estimated") +# +# x <- calendarTime +# y <- attr(sccsData, "metaData")$calendarTimeFun(x) +# y <- y - mean(y) +# y <- exp(y) +# data <- rbind(data, data.frame(x = x, y = y, type = "simulated")) +# breaks <- c(0.1, 0.25, 0.5, 1, 2, 4, 6, 8, 10) +# rrLim <- c(0.1, 10) +# theme <- ggplot2::element_text(colour = "#000000", size = 12) +# themeRA <- ggplot2::element_text(colour = "#000000", size = 12, hjust = 1) +# plot <- ggplot2::ggplot(data, ggplot2::aes(x = x, y = y, group = type, color = type)) + +# ggplot2::geom_hline(yintercept = breaks, colour = "#AAAAAA", lty = 1, size = 0.2) + +# ggplot2::geom_line(lwd = 1) + +# ggplot2::geom_vline(xintercept = SelfControlledCaseSeries:::convertMonthToStartDate(calendarTimeKnots)) + +# ggplot2::scale_x_date("calendarTime") + +# ggplot2::scale_y_continuous("Relative risk", +# limits = rrLim, +# trans = "log10", +# breaks = breaks, +# labels = breaks) + +# ggplot2::scale_color_manual(values = c(rgb(0.8, 0, 0), +# rgb(0, 0, 0.8))) + +# ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), +# panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), +# panel.grid.major = ggplot2::element_blank(), +# axis.ticks = ggplot2::element_blank(), +# axis.text.y = themeRA, +# axis.text.x = theme, +# strip.text.x = theme, +# strip.background = ggplot2::element_blank(), +# legend.title = ggplot2::element_blank(), +# legend.position = "top") +# print(plot) diff --git a/extras/SelfControlledCaseSeries.pdf b/extras/SelfControlledCaseSeries.pdf index 28cfb26..d39c5b0 100644 Binary files a/extras/SelfControlledCaseSeries.pdf and b/extras/SelfControlledCaseSeries.pdf differ diff --git a/inst/doc/MultipleAnalyses.pdf b/inst/doc/MultipleAnalyses.pdf index 16d103e..683bf5a 100644 Binary files a/inst/doc/MultipleAnalyses.pdf and b/inst/doc/MultipleAnalyses.pdf differ diff --git a/inst/doc/SingleStudies.pdf b/inst/doc/SingleStudies.pdf index fea6089..34238c5 100644 Binary files a/inst/doc/SingleStudies.pdf and b/inst/doc/SingleStudies.pdf differ diff --git a/tests/testthat/test-eraConstruction.R b/tests/testthat/test-eraConstruction.R index 77971da..3b35b5e 100644 --- a/tests/testthat/test-eraConstruction.R +++ b/tests/testthat/test-eraConstruction.R @@ -13,10 +13,12 @@ convertToSccsDataWrapper <- function(cases, minAge = NULL, maxAge = NULL) { if (is.null(covariateSettings)) { - covariateSettings <- createEraCovariateSettings(includeEraIds = exposureId, - start = 0, - end = 0, - endAnchor = "era end") + covariateSettings <- createEraCovariateSettings( + includeEraIds = exposureId, + start = 0, + end = 0, + endAnchor = "era end" + ) } covariateIds <- c() @@ -31,46 +33,58 @@ convertToSccsDataWrapper <- function(cases, distinct(.data$eraId, .data$eraType) %>% mutate(eraName = "") - data <- Andromeda::andromeda(cases = cases, - eras = eras, - eraRef = eraRef) - attr(data, "metaData") <- list(outcomeIds = 10, - attrition = tibble(outcomeId = 10)) + data <- Andromeda::andromeda( + cases = cases, + eras = eras, + eraRef = eraRef + ) + attr(data, "metaData") <- list( + outcomeIds = 10, + attrition = tibble(outcomeId = 10) + ) class(data) <- "SccsData" attr(class(data), "package") <- "SelfControlledCaseSeries" - studyPop <- createStudyPopulation(sccsData = data, - outcomeId = 10, - firstOutcomeOnly = firstOutcomeOnly, - naivePeriod = naivePeriod, - minAge = minAge, - maxAge = maxAge) - - result <- createSccsIntervalData(studyPopulation = studyPop, - sccsData = data, - ageCovariateSettings = ageSettings, - seasonalityCovariateSettings = seasonalitySettings, - eraCovariateSettings = covariateSettings) + studyPop <- createStudyPopulation( + sccsData = data, + outcomeId = 10, + firstOutcomeOnly = firstOutcomeOnly, + naivePeriod = naivePeriod, + minAge = minAge, + maxAge = maxAge + ) + + result <- createSccsIntervalData( + studyPopulation = studyPop, + sccsData = data, + ageCovariateSettings = ageSettings, + seasonalityCovariateSettings = seasonalitySettings, + eraCovariateSettings = covariateSettings + ) return(list(outcomes = collect(result$outcomes), covariates = collect(result$covariates))) } test_that("Simple era construction", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(50, 25), - endDay = c(50, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(50, 25), + endDay = c(50, 75) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = 11) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -82,22 +96,26 @@ test_that("Simple era construction", { }) test_that("Age restriction", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 10000, - ageInDays = 365, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(500, 525), - endDay = c(500, 575)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 10000, + ageInDays = 365, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(500, 525), + endDay = c(500, 575) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = 11, minAge = 2, maxAge = 3) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -109,22 +127,26 @@ test_that("Age restriction", { }) test_that("Outcome on boundary", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(25, 25), - endDay = c(25, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(25, 25), + endDay = c(25, 75) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -136,22 +158,26 @@ test_that("Outcome on boundary", { }) test_that("Outcome on boundary", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(24, 25), - endDay = c(24, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(24, 25), + endDay = c(24, 75) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -163,22 +189,26 @@ test_that("Outcome on boundary", { }) test_that("Outcome on boundary", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(75, 25), - endDay = c(75, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(75, 25), + endDay = c(75, 75) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -190,22 +220,26 @@ test_that("Outcome on boundary", { }) test_that("Outcome on boundary", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(76, 25), - endDay = c(76, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(76, 25), + endDay = c(76, 75) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -217,22 +251,26 @@ test_that("Outcome on boundary", { }) test_that("One day era", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(50, 25), - endDay = c(50, 25)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(50, 25), + endDay = c(50, 25) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -244,22 +282,26 @@ test_that("One day era", { }) test_that("Merging overlapping eras", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx", "rx"), - caseId = c(1, 1, 1), - eraId = c(10, 11, 11), - value = c(1, 1, 1), - startDay = c(50, 25, 70), - endDay = c(50, 75, 80)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx", "rx"), + caseId = c(1, 1, 1), + eraId = c(10, 11, 11), + value = c(1, 1, 1), + startDay = c(50, 25, 70), + endDay = c(50, 75, 80) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -271,22 +313,26 @@ test_that("Merging overlapping eras", { }) test_that("Merging overlapping eras with same start date", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx", "rx"), - caseId = c(1, 1, 1), - eraId = c(10, 11, 11), - value = c(1, 1, 1), - startDay = c(50, 25, 25), - endDay = c(50, 75, 50)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx", "rx"), + caseId = c(1, 1, 1), + eraId = c(10, 11, 11), + value = c(1, 1, 1), + startDay = c(50, 25, 25), + endDay = c(50, 75, 50) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -299,22 +345,26 @@ test_that("Merging overlapping eras with same start date", { test_that("Concomitant drug use", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx", "rx"), - caseId = c(1, 1, 1), - eraId = c(10, 11, 12), - value = c(1, 1, 1), - startDay = c(50, 25, 60), - endDay = c(50, 75, 70)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx", "rx"), + caseId = c(1, 1, 1), + eraId = c(10, 11, 12), + value = c(1, 1, 1), + startDay = c(50, 25, 60), + endDay = c(50, 75, 70) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = c(11, 12)) expect_equal(result$outcomes$rowId, c(0, 1, 2)) expect_equal(result$outcomes$stratumId, c(1, 1, 1)) @@ -326,22 +376,26 @@ test_that("Concomitant drug use", { }) test_that("Concomitant drug use (3 drugs)", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "hoi", "rx", "rx", "rx"), - caseId = c(1, 1, 1, 1, 1), - eraId = c(10, 10, 11, 12, 13), - value = c(1, 1, 1, 1, 1), - startDay = c(50, 85, 25, 70, 70), - endDay = c(NA, NA, 75, 80, 77)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "hoi", "rx", "rx", "rx"), + caseId = c(1, 1, 1, 1, 1), + eraId = c(10, 10, 11, 12, 13), + value = c(1, 1, 1, 1, 1), + startDay = c(50, 85, 25, 70, 70), + endDay = c(NA, NA, 75, 80, 77) + ) result <- convertToSccsDataWrapper(cases, eras, exposureId = c(11, 12, 13)) expect_equal(result$outcomes$rowId, c(0, 1, 2, 3, 4)) expect_equal(result$outcomes$stratumId, c(1, 1, 1, 1, 1)) @@ -353,28 +407,35 @@ test_that("Concomitant drug use (3 drugs)", { }) test_that("Start risk window at day 1 not 0", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(50, 50), - endDay = c(50, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(50, 50), + endDay = c(50, 75) + ) result <- convertToSccsDataWrapper(cases, - eras, - covariateSettings = createEraCovariateSettings(includeEraIds = c(11, 12, 13), - start = 1, - end = 0, - endAnchor = "era end")) + eras, + covariateSettings = createEraCovariateSettings( + includeEraIds = c(11, 12, 13), + start = 1, + end = 0, + endAnchor = "era end" + ) + ) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) expect_equal(result$outcomes$time, c(75, 25)) @@ -385,22 +446,26 @@ test_that("Start risk window at day 1 not 0", { }) test_that("Two HOIs, keeping both", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "hoi", "rx"), - caseId = c(1, 1, 1), - eraId = c(10, 10, 11), - value = c(1, 1, 1), - startDay = c(25, 50, 30), - endDay = c(25, 50, 60)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "hoi", "rx"), + caseId = c(1, 1, 1), + eraId = c(10, 10, 11), + value = c(1, 1, 1), + startDay = c(25, 50, 30), + endDay = c(25, 50, 60) + ) result <- convertToSccsDataWrapper(cases, eras, firstOutcomeOnly = FALSE, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -412,22 +477,26 @@ test_that("Two HOIs, keeping both", { }) test_that("Two HOIs, keeping first", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "hoi", "rx"), - caseId = c(1, 1, 1), - eraId = c(10, 10, 11), - value = c(1, 1, 1), - startDay = c(25, 50, 30), - endDay = c(25, 50, 60)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "hoi", "rx"), + caseId = c(1, 1, 1), + eraId = c(10, 10, 11), + value = c(1, 1, 1), + startDay = c(25, 50, 30), + endDay = c(25, 50, 60) + ) result <- convertToSccsDataWrapper(cases, eras, firstOutcomeOnly = TRUE, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -439,30 +508,37 @@ test_that("Two HOIs, keeping first", { }) test_that("Removal of risk windows where end before start", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(50, 50), - endDay = c(50, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(50, 50), + endDay = c(50, 75) + ) expect_warning({ result <- convertToSccsDataWrapper(cases, - eras, - covariateSettings = createEraCovariateSettings(includeEraIds = c(11), - start = 0, - end = 7, - startAnchor = "era end", - endAnchor = "era start")) + eras, + covariateSettings = createEraCovariateSettings( + includeEraIds = c(11), + start = 0, + end = 7, + startAnchor = "era end", + endAnchor = "era start" + ) + ) }) expect_equal(result$outcomes %>% count() %>% pull(), 0) }) @@ -470,13 +546,19 @@ test_that("Removal of risk windows where end before start", { test_that("Aggregates on large set", { settings <- createSccsSimulationSettings(includeAgeEffect = FALSE, includeSeasonality = FALSE) sccsData <- simulateSccsData(1000, settings) - studyPop <- createStudyPopulation(sccsData = sccsData, - naivePeriod = 0, - firstOutcomeOnly = FALSE,) - sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, - sccsData, - eraCovariateSettings = createEraCovariateSettings(includeEraIds = c(1, 2), - endAnchor = "era end")) + studyPop <- createStudyPopulation( + sccsData = sccsData, + naivePeriod = 0, + firstOutcomeOnly = FALSE, + ) + sccsIntervalData <- createSccsIntervalData( + studyPopulation = studyPop, + sccsData, + eraCovariateSettings = createEraCovariateSettings( + includeEraIds = c(1, 2), + endAnchor = "era end" + ) + ) x <- sccsData$eras %>% filter(.data$eraId == 1) %>% @@ -542,29 +624,36 @@ test_that("Aggregates on large set", { }) test_that("Exposure splitting", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(50, 25), - endDay = c(50, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(50, 25), + endDay = c(50, 75) + ) result <- convertToSccsDataWrapper(cases, - eras, - covariateSettings = createEraCovariateSettings(includeEraIds = 11, - start = 0, - end = 0, - endAnchor = "era end", - splitPoints = c(7))) + eras, + covariateSettings = createEraCovariateSettings( + includeEraIds = 11, + start = 0, + end = 0, + endAnchor = "era end", + splitPoints = c(7) + ) + ) expect_equal(result$outcomes$rowId, c(0, 1, 2)) expect_equal(result$outcomes$stratumId, c(1, 1, 1)) expect_equal(result$outcomes$time, c(49, 8, 43)) @@ -575,29 +664,36 @@ test_that("Exposure splitting", { }) test_that("Exposure splitting twice", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(50, 25), - endDay = c(50, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(50, 25), + endDay = c(50, 75) + ) result <- convertToSccsDataWrapper(cases, - eras, - covariateSettings = createEraCovariateSettings(includeEraIds = 11, - start = 0, - end = 0, - endAnchor = "era end", - splitPoints = c(7, 15))) + eras, + covariateSettings = createEraCovariateSettings( + includeEraIds = 11, + start = 0, + end = 0, + endAnchor = "era end", + splitPoints = c(7, 15) + ) + ) expect_equal(result$outcomes$rowId, c(0, 1, 2, 3)) expect_equal(result$outcomes$stratumId, c(1, 1, 1, 1)) expect_equal(result$outcomes$time, c(49, 8, 8, 35)) @@ -608,29 +704,36 @@ test_that("Exposure splitting twice", { }) test_that("Merging exposures (stratifyById=FALSE)", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx", "rx"), - caseId = c(1, 1, 1), - eraId = c(10, 11, 12), - value = c(1, 1, 1), - startDay = c(50, 25, 70), - endDay = c(50, 75, 100)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx", "rx"), + caseId = c(1, 1, 1), + eraId = c(10, 11, 12), + value = c(1, 1, 1), + startDay = c(50, 25, 70), + endDay = c(50, 75, 100) + ) result <- convertToSccsDataWrapper(cases, - eras, - covariateSettings = createEraCovariateSettings(includeEraIds = c(11,12), - stratifyById = FALSE, - start = 0, - end = 0, - endAnchor = "era end")) + eras, + covariateSettings = createEraCovariateSettings( + includeEraIds = c(11, 12), + stratifyById = FALSE, + start = 0, + end = 0, + endAnchor = "era end" + ) + ) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) expect_equal(result$outcomes$time, c(25, 75)) @@ -642,33 +745,40 @@ test_that("Merging exposures (stratifyById=FALSE)", { test_that("Exposure splitting without stratifyById", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx", "rx"), - caseId = c(1, 1, 1), - eraId = c(10, 11, 12), - value = c(1, 1, 1), - startDay = c(50, 25, 70), - endDay = c(50, 75, 100)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx", "rx"), + caseId = c(1, 1, 1), + eraId = c(10, 11, 12), + value = c(1, 1, 1), + startDay = c(50, 25, 70), + endDay = c(50, 75, 100) + ) result <- convertToSccsDataWrapper(cases, - eras, - covariateSettings = createEraCovariateSettings(includeEraIds = c(11,12), - stratifyById = FALSE, - start = 0, - end = 0, - endAnchor = "era end", - splitPoints = c(50))) + eras, + covariateSettings = createEraCovariateSettings( + includeEraIds = c(11, 12), + stratifyById = FALSE, + start = 0, + end = 0, + endAnchor = "era end", + splitPoints = c(50) + ) + ) expect_equal(result$outcomes$rowId, c(0, 1, 2)) expect_equal(result$outcomes$stratumId, c(1, 1, 1)) - expect_equal(result$outcomes$time, c(25,51, 24)) + expect_equal(result$outcomes$time, c(25, 51, 24)) expect_equal(result$outcomes$y, c(0, 1, 0)) expect_equal(result$covariates$rowId, c(1, 2)) expect_equal(result$covariates$stratumId, c(1, 1)) @@ -676,30 +786,40 @@ test_that("Exposure splitting without stratifyById", { }) test_that("Pre-exposure window", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(50, 25), - endDay = c(50, 75)) - result <- convertToSccsDataWrapper(cases, eras, covariateSettings = list(createEraCovariateSettings(includeEraIds = 11, - start = 0, - end = 0, - endAnchor = "era end"), - createEraCovariateSettings(includeEraIds = 11, - start = -30, - end = -1, - endAnchor = "era start"))) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(50, 25), + endDay = c(50, 75) + ) + result <- convertToSccsDataWrapper(cases, eras, covariateSettings = list( + createEraCovariateSettings( + includeEraIds = 11, + start = 0, + end = 0, + endAnchor = "era end" + ), + createEraCovariateSettings( + includeEraIds = 11, + start = -30, + end = -1, + endAnchor = "era start" + ) + )) expect_equal(result$outcomes$rowId, c(0, 1, 2)) expect_equal(result$outcomes$stratumId, c(1, 1, 1)) expect_equal(result$outcomes$time, c(24, 51, 25)) diff --git a/tests/testthat/test-eunomia.R b/tests/testthat/test-eunomia.R index c771053..6936fb1 100644 --- a/tests/testthat/test-eunomia.R +++ b/tests/testthat/test-eunomia.R @@ -9,68 +9,94 @@ createCohorts(connectionDetails) test_that("Running multiple analyses against Eunomia", { # Adding empty exposure and outcome cohorts: - exposureOutcomeList <- list(createExposureOutcome(exposureId = 1, - outcomeId = 3), - createExposureOutcome(exposureId = 2, - outcomeId = 3), - createExposureOutcome(exposureId = 1, - outcomeId = 4), - createExposureOutcome(exposureId = 999, - outcomeId = 4), - createExposureOutcome(exposureId = 1, - outcomeId = 999)) + exposureOutcomeList <- list( + createExposureOutcome( + exposureId = 1, + outcomeId = 3 + ), + createExposureOutcome( + exposureId = 2, + outcomeId = 3 + ), + createExposureOutcome( + exposureId = 1, + outcomeId = 4 + ), + createExposureOutcome( + exposureId = 999, + outcomeId = 4 + ), + createExposureOutcome( + exposureId = 1, + outcomeId = 999 + ) + ) getDbSccsDataArgs1 <- createGetDbSccsDataArgs(deleteCovariatesSmallCount = 1) - createStudyPopulationArgs1 <- createCreateStudyPopulationArgs(naivePeriod = 180, - firstOutcomeOnly = FALSE) + createStudyPopulationArgs1 <- createCreateStudyPopulationArgs( + naivePeriod = 180, + firstOutcomeOnly = FALSE + ) - covarExposureOfInt <- createEraCovariateSettings(label = "Exposure of interest", - includeEraIds = "exposureId", - start = 0, - end = 7, - endAnchor = "era start") + covarExposureOfInt <- createEraCovariateSettings( + label = "Exposure of interest", + includeEraIds = "exposureId", + start = 0, + end = 7, + endAnchor = "era start" + ) # All outcomes occur at almost the same age, causing issues. Disable for now: # ageSettings <- createAgeCovariateSettings(ageKnots = 5) # # seasonalitySettings <- createSeasonalityCovariateSettings(seasonKnots = 5) - covarPreExp <- createEraCovariateSettings(label = "Pre-exposure", - includeEraIds = "exposureId", - start = -30, - end = -1, - endAnchor = "era start") + covarPreExp <- createEraCovariateSettings( + label = "Pre-exposure", + includeEraIds = "exposureId", + start = -30, + end = -1, + endAnchor = "era start" + ) - createSccsIntervalDataArgs1 <- createCreateSccsIntervalDataArgs(eraCovariateSettings = list(covarExposureOfInt, - covarPreExp)) + createSccsIntervalDataArgs1 <- createCreateSccsIntervalDataArgs(eraCovariateSettings = list( + covarExposureOfInt, + covarPreExp + )) fitSccsModelArgs <- createFitSccsModelArgs() - sccsAnalysis1 <- createSccsAnalysis(analysisId = 1, - description = "Including pre-exposure", - getDbSccsDataArgs = getDbSccsDataArgs1, - createStudyPopulationArgs = createStudyPopulationArgs1, - createSccsIntervalDataArgs = createSccsIntervalDataArgs1, - fitSccsModelArgs = fitSccsModelArgs) + sccsAnalysis1 <- createSccsAnalysis( + analysisId = 1, + description = "Including pre-exposure", + getDbSccsDataArgs = getDbSccsDataArgs1, + createStudyPopulationArgs = createStudyPopulationArgs1, + createSccsIntervalDataArgs = createSccsIntervalDataArgs1, + fitSccsModelArgs = fitSccsModelArgs + ) sccsAnalysisList <- list(sccsAnalysis1) outputFolder <- tempfile(pattern = "sccsOutput") - analysesToExclude <- data.frame(exposureId = c(1), - outcomeId = c(3)) + analysesToExclude <- data.frame( + exposureId = c(1), + outcomeId = c(3) + ) suppressWarnings( - result <- runSccsAnalyses(connectionDetails = connectionDetails, - cdmDatabaseSchema = "main", - exposureDatabaseSchema = "main", - exposureTable = "cohort", - outcomeDatabaseSchema = "main", - outcomeTable = "cohort", - outputFolder = outputFolder, - exposureOutcomeList = exposureOutcomeList, - sccsAnalysisList = sccsAnalysisList, - analysesToExclude = analysesToExclude) + result <- runSccsAnalyses( + connectionDetails = connectionDetails, + cdmDatabaseSchema = "main", + exposureDatabaseSchema = "main", + exposureTable = "cohort", + outcomeDatabaseSchema = "main", + outcomeTable = "cohort", + outputFolder = outputFolder, + exposureOutcomeList = exposureOutcomeList, + sccsAnalysisList = sccsAnalysisList, + analysesToExclude = analysesToExclude + ) ) expect_equal(sum(result$exposureId == 1 & result$outcomeId == 3), 0) diff --git a/tests/testthat/test-eventDepObservation.R b/tests/testthat/test-eventDepObservation.R index 93be76a..1799969 100644 --- a/tests/testthat/test-eventDepObservation.R +++ b/tests/testthat/test-eventDepObservation.R @@ -24,19 +24,23 @@ data$eventDate <- observationDays + 1 peopleUnexpEvent <- data$eventsUnexposed > 0 # Date of event is random day when unexposed: -data$eventDate[peopleUnexpEvent] <- round(runif(sum(peopleUnexpEvent), - 1, - data$daysUnexposed[peopleUnexpEvent])) +data$eventDate[peopleUnexpEvent] <- round(runif( + sum(peopleUnexpEvent), + 1, + data$daysUnexposed[peopleUnexpEvent] +)) # If day greater than exposure start day, at exposure time so it falls in period post exposure: data$eventDate[peopleUnexpEvent & data$eventDate > data$exposureStartDate] <- data$eventDate[peopleUnexpEvent & - data$eventDate > data$exposureStartDate] + data$daysExposed[peopleUnexpEvent & data$eventDate > data$exposureStartDate] + data$eventDate > data$exposureStartDate] + data$daysExposed[peopleUnexpEvent & data$eventDate > data$exposureStartDate] # For people with event during exposure, and no event in the period prior exposure, randomly pick an # event date during exposure: peopleExpEvent <- data$eventsExposed > 0 & data$eventDate > data$exposureStartDate -data$eventDate[peopleExpEvent] <- round(runif(sum(peopleExpEvent), - data$exposureStartDate[peopleExpEvent], - data$exposureEndDate[peopleExpEvent])) +data$eventDate[peopleExpEvent] <- round(runif( + sum(peopleExpEvent), + data$exposureStartDate[peopleExpEvent], + data$exposureEndDate[peopleExpEvent] +)) # Remove non-cases: data <- data[data$eventDate <= observationDays, ] @@ -59,7 +63,7 @@ data <- data[data$eventDate <= data$censorDate, ] # Truncate exposure end date at censor date: data$exposureEndDate[data$exposureEndDate > data$censorDate] <- data$censorDate[data$exposureEndDate > - data$censorDate] + data$censorDate] data$censorDate[data$censorDate > observationDays] <- observationDays nrow(data) @@ -87,29 +91,35 @@ x$summary$coefficients <- c(0.762933) x$modelfit <- matrix(c(-3122.776, -3122.776, -3122.776, -3122.776), nrow = 2) test_that("Produces same results as SCCS package when using event-dependent observation periods", { - cases <- tibble(observationPeriodId = as.numeric(data$personId), - caseId = as.numeric(data$personId), - personId = data$personId, - observationDays = data$censorDate - data$observationStartDate + 1, - ageInDays = data$ageInDays, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0) + cases <- tibble( + observationPeriodId = as.numeric(data$personId), + caseId = as.numeric(data$personId), + personId = data$personId, + observationDays = data$censorDate - data$observationStartDate + 1, + ageInDays = data$ageInDays, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0 + ) cases$noninformativeEndCensor <- cases$observationDays == max(cases$observationDays) - heiEras <- tibble(eraType = "rx", - caseId = as.numeric(data$personId), - eraId = 1, - value = 1, - startDay = data$exposureStartDate - data$observationStartDate, - endDay = data$exposureEndDate - data$observationStartDate) - hoiEras <- tibble(eraType = "hoi", - caseId = as.numeric(data$personId), - eraId = 2, - value = 1, - startDay = data$eventDate - data$observationStartDate, - endDay = data$eventDate - data$observationStartDate) + heiEras <- tibble( + eraType = "rx", + caseId = as.numeric(data$personId), + eraId = 1, + value = 1, + startDay = data$exposureStartDate - data$observationStartDate, + endDay = data$exposureEndDate - data$observationStartDate + ) + hoiEras <- tibble( + eraType = "hoi", + caseId = as.numeric(data$personId), + eraId = 2, + value = 1, + startDay = data$eventDate - data$observationStartDate, + endDay = data$eventDate - data$observationStartDate + ) eras <- rbind(heiEras, hoiEras) eras <- eras[order(eras$caseId), ] @@ -118,23 +128,31 @@ test_that("Produces same results as SCCS package when using event-dependent obse distinct(.data$eraId, .data$eraType) %>% mutate(eraName = "") - sccsData <- Andromeda::andromeda(cases = cases, - eras = eras, - eraRef = eraRef) - attr(sccsData, "metaData") <- list(outcomeIds = 2, - attrition = tibble(outcomeId = 2)) + sccsData <- Andromeda::andromeda( + cases = cases, + eras = eras, + eraRef = eraRef + ) + attr(sccsData, "metaData") <- list( + outcomeIds = 2, + attrition = tibble(outcomeId = 2) + ) class(sccsData) <- "SccsData" attr(class(sccsData), "package") <- "SelfControlledCaseSeries" studyPop <- createStudyPopulation(sccsData = sccsData) - sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, - sccsData = sccsData, - eraCovariateSettings = createEraCovariateSettings(includeEraIds = 1, - start = 0, - end = 0, - endAnchor = "era end"), - eventDependentObservation = TRUE) + sccsIntervalData <- createSccsIntervalData( + studyPopulation = studyPop, + sccsData = sccsData, + eraCovariateSettings = createEraCovariateSettings( + includeEraIds = 1, + start = 0, + end = 0, + endAnchor = "era end" + ), + eventDependentObservation = TRUE + ) expect_equal(attr(sccsIntervalData, "metaData")$censorModel$aic, min(x$modelfit[2, ]), tolerance = 1e-04) diff --git a/tests/testthat/test-parameterSweep.R b/tests/testthat/test-parameterSweep.R index 938226f..7f35a8e 100644 --- a/tests/testthat/test-parameterSweep.R +++ b/tests/testthat/test-parameterSweep.R @@ -4,11 +4,15 @@ library(SelfControlledCaseSeries) set.seed(123) sampleSize <- 1000 -simulationRiskWindows <- list(createSimulationRiskWindow(relativeRisks = 1), - createSimulationRiskWindow(relativeRisks = 1.5)) -settings <- createSccsSimulationSettings(simulationRiskWindows = simulationRiskWindows, - eraIds = c(1, 2), - outcomeId = 10) +simulationRiskWindows <- list( + createSimulationRiskWindow(relativeRisks = 1), + createSimulationRiskWindow(relativeRisks = 1.5) +) +settings <- createSccsSimulationSettings( + simulationRiskWindows = simulationRiskWindows, + eraIds = c(1, 2), + outcomeId = 10 +) sccsData <- simulateSccsData(sampleSize, settings) test_that("Support functions and diagnostics", { @@ -20,14 +24,18 @@ test_that("Support functions and diagnostics", { ageSettings <- createAgeCovariateSettings(allowRegularization = TRUE) seasonSettings <- createSeasonalityCovariateSettings(allowRegularization = TRUE) calendarTimeSettings <- createCalendarTimeCovariateSettings(allowRegularization = TRUE) - studyPop <- createStudyPopulation(sccsData = sccsData, - outcomeId = 10) - sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, - sccsData = sccsData, - eraCovariateSettings = covar, - ageCovariateSettings = ageSettings, - seasonalityCovariateSettings = seasonSettings, - calendarTimeCovariateSettings = calendarTimeSettings) + studyPop <- createStudyPopulation( + sccsData = sccsData, + outcomeId = 10 + ) + sccsIntervalData <- createSccsIntervalData( + studyPopulation = studyPop, + sccsData = sccsData, + eraCovariateSettings = covar, + ageCovariateSettings = ageSettings, + seasonalityCovariateSettings = seasonSettings, + calendarTimeCovariateSettings = calendarTimeSettings + ) s <- summary(sccsIntervalData) expect_equal(class(s), "summary.SccsIntervalData") @@ -68,20 +76,26 @@ test_that("Parameter sweep", { for (firstOutcomeOnly in c(TRUE, FALSE)) { for (includeAgeSeasonAndCalendarTime in c(TRUE, FALSE)) { for (eventDependentObservation in c(FALSE)) { - covar <- createEraCovariateSettings(includeEraIds = c(1, 2), - stratifyById = stratifyById, - endAnchor = "era end") - studyPop <- createStudyPopulation(sccsData = sccsData, - outcomeId = 10, - naivePeriod = naivePeriod, - firstOutcomeOnly = firstOutcomeOnly) - sccsIntervalData <- createSccsIntervalData(studyPopulation = studyPop, - sccsData = sccsData, - eraCovariateSettings = covar, - ageCovariateSettings = if (includeAgeSeasonAndCalendarTime) ageSettings else NULL, - seasonalityCovariateSettings = if (includeAgeSeasonAndCalendarTime) seasonSettings else NULL, - calendarTimeCovariateSettings = if (includeAgeSeasonAndCalendarTime) calendarTimeSettings else NULL, - eventDependentObservation = eventDependentObservation) + covar <- createEraCovariateSettings( + includeEraIds = c(1, 2), + stratifyById = stratifyById, + endAnchor = "era end" + ) + studyPop <- createStudyPopulation( + sccsData = sccsData, + outcomeId = 10, + naivePeriod = naivePeriod, + firstOutcomeOnly = firstOutcomeOnly + ) + sccsIntervalData <- createSccsIntervalData( + studyPopulation = studyPop, + sccsData = sccsData, + eraCovariateSettings = covar, + ageCovariateSettings = if (includeAgeSeasonAndCalendarTime) ageSettings else NULL, + seasonalityCovariateSettings = if (includeAgeSeasonAndCalendarTime) seasonSettings else NULL, + calendarTimeCovariateSettings = if (includeAgeSeasonAndCalendarTime) calendarTimeSettings else NULL, + eventDependentObservation = eventDependentObservation + ) expect_equivalent(class(sccsIntervalData), "SccsIntervalData") # Not enough data to fit age and season: if (!includeAgeSeasonAndCalendarTime) { @@ -98,10 +112,12 @@ test_that("Parameter sweep", { }) test_that("Plots", { - studyPop <- createStudyPopulation(sccsData = sccsData, - outcomeId = 10, - naivePeriod = 0, - firstOutcomeOnly = TRUE) + studyPop <- createStudyPopulation( + sccsData = sccsData, + outcomeId = 10, + naivePeriod = 0, + firstOutcomeOnly = TRUE + ) plot <- plotAgeSpans(studyPopulation = studyPop) expect_s3_class(plot, "ggplot") @@ -115,12 +131,16 @@ test_that("Plots", { plot <- plotEventObservationDependence(studyPopulation = studyPop) expect_s3_class(plot, "ggplot") - plot <- plotExposureCentered(studyPopulation = studyPop, - sccsData = sccsData, - exposureEraId = 1) + plot <- plotExposureCentered( + studyPopulation = studyPop, + sccsData = sccsData, + exposureEraId = 1 + ) expect_s3_class(plot, "ggplot") - expect_warning(plotExposureCentered(studyPopulation = studyPop, - sccsData = sccsData, - exposureEraId = 999)) + expect_warning(plotExposureCentered( + studyPopulation = studyPop, + sccsData = sccsData, + exposureEraId = 999 + )) }) diff --git a/tests/testthat/test-scriEraConstruction.R b/tests/testthat/test-scriEraConstruction.R index 7afefc1..5e68642 100644 --- a/tests/testthat/test-scriEraConstruction.R +++ b/tests/testthat/test-scriEraConstruction.R @@ -12,17 +12,21 @@ convertToScriDataWrapper <- function(cases, minAge = NULL, maxAge = NULL) { if (is.null(covariateSettings)) { - covariateSettings <- createEraCovariateSettings(includeEraIds = exposureId, - start = 0, - end = 0, - endAnchor = "era end") + covariateSettings <- createEraCovariateSettings( + includeEraIds = exposureId, + start = 0, + end = 0, + endAnchor = "era end" + ) } if (is.null(controlIntervalSettings)) { - controlIntervalSettings <- createControlIntervalSettings(includeEraIds = exposureId, - start = -14, - end = -7, - endAnchor = "era start") + controlIntervalSettings <- createControlIntervalSettings( + includeEraIds = exposureId, + start = -14, + end = -7, + endAnchor = "era start" + ) } covariateIds <- c() @@ -37,43 +41,55 @@ convertToScriDataWrapper <- function(cases, distinct(.data$eraId, .data$eraType) %>% mutate(eraName = "") - data <- Andromeda::andromeda(cases = cases, - eras = eras, - eraRef = eraRef) - attr(data, "metaData") <- list(outcomeIds = 10, - attrition = tibble(outcomeId = 10)) + data <- Andromeda::andromeda( + cases = cases, + eras = eras, + eraRef = eraRef + ) + attr(data, "metaData") <- list( + outcomeIds = 10, + attrition = tibble(outcomeId = 10) + ) - studyPop <- createStudyPopulation(sccsData = data, - outcomeId = 10, - firstOutcomeOnly = firstOutcomeOnly, - naivePeriod = naivePeriod, - minAge = minAge, - maxAge = maxAge) + studyPop <- createStudyPopulation( + sccsData = data, + outcomeId = 10, + firstOutcomeOnly = firstOutcomeOnly, + naivePeriod = naivePeriod, + minAge = minAge, + maxAge = maxAge + ) - result <- createScriIntervalData(studyPopulation = studyPop, - sccsData = data, - eraCovariateSettings = covariateSettings, - controlIntervalSettings = controlIntervalSettings) + result <- createScriIntervalData( + studyPopulation = studyPop, + sccsData = data, + eraCovariateSettings = covariateSettings, + controlIntervalSettings = controlIntervalSettings + ) return(list(outcomes = collect(result$outcomes), covariates = collect(result$covariates))) } test_that("Simple SCRI era construction", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(50, 25), - endDay = c(50, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(50, 25), + endDay = c(50, 75) + ) result <- convertToScriDataWrapper(cases, eras, exposureId = 11) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -86,22 +102,26 @@ test_that("Simple SCRI era construction", { test_that("Outcome on boundary or control interval", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx"), - caseId = c(1, 1), - eraId = c(10, 11), - value = c(1, 1), - startDay = c(25-7, 25), - endDay = c(25-7, 75)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx"), + caseId = c(1, 1), + eraId = c(10, 11), + value = c(1, 1), + startDay = c(25 - 7, 25), + endDay = c(25 - 7, 75) + ) result <- convertToScriDataWrapper(cases, eras, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -113,22 +133,26 @@ test_that("Outcome on boundary or control interval", { }) test_that("Merging overlapping control intervals", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx", "rx"), - caseId = c(1, 1, 1), - eraId = c(10, 11, 11), - value = c(1, 1, 1), - startDay = c(25-7, 25, 28), - endDay = c(25-7, 26, 29)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx", "rx"), + caseId = c(1, 1, 1), + eraId = c(10, 11, 11), + value = c(1, 1, 1), + startDay = c(25 - 7, 25, 28), + endDay = c(25 - 7, 26, 29) + ) result <- convertToScriDataWrapper(cases, eras, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -140,22 +164,26 @@ test_that("Merging overlapping control intervals", { }) test_that("Control intervals overlapping with a risk window", { - cases <- tibble(observationPeriodId = "1000", - caseId = 1, - personId = "1", - observationDays = 100, - ageInDays = 0, - startYear = 2000, - startMonth = 5, - startDay = 1, - censoredDays = 0, - noninformativeEndCensor = 0) - eras <- tibble(eraType = c("hoi", "rx", "rx"), - caseId = c(1, 1, 1), - eraId = c(10, 11, 11), - value = c(1, 1, 1), - startDay = c(32, 30, 50), - endDay = c(32, 40, 60)) + cases <- tibble( + observationPeriodId = "1000", + caseId = 1, + personId = "1", + observationDays = 100, + ageInDays = 0, + startYear = 2000, + startMonth = 5, + startDay = 1, + censoredDays = 0, + noninformativeEndCensor = 0 + ) + eras <- tibble( + eraType = c("hoi", "rx", "rx"), + caseId = c(1, 1, 1), + eraId = c(10, 11, 11), + value = c(1, 1, 1), + startDay = c(32, 30, 50), + endDay = c(32, 40, 60) + ) result <- convertToScriDataWrapper(cases, eras, exposureId = c(11)) expect_equal(result$outcomes$rowId, c(0, 1)) expect_equal(result$outcomes$stratumId, c(1, 1)) @@ -165,4 +193,3 @@ test_that("Control intervals overlapping with a risk window", { expect_equal(result$covariates$stratumId, c(1)) expect_equal(result$covariates$covariateId, c(1000)) }) - diff --git a/tests/testthat/test-weightFunctions.R b/tests/testthat/test-weightFunctions.R index 4cc306b..929fd8f 100644 --- a/tests/testthat/test-weightFunctions.R +++ b/tests/testthat/test-weightFunctions.R @@ -15,22 +15,21 @@ library("testthat") # p<-p_ewad2 -wsmall_ewad2<-function(t,p, present,astart,aend, Dmatrix){ - - thetaA <- p[which.max(Dmatrix)] - thetaB <- p[(length(Dmatrix))+ (which.max(Dmatrix))] + p[2*(length(Dmatrix))+ (which.max(Dmatrix))]*(log(astart)) - eta <- p[3*(length(Dmatrix))+ (which.max(Dmatrix))] + p[4*(length(Dmatrix))+ (which.max(Dmatrix))]*t - gamma0 <- p[5*(length(Dmatrix))+ (which.max(Dmatrix))] + p[6*(length(Dmatrix))+ (which.max(Dmatrix))]*(log(astart)) - - lamA <-(exp(-thetaA)) # 1/rho in the paper - lamB <-(exp(-thetaB)) # 1/mu - pi0 <-(exp(eta)/(1+exp(eta))) # pi - nu0 <-(exp(gamma0)) # nu - - val <- ((1-present)*log(pi0*lamA*exp(-lamA*(aend-t))+ - (1-pi0)*nu0*lamB*((aend*lamB)^(nu0-1))*exp(-((aend*lamB)^nu0-(t*lamB)^nu0))) + - present *log(pi0*exp(-lamA*(aend-t))+ - (1-pi0)*exp(-((aend*lamB)^nu0-(t*lamB)^nu0)))) +wsmall_ewad2 <- function(t, p, present, astart, aend, Dmatrix) { + thetaA <- p[which.max(Dmatrix)] + thetaB <- p[(length(Dmatrix)) + (which.max(Dmatrix))] + p[2 * (length(Dmatrix)) + (which.max(Dmatrix))] * (log(astart)) + eta <- p[3 * (length(Dmatrix)) + (which.max(Dmatrix))] + p[4 * (length(Dmatrix)) + (which.max(Dmatrix))] * t + gamma0 <- p[5 * (length(Dmatrix)) + (which.max(Dmatrix))] + p[6 * (length(Dmatrix)) + (which.max(Dmatrix))] * (log(astart)) + + lamA <- (exp(-thetaA)) # 1/rho in the paper + lamB <- (exp(-thetaB)) # 1/mu + pi0 <- (exp(eta) / (1 + exp(eta))) # pi + nu0 <- (exp(gamma0)) # nu + + val <- ((1 - present) * log(pi0 * lamA * exp(-lamA * (aend - t)) + + (1 - pi0) * nu0 * lamB * ((aend * lamB)^(nu0 - 1)) * exp(-((aend * lamB)^nu0 - (t * lamB)^nu0))) + + present * log(pi0 * exp(-lamA * (aend - t)) + + (1 - pi0) * exp(-((aend * lamB)^nu0 - (t * lamB)^nu0)))) # print(paste(t, exp(val))) exp(val) } @@ -45,26 +44,25 @@ wsmall_ewad2<-function(t,p, present,astart,aend, Dmatrix){ # p<-p_ewid2 -wsmall_ewid2<-function(t, p, present, aend, Dmatrix){ +wsmall_ewid2 <- function(t, p, present, aend, Dmatrix) { + thetaA <- p[which.max(Dmatrix)] + thetaB <- p[(length(Dmatrix)) + (which.max(Dmatrix))] + p[2 * (length(Dmatrix)) + (which.max(Dmatrix))] * (log(t)) + eta <- p[3 * (length(Dmatrix)) + (which.max(Dmatrix))] + p[4 * (length(Dmatrix)) + (which.max(Dmatrix))] * t + gamma0 <- p[5 * (length(Dmatrix)) + (which.max(Dmatrix))] + p[6 * (length(Dmatrix)) + (which.max(Dmatrix))] * (log(t)) - thetaA <- p[which.max(Dmatrix)] - thetaB <- p[(length(Dmatrix))+ (which.max(Dmatrix))] + p[2*(length(Dmatrix))+ (which.max(Dmatrix))]*(log(t)) - eta <- p[3*(length(Dmatrix))+ (which.max(Dmatrix))] + p[4*(length(Dmatrix))+ (which.max(Dmatrix))]*t - gamma0 <- p[5*(length(Dmatrix))+ (which.max(Dmatrix))] + p[6*(length(Dmatrix))+ (which.max(Dmatrix))]*(log(t)) + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu - lamA<-exp(-thetaA) # 1/rho in the paper - lamB<-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0<-exp(gamma0) # nu + int <- aend - t - int<-aend-t + val <- ((1 - present) * log(pi0 * lamA * exp(-lamA * int) + + (1 - pi0) * nu0 * lamB * ((int * lamB)^(nu0 - 1)) * exp(-((int * lamB)^nu0))) + - val<- ((1-present)*log(pi0*lamA*exp(-lamA*int)+ - (1-pi0)*nu0*lamB*((int*lamB)^(nu0-1))*exp(-((int*lamB)^nu0))) + - - present *log(pi0*exp(-lamA*int)+ - (1-pi0)*exp(-((int*lamB)^nu0)))) + present * log(pi0 * exp(-lamA * int) + + (1 - pi0) * exp(-((int * lamB)^nu0)))) exp(val) } @@ -77,29 +75,28 @@ wsmall_ewid2<-function(t, p, present, aend, Dmatrix){ # p<-p_egad2 -wsmall_egad2 <- function(t,p,present,astart,aend,Dmatrix){ - - thetaA <- p[which.max(Dmatrix)] - thetaB <- p[(length(Dmatrix))+ (which.max(Dmatrix))] + p[2*(length(Dmatrix))+ (which.max(Dmatrix))]*(log(astart)) - eta <- p[3*(length(Dmatrix))+ (which.max(Dmatrix))] + p[4*(length(Dmatrix))+ (which.max(Dmatrix))]*t - gamma0 <- p[5*(length(Dmatrix))+ (which.max(Dmatrix))] + p[6*(length(Dmatrix))+ (which.max(Dmatrix))]*(log(astart)) +wsmall_egad2 <- function(t, p, present, astart, aend, Dmatrix) { + thetaA <- p[which.max(Dmatrix)] + thetaB <- p[(length(Dmatrix)) + (which.max(Dmatrix))] + p[2 * (length(Dmatrix)) + (which.max(Dmatrix))] * (log(astart)) + eta <- p[3 * (length(Dmatrix)) + (which.max(Dmatrix))] + p[4 * (length(Dmatrix)) + (which.max(Dmatrix))] * t + gamma0 <- p[5 * (length(Dmatrix)) + (which.max(Dmatrix))] + p[6 * (length(Dmatrix)) + (which.max(Dmatrix))] * (log(astart)) - lamA <-exp(-thetaA) # 1/rho in the paper - lamB <-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0 <-exp(gamma0) # nu + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu - rate0 <-nu0*lamB + rate0 <- nu0 * lamB # val<- ((1-present)*log(pi0*lamA*exp(-lamA*(aend-t))+ # (1-pi0)*dgamma(aend,shape=nu0,rate=rate0)/pgamma(t,shape=nu0,rate=rate0,lower.tail=F)) + # present*log(pi0*exp(-lamA*(aend-t))+ # (1-pi0)*pgamma(aend,shape=nu0,rate=rate0,lower.tail=F)/pgamma(t,shape=nu0,rate=rate0,lower.tail=F))) - val<- ((1-present)*log(pi0*lamA*exp(-lamA*(aend-t))+ - (1-pi0)*dgamma(aend,shape=nu0,rate=rate0)/ifelse(pgamma(t,shape=nu0,rate=rate0,lower.tail=F)==0,0.000000001, pgamma(t,shape=nu0,rate=rate0,lower.tail=F))) + - present *log(pi0*exp(-lamA*(aend-t))+ - (1-pi0)*pgamma(aend,shape=nu0,rate=rate0,lower.tail=F)/ifelse(pgamma(t,shape=nu0,rate=rate0,lower.tail=F)==0, 0.000000001, pgamma(t,shape=nu0,rate=rate0,lower.tail=F)))) + val <- ((1 - present) * log(pi0 * lamA * exp(-lamA * (aend - t)) + + (1 - pi0) * dgamma(aend, shape = nu0, rate = rate0) / ifelse(pgamma(t, shape = nu0, rate = rate0, lower.tail = F) == 0, 0.000000001, pgamma(t, shape = nu0, rate = rate0, lower.tail = F))) + + present * log(pi0 * exp(-lamA * (aend - t)) + + (1 - pi0) * pgamma(aend, shape = nu0, rate = rate0, lower.tail = F) / ifelse(pgamma(t, shape = nu0, rate = rate0, lower.tail = F) == 0, 0.000000001, pgamma(t, shape = nu0, rate = rate0, lower.tail = F)))) @@ -113,33 +110,31 @@ wsmall_egad2 <- function(t,p,present,astart,aend,Dmatrix){ # p<-p_egid2 -wsmall_egid2 <- function(t,p,present,astart,aend,Dmatrix) { - +wsmall_egid2 <- function(t, p, present, astart, aend, Dmatrix) { + thetaA <- p[which.max(Dmatrix)] + thetaB <- p[(length(Dmatrix)) + (which.max(Dmatrix))] + p[2 * (length(Dmatrix)) + (which.max(Dmatrix))] * (log(t)) + eta <- p[3 * (length(Dmatrix)) + (which.max(Dmatrix))] + p[4 * (length(Dmatrix)) + (which.max(Dmatrix))] * t + gamma0 <- p[5 * (length(Dmatrix)) + (which.max(Dmatrix))] + p[6 * (length(Dmatrix)) + (which.max(Dmatrix))] * (log(t)) - thetaA <- p[which.max(Dmatrix)] - thetaB <- p[(length(Dmatrix))+ (which.max(Dmatrix))] + p[2*(length(Dmatrix))+ (which.max(Dmatrix))]*(log(t)) - eta <- p[3*(length(Dmatrix))+ (which.max(Dmatrix))] + p[4*(length(Dmatrix))+ (which.max(Dmatrix))]*t - gamma0 <- p[5*(length(Dmatrix))+ (which.max(Dmatrix))] + p[6*(length(Dmatrix))+ (which.max(Dmatrix))]*(log(t)) + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu - lamA<-exp(-thetaA) # 1/rho in the paper - lamB<-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0<-exp(gamma0) # nu + rate0 <- nu0 * lamB - rate0 <-nu0*lamB + int <- aend - t - int <-aend-t - - val<- ((1-present)*log(pi0*lamA*exp(-lamA*int)+ - (1-pi0)*dgamma(int,shape=nu0,rate=rate0)) + - present *log(pi0*exp(-lamA*int)+ - (1-pi0)*pgamma(int,shape=nu0,rate=rate0,lower.tail=F))) + val <- ((1 - present) * log(pi0 * lamA * exp(-lamA * int) + + (1 - pi0) * dgamma(int, shape = nu0, rate = rate0)) + + present * log(pi0 * exp(-lamA * int) + + (1 - pi0) * pgamma(int, shape = nu0, rate = rate0, lower.tail = F))) exp(val) } test_that("Weight functions match those in SCCS package", { - p <- c(0.1,0.2,0.1,0.2,0.1,0.2,0.1) + p <- c(0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1) present <- 1 astart <- 1 aend <- 10 @@ -149,18 +144,17 @@ test_that("Weight functions match those in SCCS package", { w1 <- SelfControlledCaseSeries:::testEwad(p, present, astart, aend, start, end) w2 <- integrate(wsmall_ewad2, lower = start, upper = end, p = p, present = present, astart = astart, aend = aend, Dmatrix = Dmatrix)$value - expect_equal(w1,w2, tolerance = 1E-6) + expect_equal(w1, w2, tolerance = 1E-6) w1 <- SelfControlledCaseSeries:::testEwid(p, present, astart, aend, start, end) w2 <- integrate(wsmall_ewid2, lower = start, upper = end, p = p, present = present, aend = aend, Dmatrix = Dmatrix)$value - expect_equal(w1,w2, tolerance = 1E-6) + expect_equal(w1, w2, tolerance = 1E-6) w1 <- SelfControlledCaseSeries:::testEgad(p, present, astart, aend, start, end) w2 <- integrate(wsmall_egad2, lower = start, upper = end, p = p, present = present, astart = astart, aend = aend, Dmatrix = Dmatrix)$value - expect_equal(w1,w2, tolerance = 1E-6) + expect_equal(w1, w2, tolerance = 1E-6) w1 <- SelfControlledCaseSeries:::testEgid(p, present, astart, aend, start, end) w2 <- integrate(wsmall_egid2, lower = start, upper = end, p = p, present = present, aend = aend, Dmatrix = Dmatrix)$value - expect_equal(w1,w2, tolerance = 1E-6) - + expect_equal(w1, w2, tolerance = 1E-6) })