diff --git a/DESCRIPTION b/DESCRIPTION index ca45558..18b1a63 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: SelfControlledCaseSeries Type: Package Title: Self-Controlled Case Series Version: 3.3.0 -Date: 2022-04-04 +Date: 2022-05-25 Authors@R: c( person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut", "cre")), person("Patrick", "Ryan", role = c("aut")), diff --git a/R/Analyses.R b/R/Analyses.R index 42f921b..0a026aa 100644 --- a/R/Analyses.R +++ b/R/Analyses.R @@ -65,10 +65,12 @@ createSccsAnalysis <- function(analysisId = 1, checkmate::assertClass(createScriIntervalDataArgs, "args", null.ok = TRUE, add = errorMessages) checkmate::assertClass(fitSccsModelArgs, "args", add = errorMessages) checkmate::reportAssertions(collection = errorMessages) - if (toupper(design) == "SCCS" && is.null(createSccsIntervalDataArgs)) + if (toupper(design) == "SCCS" && is.null(createSccsIntervalDataArgs)) { stop("Must provide createSccsIntervalDataArgs argument when design = 'SCCS'") - if (toupper(design) == "SCRI" && is.null(createScriIntervalDataArgs)) + } + if (toupper(design) == "SCRI" && is.null(createScriIntervalDataArgs)) { stop("Must provide createScriIntervalDataArgs argument when design = 'SCRI'") + } analysis <- list() for (name in names(formals(createSccsAnalysis))) { analysis[[name]] <- get(name) @@ -132,10 +134,12 @@ loadSccsAnalysisList <- function(file) { #' @export createExposureOutcome <- function(exposureId, outcomeId, ...) { errorMessages <- checkmate::makeAssertCollection() - if (!is.list(exposureId)) + if (!is.list(exposureId)) { checkmate::assertInt(exposureId, add = errorMessages) - if (!is.list(outcomeId)) + } + if (!is.list(outcomeId)) { checkmate::assertInt(outcomeId, add = errorMessages) + } checkmate::reportAssertions(collection = errorMessages) exposureOutcome <- list(...) diff --git a/R/CovariateSettings.R b/R/CovariateSettings.R index 6745f47..0f55230 100644 --- a/R/CovariateSettings.R +++ b/R/CovariateSettings.R @@ -87,8 +87,9 @@ createEraCovariateSettings <- function(includeEraIds = NULL, checkmate::assertLogical(allowRegularization, len = 1, add = errorMessages) checkmate::assertLogical(profileLikelihood, len = 1, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) - if (allowRegularization && profileLikelihood) + if (allowRegularization && profileLikelihood) { stop("Cannot profile the likelihood of regularized covariates") + } if (!grepl("start$|end$", startAnchor, ignore.case = TRUE)) { stop("startAnchor should have value 'era start' or 'era end'") } @@ -98,8 +99,9 @@ createEraCovariateSettings <- function(includeEraIds = NULL, isEnd <- function(anchor) { return(grepl("end$", anchor, ignore.case = TRUE)) } - if (end < start && !isEnd(endAnchor)) + if (end < start && !isEnd(endAnchor)) { stop("End day always precedes start day. Either pick a later end day, or set endAnchor to 'era end'.") + } # Make sure string is exact: if (isEnd(startAnchor)) { @@ -315,8 +317,9 @@ createControlIntervalSettings <- function(includeEraIds = NULL, isEnd <- function(anchor) { return(grepl("end$", anchor, ignore.case = TRUE)) } - if (end < start && !isEnd(endAnchor)) + if (end < start && !isEnd(endAnchor)) { stop("End day always precedes start day. Either pick a later end day, or set endAnchor to 'era end'.") + } # Make sure string is exact: if (isEnd(startAnchor)) { @@ -329,17 +332,19 @@ createControlIntervalSettings <- function(includeEraIds = NULL, } else { endAnchor <- "era start" } - analysis <- createEraCovariateSettings(includeEraIds = includeEraIds, - excludeEraIds = excludeEraIds, - label = "Control interval", - stratifyById = FALSE, - start = start, - startAnchor = startAnchor, - end = end, - endAnchor = endAnchor, - firstOccurrenceOnly = firstOccurrenceOnly, - splitPoints = c(), - allowRegularization = FALSE) + analysis <- createEraCovariateSettings( + includeEraIds = includeEraIds, + excludeEraIds = excludeEraIds, + label = "Control interval", + stratifyById = FALSE, + start = start, + startAnchor = startAnchor, + end = end, + endAnchor = endAnchor, + firstOccurrenceOnly = firstOccurrenceOnly, + splitPoints = c(), + allowRegularization = FALSE + ) analysis$isControlInterval <- TRUE class(analysis) <- "ControlIntervalSettings" return(analysis) diff --git a/R/DataConversion.R b/R/DataConversion.R index c14f116..b271a74 100644 --- a/R/DataConversion.R +++ b/R/DataConversion.R @@ -62,8 +62,9 @@ createSccsIntervalData <- function(studyPopulation, checkmate::assertClass(sccsData, "SccsData", add = errorMessages) checkmate::assertList(studyPopulation, min.len = 1, add = errorMessages) if (is.list(eraCovariateSettings) && class(eraCovariateSettings) != "EraCovariateSettings") { - for (i in 1:length(eraCovariateSettings)) + for (i in 1:length(eraCovariateSettings)) { checkmate::assertClass(eraCovariateSettings[[i]], "EraCovariateSettings", add = errorMessages) + } } else { checkmate::assertClass(eraCovariateSettings, "EraCovariateSettings", add = errorMessages) } @@ -92,8 +93,8 @@ createSccsIntervalData <- function(studyPopulation, timeCovariateCases <- numeric(0) if (!is.null(ageCovariateSettings) || - !is.null(seasonalityCovariateSettings) || - !is.null(calendarTimeCovariateSettings)) { + !is.null(seasonalityCovariateSettings) || + !is.null(calendarTimeCovariateSettings)) { if (nrow(studyPopulation$cases) > minCasesForTimeCovariates) { set.seed(0) timeCovariateCases <- sample(studyPopulation$cases$caseId, minCasesForTimeCovariates, replace = FALSE) @@ -103,9 +104,11 @@ createSccsIntervalData <- function(studyPopulation, settings <- list() settings$metaData <- list() settings$covariateRef <- tibble() - settings <- addEventDependentObservationSettings(settings, - eventDependentObservation, - studyPopulation) + settings <- addEventDependentObservationSettings( + settings, + eventDependentObservation, + studyPopulation + ) if (eventDependentObservation && settings$metaData$censorModel$model %in% c(1, 3) && !is.null(ageCovariateSettings)) { warning("Optimal censoring model adjusts for age, so removing age as separate covariate.") ageCovariateSettings <- NULL @@ -125,23 +128,25 @@ createSccsIntervalData <- function(studyPopulation, eras <- sccsData$eras %>% arrange(.data$caseId) - data <- convertToSccs(cases = cases, - outcomes = outcomes, - eras = eras, - includeAge = !is.null(ageCovariateSettings), - ageOffset = settings$ageOffset, - ageDesignMatrix = settings$ageDesignMatrix, - includeSeason = !is.null(seasonalityCovariateSettings), - seasonDesignMatrix = settings$seasonDesignMatrix, - includeCalendarTime = !is.null(calendarTimeCovariateSettings), - calendarTimeOffset = settings$calendarTimeOffset, - calendarTimeDesignMatrix = settings$calendarTimeDesignMatrix, - timeCovariateCases = timeCovariateCases, - covariateSettingsList = settings$covariateSettingsList, - eventDependentObservation = eventDependentObservation, - censorModel = settings$censorModel, - scri = FALSE, - controlIntervalId = 0) + data <- convertToSccs( + cases = cases, + outcomes = outcomes, + eras = eras, + includeAge = !is.null(ageCovariateSettings), + ageOffset = settings$ageOffset, + ageDesignMatrix = settings$ageDesignMatrix, + includeSeason = !is.null(seasonalityCovariateSettings), + seasonDesignMatrix = settings$seasonDesignMatrix, + includeCalendarTime = !is.null(calendarTimeCovariateSettings), + calendarTimeOffset = settings$calendarTimeOffset, + calendarTimeDesignMatrix = settings$calendarTimeDesignMatrix, + timeCovariateCases = timeCovariateCases, + covariateSettingsList = settings$covariateSettingsList, + eventDependentObservation = eventDependentObservation, + censorModel = settings$censorModel, + scri = FALSE, + controlIntervalId = 0 + ) if (is.null(data$outcomes) || is.null(data$covariates)) { warning("Conversion resulted in empty data set. Perhaps no one with the outcome had any exposure of interest?") @@ -150,14 +155,12 @@ createSccsIntervalData <- function(studyPopulation, if (nrow(settings$covariateRef) > 0) { data$covariateRef <- settings$covariateRef } - } else { metaData$covariateStatistics <- collect(data$covariateStatistics) metaData$daysObserved <- pull(data$observedDays, .data$observedDays) data$covariateStatistics <- NULL data$observedDays <- NULL data$covariateRef <- settings$covariateRef - } attr(data, "metaData") <- metaData class(data) <- "SccsIntervalData" @@ -169,19 +172,27 @@ createSccsIntervalData <- function(studyPopulation, } createEmptySccsIntervalData <- function() { - sccsIntervalData <- Andromeda::andromeda(outcomes = tibble(rowId = 1, - stratumId = 1, - time = 1, - y = 1)[-1, ], - covariates = tibble(rowId = 1, - stratumId = 1, - covariateId = 1, - covariateValue = 1)[-1, ], - covariateRef = tibble(covariateId = 1, - covariateName = "", - originalEraId = 1, - originalEraName = "", - originalEraType = "")[-1, ]) + sccsIntervalData <- Andromeda::andromeda( + outcomes = tibble( + rowId = 1, + stratumId = 1, + time = 1, + y = 1 + )[-1, ], + covariates = tibble( + rowId = 1, + stratumId = 1, + covariateId = 1, + covariateValue = 1 + )[-1, ], + covariateRef = tibble( + covariateId = 1, + covariateName = "", + originalEraId = 1, + originalEraName = "", + originalEraType = "" + )[-1, ] + ) return(sccsIntervalData) } @@ -210,21 +221,28 @@ addAgeSettings <- function(settings, } settings$ageOffset <- ageKnots[1] ageDesignMatrix <- splines::bs(ageKnots[1]:ageKnots[length(ageKnots)], - knots = ageKnots[2:(length(ageKnots) - 1)], - Boundary.knots = ageKnots[c(1, length(ageKnots))]) + knots = ageKnots[2:(length(ageKnots) - 1)], + Boundary.knots = ageKnots[c(1, length(ageKnots))] + ) # Fixing first beta to zero, so dropping first column of design matrix: settings$ageDesignMatrix <- ageDesignMatrix[, 2:ncol(ageDesignMatrix)] - splineCovariateRef <- tibble(covariateId = 100:(100 + length(ageKnots) - 1), - covariateName = paste("Age spline component", - 1:(length(ageKnots))), - originalEraId = 0, - originalEraType = "", - originalEraName = "") + splineCovariateRef <- tibble( + covariateId = 100:(100 + length(ageKnots) - 1), + covariateName = paste( + "Age spline component", + 1:(length(ageKnots)) + ), + originalEraId = 0, + originalEraType = "", + originalEraName = "" + ) settings$covariateRef <- bind_rows(settings$covariateRef, splineCovariateRef) - age <- list(ageKnots = ageKnots, - covariateIds = splineCovariateRef$covariateId, - allowRegularization = ageCovariateSettings$allowRegularization, - computeConfidenceIntervals = ageCovariateSettings$computeConfidenceIntervals) + age <- list( + ageKnots = ageKnots, + covariateIds = splineCovariateRef$covariateId, + allowRegularization = ageCovariateSettings$allowRegularization, + computeConfidenceIntervals = ageCovariateSettings$computeConfidenceIntervals + ) settings$metaData$age <- age return(settings) } @@ -243,17 +261,23 @@ addSeasonalitySettings <- function(settings, seasonalityCovariateSettings, sccsD seasonDesignMatrix <- cyclicSplineDesign(1:12, knots = seasonKnots) # Fixing first beta to zero, so dropping first column of design matrix: settings$seasonDesignMatrix <- seasonDesignMatrix[, 2:ncol(seasonDesignMatrix)] - splineCovariateRef <- tibble(covariateId = 200:(200 + length(seasonKnots) - 3), - covariateName = paste("Seasonality spline component", - 1:(length(seasonKnots) - 2)), - originalEraId = 0, - originalEraType = "", - originalEraName = "") + splineCovariateRef <- tibble( + covariateId = 200:(200 + length(seasonKnots) - 3), + covariateName = paste( + "Seasonality spline component", + 1:(length(seasonKnots) - 2) + ), + originalEraId = 0, + originalEraType = "", + originalEraName = "" + ) settings$covariateRef <- bind_rows(settings$covariateRef, splineCovariateRef) - seasonality <- list(seasonKnots = seasonKnots, - covariateIds = splineCovariateRef$covariateId, - allowRegularization = seasonalityCovariateSettings$allowRegularization, - computeConfidenceIntervals = seasonalityCovariateSettings$computeConfidenceIntervals) + seasonality <- list( + seasonKnots = seasonKnots, + covariateIds = splineCovariateRef$covariateId, + allowRegularization = seasonalityCovariateSettings$allowRegularization, + computeConfidenceIntervals = seasonalityCovariateSettings$computeConfidenceIntervals + ) settings$metaData$seasonality <- seasonality } return(settings) @@ -270,14 +294,14 @@ addCalendarTimeSettings <- function(settings, if (length(calendarTimeCovariateSettings$calendarTimeKnots) == 1) { observationPeriodCounts <- computeObservedPerMonth(studyPopulation) %>% arrange(.data$month) %>% - mutate(cumCount = cumsum(.data$observationPeriodCount )) + mutate(cumCount = cumsum(.data$observationPeriodCount)) total <- observationPeriodCounts %>% tail(1) %>% pull(.data$cumCount) - cutoffs <- total * seq(0.01, 0.99, length.out = calendarTimeCovariateSettings$calendarTimeKnots) - calendarTimeKnots = rep(0, calendarTimeCovariateSettings$calendarTimeKnots) + cutoffs <- total * seq(0.01, 0.99, length.out = calendarTimeCovariateSettings$calendarTimeKnots) + calendarTimeKnots <- rep(0, calendarTimeCovariateSettings$calendarTimeKnots) for (i in 1:calendarTimeCovariateSettings$calendarTimeKnots) { calendarTimeKnots[i] <- min(observationPeriodCounts$month[observationPeriodCounts$cumCount >= cutoffs[i]]) } @@ -293,37 +317,46 @@ addCalendarTimeSettings <- function(settings, } settings$calendarTimeOffset <- calendarTimeKnots[1] calendarTimeDesignMatrix <- splines::bs(calendarTimeKnots[1]:calendarTimeKnots[length(calendarTimeKnots)], - knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], - Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))]) + knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], + Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))] + ) # Fixing first beta to zero, so dropping first column of design matrix: settings$calendarTimeDesignMatrix <- calendarTimeDesignMatrix[, 2:ncol(calendarTimeDesignMatrix)] - splineCovariateRef <- tibble(covariateId = 300:(300 + length(calendarTimeKnots) - 1), - covariateName = paste("Calendar time spline component", - 1:(length(calendarTimeKnots))), - originalEraId = 0, - originalEraType = "", - originalEraName = "") + splineCovariateRef <- tibble( + covariateId = 300:(300 + length(calendarTimeKnots) - 1), + covariateName = paste( + "Calendar time spline component", + 1:(length(calendarTimeKnots)) + ), + originalEraId = 0, + originalEraType = "", + originalEraName = "" + ) settings$covariateRef <- bind_rows(settings$covariateRef, splineCovariateRef) - calendarTime <- list(calendarTimeKnots = calendarTimeKnots, - covariateIds = splineCovariateRef$covariateId, - allowRegularization = calendarTimeCovariateSettings$allowRegularization, - computeConfidenceIntervals = calendarTimeCovariateSettings$computeConfidenceIntervals) + calendarTime <- list( + calendarTimeKnots = calendarTimeKnots, + covariateIds = splineCovariateRef$covariateId, + allowRegularization = calendarTimeCovariateSettings$allowRegularization, + computeConfidenceIntervals = calendarTimeCovariateSettings$computeConfidenceIntervals + ) settings$metaData$calendarTime <- calendarTime return(settings) } } convertDateToMonth <- function(date) { - return(as.numeric(format(date, '%Y')) * 12 + as.numeric(format(date, '%m')) - 1) + return(as.numeric(format(date, "%Y")) * 12 + as.numeric(format(date, "%m")) - 1) } convertMonthToStartDate <- function(month) { year <- floor(month / 12) month <- floor(month %% 12) + 1 - return(as.Date(sprintf("%s-%s-%s", - year, - month, - 1))) + return(as.Date(sprintf( + "%s-%s-%s", + year, + month, + 1 + ))) } convertMonthToEndDate <- function(month) { @@ -331,17 +364,21 @@ convertMonthToEndDate <- function(month) { month <- floor(month %% 12) + 1 year <- if_else(month == 12, year + 1, year) month <- if_else(month == 12, 1, month + 1) - return(as.Date(sprintf("%s-%s-%s", - year, - month, - 1)) - 1) + return(as.Date(sprintf( + "%s-%s-%s", + year, + month, + 1 + )) - 1) } computeObservedPerMonth <- function(studyPopulation) { observationPeriods <- studyPopulation$cases %>% mutate(endDate = .data$startDate + .data$endDay) %>% - mutate(startMonth = convertDateToMonth(.data$startDate), - endMonth = convertDateToMonth(.data$endDate) + 1) %>% + mutate( + startMonth = convertDateToMonth(.data$startDate), + endMonth = convertDateToMonth(.data$endDate) + 1 + ) %>% select(.data$startMonth, .data$endMonth) months <- full_join( @@ -353,20 +390,27 @@ computeObservedPerMonth <- function(studyPopulation) { group_by(.data$endMonth) %>% summarise(endCount = n()) %>% rename(month = .data$endMonth), - by = "month") %>% - mutate(startCount = ifelse(is.na(.data$startCount), 0, .data$startCount), - endCount = ifelse(is.na(.data$endCount), 0, .data$endCount)) + by = "month" + ) %>% + mutate( + startCount = ifelse(is.na(.data$startCount), 0, .data$startCount), + endCount = ifelse(is.na(.data$endCount), 0, .data$endCount) + ) # Adding months with no starts and ends: months <- months %>% full_join(tibble(month = min(months$month):max(months$month)), by = "month") %>% - mutate(startCount = if_else(is.na(.data$startCount), 0, .data$startCount), - endCount = if_else(is.na(.data$endCount), 0, .data$endCount)) + mutate( + startCount = if_else(is.na(.data$startCount), 0, .data$startCount), + endCount = if_else(is.na(.data$endCount), 0, .data$endCount) + ) months <- months %>% arrange(.data$month) %>% - mutate(cumStarts = cumsum(.data$startCount), - cumEnds = cumsum(.data$endCount)) %>% + mutate( + cumStarts = cumsum(.data$startCount), + cumEnds = cumsum(.data$endCount) + ) %>% mutate(observationPeriodCount = .data$cumStarts - .data$cumEnds) %>% select(.data$month, .data$observationPeriodCount) %>% head(-1) @@ -380,15 +424,16 @@ addEventDependentObservationSettings <- function(settings, if (!eventDependentObservation) { settings$censorModel <- list(model = 0, p = c(0)) } else { - data <- studyPopulation$outcomes %>% group_by(.data$caseId) %>% summarise(outcomeDay = min(.data$outcomeDay)) %>% inner_join(studyPopulation$cases, by = "caseId") %>% - transmute(astart = .data$ageInDays, - aend = .data$ageInDays + .data$endDay + 1, - aevent = .data$ageInDays + .data$outcomeDay + 1, - present = .data$noninformativeEndCensor == 1) + transmute( + astart = .data$ageInDays, + aend = .data$ageInDays + .data$endDay + 1, + aevent = .data$ageInDays + .data$outcomeDay + 1, + present = .data$noninformativeEndCensor == 1 + ) settings$censorModel <- fitModelsAndPickBest(data) settings$metaData$censorModel <- settings$censorModel @@ -429,12 +474,14 @@ addEraCovariateSettings <- function(settings, eraCovariateSettings, sccsData) { if (!covariateSettings$stratifyById) { # Create a single output ID covariateSettings$outputIds <- as.matrix(outputId) - newCovariateRef <- tibble(covariateId = outputId, - covariateName = covariateSettings$label, - originalEraId = 0, - originalEraType = "", - originalEraName = "", - isControlInterval = covariateSettings$isControlInterval) + newCovariateRef <- tibble( + covariateId = outputId, + covariateName = covariateSettings$label, + originalEraId = 0, + originalEraType = "", + originalEraName = "", + isControlInterval = covariateSettings$isControlInterval + ) settings$covariateRef <- bind_rows(settings$covariateRef, newCovariateRef) outputId <- outputId + 1 } else { @@ -447,16 +494,21 @@ addEraCovariateSettings <- function(settings, eraCovariateSettings, sccsData) { warning(paste0("Could not find era with ID ", covariateSettings$eraIds, " in data")) } else { varNames <- varNames %>% - transmute(originalEraId = .data$eraId, - originalEraType = .data$eraType, - originalEraName = .data$eraName, - covariateName = paste(covariateSettings$label, - .data$eraName, - sep = ": "), - isControlInterval = FALSE) - - newCovariateRef <- tibble(covariateId = outputIds, - originalEraId = covariateSettings$eraIds) %>% + transmute( + originalEraId = .data$eraId, + originalEraType = .data$eraType, + originalEraName = .data$eraName, + covariateName = paste(covariateSettings$label, + .data$eraName, + sep = ": " + ), + isControlInterval = FALSE + ) + + newCovariateRef <- tibble( + covariateId = outputIds, + originalEraId = covariateSettings$eraIds + ) %>% inner_join(varNames, by = "originalEraId") settings$covariateRef <- bind_rows(settings$covariateRef, newCovariateRef) } @@ -471,43 +523,55 @@ addEraCovariateSettings <- function(settings, eraCovariateSettings, sccsData) { varNames <- paste(varNames, " day ", startDays, "-", c(endDays[1:length(endDays) - 1], "")) # covariateSettings$outputIds <- matrix(outputIds, ncol = 1) covariateSettings$outputIds <- matrix(outputIds, - ncol = length(covariateSettings$splitPoints) + 1, - byrow = TRUE) - newCovariateRef <- tibble(covariateId = outputIds, - covariateName = varNames, - originaEraId = 0, - originalEraType = "", - originalEraName = "", - isControlInterval = FALSE) + ncol = length(covariateSettings$splitPoints) + 1, + byrow = TRUE + ) + newCovariateRef <- tibble( + covariateId = outputIds, + covariateName = varNames, + originaEraId = 0, + originalEraType = "", + originalEraName = "", + isControlInterval = FALSE + ) settings$covariateRef <- bind_rows(settings$covariateRef, newCovariateRef) } else { outputIds <- outputId:(outputId + (length(covariateSettings$splitPoint) + 1) * length(covariateSettings$eraIds) - 1) outputId <- max(outputIds) + 1 covariateSettings$outputIds <- matrix(outputIds, - ncol = length(covariateSettings$splitPoints) + 1, - byrow = TRUE) + ncol = length(covariateSettings$splitPoints) + 1, + byrow = TRUE + ) if (any(covariateSettings$eraIds %in% eraRef$eraId)) { originalEraId <- rep(covariateSettings$eraIds, - each = length(covariateSettings$splitPoints) + 1) - originalEraType <- eraRef$eraType[match(originalEraId, - eraRef$eraId)] - originalEraName <- eraRef$eraName[match(originalEraId, - eraRef$eraId)] + each = length(covariateSettings$splitPoints) + 1 + ) + originalEraType <- eraRef$eraType[match( + originalEraId, + eraRef$eraId + )] + originalEraName <- eraRef$eraName[match( + originalEraId, + eraRef$eraId + )] originalEraName[originalEraName == ""] <- originalEraId[originalEraName == ""] varNames <- paste(covariateSettings$label, ": ", originalEraName, sep = "") varNames <- paste(varNames, - ", day ", - startDays, - "-", - c(endDays[1:length(endDays) - 1], ""), - sep = "") - - newCovariateRef <- tibble(covariateId = outputIds, - covariateName = varNames, - originalEraId = originalEraId, - originalEraType = originalEraType, - originalEraName = originalEraName, - isControlInterval = FALSE) + ", day ", + startDays, + "-", + c(endDays[1:length(endDays) - 1], ""), + sep = "" + ) + + newCovariateRef <- tibble( + covariateId = outputIds, + covariateName = varNames, + originalEraId = originalEraId, + originalEraType = originalEraType, + originalEraName = originalEraName, + isControlInterval = FALSE + ) settings$covariateRef <- bind_rows(settings$covariateRef, newCovariateRef) } } @@ -535,14 +599,17 @@ cyclicSplineDesign <- function(x, knots, ord = 4) { checkmate::assertInt(ord, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) nk <- length(knots) - if (ord < 2) + if (ord < 2) { stop("order too low") - if (nk < ord) + } + if (nk < ord) { stop("too few knots") + } knots <- sort(knots) k1 <- knots[1] - if (min(x) < k1 || max(x) > knots[nk]) + if (min(x) < k1 || max(x) > knots[nk]) { stop("x out of range") + } xc <- knots[nk - ord + 1] knots <- c(k1 - (knots[nk] - knots[(nk - ord + 1):(nk - 1)]), knots) ind <- x > xc diff --git a/R/DataLoadingSaving.R b/R/DataLoadingSaving.R index 77f5a67..ab4d35b 100644 --- a/R/DataLoadingSaving.R +++ b/R/DataLoadingSaving.R @@ -137,16 +137,21 @@ getDbSccsData <- function(connectionDetails, studyEndDate = "", cdmVersion = "5", maxCasesPerOutcome = 0) { - if (studyStartDate != "" && regexpr("^[12][0-9]{3}[01][0-9][0-3][0-9]$", studyStartDate) == -1) + if (studyStartDate != "" && regexpr("^[12][0-9]{3}[01][0-9][0-3][0-9]$", studyStartDate) == -1) { stop("Study start date must have format YYYYMMDD") - if (studyEndDate != "" && regexpr("^[12][0-9]{3}[01][0-9][0-3][0-9]$", studyEndDate) == -1) + } + if (studyEndDate != "" && regexpr("^[12][0-9]{3}[01][0-9][0-3][0-9]$", studyEndDate) == -1) { stop("Study end date must have format YYYYMMDD") - if (cdmVersion == "4") + } + if (cdmVersion == "4") { stop("CDM version 4 is no longer supported") - if (!is.null(exposureIds) && length(exposureIds) > 0 && !is.numeric(exposureIds)) + } + if (!is.null(exposureIds) && length(exposureIds) > 0 && !is.numeric(exposureIds)) { stop("exposureIds must be a (vector of) numeric") - if (useCustomCovariates && !is.null(customCovariateIds) && length(customCovariateIds) > 0 && !is.numeric(customCovariateIds)) + } + if (useCustomCovariates && !is.null(customCovariateIds) && length(customCovariateIds) > 0 && !is.numeric(customCovariateIds)) { stop("customCovariateIds must be a (vector of) numeric") + } if (!is.null(oracleTempSchema) && oracleTempSchema != "") { warning("The 'oracleTempSchema' argument is deprecated. Use 'tempEmulationSchema' instead.") tempEmulationSchema <- oracleTempSchema @@ -160,12 +165,13 @@ getDbSccsData <- function(connectionDetails, } else { hasExposureIds <- TRUE DatabaseConnector::insertTable(conn, - tableName = "#exposure_ids", - data = data.frame(concept_id = as.integer(exposureIds)), - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = TRUE, - tempEmulationSchema = tempEmulationSchema) + tableName = "#exposure_ids", + data = data.frame(concept_id = as.integer(exposureIds)), + dropTableIfExists = TRUE, + createTable = TRUE, + tempTable = TRUE, + tempEmulationSchema = tempEmulationSchema + ) } if (!useCustomCovariates || is.null(customCovariateIds) || length(customCovariateIds) == 0) { @@ -173,61 +179,66 @@ getDbSccsData <- function(connectionDetails, } else { hasCustomCovariateIds <- TRUE DatabaseConnector::insertTable(conn, - tableName = "#custom_cov_ids", - data = data.frame(concept_id = as.integer(customCovariateIds)), - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = TRUE, - tempEmulationSchema = tempEmulationSchema) + tableName = "#custom_cov_ids", + data = data.frame(concept_id = as.integer(customCovariateIds)), + dropTableIfExists = TRUE, + createTable = TRUE, + tempTable = TRUE, + tempEmulationSchema = tempEmulationSchema + ) } ParallelLogger::logInfo("Selecting outcomes") sql <- SqlRender::loadRenderTranslateSql("SelectOutcomes.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - outcome_database_schema = outcomeDatabaseSchema, - outcome_table = outcomeTable, - outcome_concept_ids = outcomeIds, - use_nesting_cohort = useNestingCohort, - nesting_cohort_database_schema = nestingCohortDatabaseSchema, - nesting_cohort_table = nestingCohortTable, - nesting_cohort_id = nestingCohortId, - study_start_date = studyStartDate, - study_end_date = studyEndDate) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + outcome_database_schema = outcomeDatabaseSchema, + outcome_table = outcomeTable, + outcome_concept_ids = outcomeIds, + use_nesting_cohort = useNestingCohort, + nesting_cohort_database_schema = nestingCohortDatabaseSchema, + nesting_cohort_table = nestingCohortTable, + nesting_cohort_id = nestingCohortId, + study_start_date = studyStartDate, + study_end_date = studyEndDate + ) DatabaseConnector::executeSql(conn, sql) ParallelLogger::logInfo("Creating cases") sql <- SqlRender::loadRenderTranslateSql("CreateCases.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - use_nesting_cohort = useNestingCohort, - nesting_cohort_database_schema = nestingCohortDatabaseSchema, - nesting_cohort_table = nestingCohortTable, - nesting_cohort_id = nestingCohortId, - study_start_date = studyStartDate, - study_end_date = studyEndDate) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + use_nesting_cohort = useNestingCohort, + nesting_cohort_database_schema = nestingCohortDatabaseSchema, + nesting_cohort_table = nestingCohortTable, + nesting_cohort_id = nestingCohortId, + study_start_date = studyStartDate, + study_end_date = studyEndDate + ) DatabaseConnector::executeSql(conn, sql) DatabaseConnector::insertTable(conn, - tableName = "#outcome_ids", - data = data.frame(outcome_id = as.integer(outcomeIds)), - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = TRUE, - tempEmulationSchema = tempEmulationSchema) + tableName = "#outcome_ids", + data = data.frame(outcome_id = as.integer(outcomeIds)), + dropTableIfExists = TRUE, + createTable = TRUE, + tempTable = TRUE, + tempEmulationSchema = tempEmulationSchema + ) ParallelLogger::logInfo("Counting outcomes") sql <- SqlRender::loadRenderTranslateSql("CountOutcomes.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - use_nesting_cohort = useNestingCohort, - study_start_date = studyStartDate, - study_end_date = studyEndDate) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + use_nesting_cohort = useNestingCohort, + study_start_date = studyStartDate, + study_end_date = studyEndDate + ) DatabaseConnector::executeSql(conn, sql) sql <- "SELECT * FROM #counts;" @@ -246,49 +257,54 @@ getDbSccsData <- function(connectionDetails, } if (sampledCases) { sql <- SqlRender::loadRenderTranslateSql("SampleCases.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - max_cases_per_outcome = maxCasesPerOutcome) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + max_cases_per_outcome = maxCasesPerOutcome + ) DatabaseConnector::executeSql(conn, sql) } } ParallelLogger::logInfo("Creating eras") sql <- SqlRender::loadRenderTranslateSql("CreateEras.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - cdm_database_schema = cdmDatabaseSchema, - outcome_database_schema = outcomeDatabaseSchema, - outcome_table = outcomeTable, - outcome_concept_ids = outcomeIds, - exposure_database_schema = exposureDatabaseSchema, - exposure_table = exposureTable, - use_nesting_cohort = useNestingCohort, - use_custom_covariates = useCustomCovariates, - custom_covariate_database_schema = customCovariateDatabaseSchema, - custom_covariate_table = customCovariateTable, - has_exposure_ids = hasExposureIds, - has_custom_covariate_ids = hasCustomCovariateIds, - delete_covariates_small_count = deleteCovariatesSmallCount, - study_start_date = studyStartDate, - study_end_date = studyEndDate, - sampled_cases = sampledCases) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + cdm_database_schema = cdmDatabaseSchema, + outcome_database_schema = outcomeDatabaseSchema, + outcome_table = outcomeTable, + outcome_concept_ids = outcomeIds, + exposure_database_schema = exposureDatabaseSchema, + exposure_table = exposureTable, + use_nesting_cohort = useNestingCohort, + use_custom_covariates = useCustomCovariates, + custom_covariate_database_schema = customCovariateDatabaseSchema, + custom_covariate_table = customCovariateTable, + has_exposure_ids = hasExposureIds, + has_custom_covariate_ids = hasCustomCovariateIds, + delete_covariates_small_count = deleteCovariatesSmallCount, + study_start_date = studyStartDate, + study_end_date = studyEndDate, + sampled_cases = sampledCases + ) DatabaseConnector::executeSql(conn, sql) ParallelLogger::logInfo("Fetching data from server") sccsData <- Andromeda::andromeda() sql <- SqlRender::loadRenderTranslateSql("QueryCases.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - sampled_cases = sampledCases) - DatabaseConnector::querySqlToAndromeda(connection = conn, - sql = sql, - andromeda = sccsData, - andromedaTableName = "cases", - snakeCaseToCamelCase = TRUE) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + sampled_cases = sampledCases + ) + DatabaseConnector::querySqlToAndromeda( + connection = conn, + sql = sql, + andromeda = sccsData, + andromedaTableName = "cases", + snakeCaseToCamelCase = TRUE + ) ParallelLogger::logDebug("Fetched ", sccsData$cases %>% count() %>% pull(), " cases from server") @@ -300,50 +316,60 @@ getDbSccsData <- function(connectionDetails, if (countNegativeAges > 0) { warning("There are ", countNegativeAges, " cases with negative ages. Setting their starting age to 0. Please review your data.") sccsData$cases <- sccsData$cases %>% - mutate(ageInDays = case_when(.data$ageInDays < 0 ~ 0, - TRUE ~ .data$ageInDays)) + mutate(ageInDays = case_when( + .data$ageInDays < 0 ~ 0, + TRUE ~ .data$ageInDays + )) } sql <- SqlRender::loadRenderTranslateSql("QueryEras.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema) - DatabaseConnector::querySqlToAndromeda(connection = conn, - sql = sql, - andromeda = sccsData, - andromedaTableName = "eras", - snakeCaseToCamelCase = TRUE) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema + ) + DatabaseConnector::querySqlToAndromeda( + connection = conn, + sql = sql, + andromeda = sccsData, + andromedaTableName = "eras", + snakeCaseToCamelCase = TRUE + ) sql <- "SELECT era_type, era_id, era_name FROM #era_ref" sql <- SqlRender::translate(sql = sql, targetDialect = connectionDetails$dbms, tempEmulationSchema = tempEmulationSchema) - DatabaseConnector::querySqlToAndromeda(connection = conn, - sql = sql, - andromeda = sccsData, - andromedaTableName = "eraRef", - snakeCaseToCamelCase = TRUE) + DatabaseConnector::querySqlToAndromeda( + connection = conn, + sql = sql, + andromeda = sccsData, + andromedaTableName = "eraRef", + snakeCaseToCamelCase = TRUE + ) # Delete temp tables sql <- SqlRender::loadRenderTranslateSql("RemoveTempTables.sql", - packageName = "SelfControlledCaseSeries", - dbms = connectionDetails$dbms, - tempEmulationSchema = tempEmulationSchema, - study_start_date = studyStartDate, - study_end_date = studyEndDate, - sampled_cases = sampledCases, - has_exposure_ids = hasExposureIds, - use_nesting_cohort = useNestingCohort, - has_custom_covariate_ids = hasCustomCovariateIds) + packageName = "SelfControlledCaseSeries", + dbms = connectionDetails$dbms, + tempEmulationSchema = tempEmulationSchema, + study_start_date = studyStartDate, + study_end_date = studyEndDate, + sampled_cases = sampledCases, + has_exposure_ids = hasExposureIds, + use_nesting_cohort = useNestingCohort, + has_custom_covariate_ids = hasCustomCovariateIds + ) DatabaseConnector::executeSql(conn, sql, progressBar = FALSE, reportOverallTime = FALSE) - if (sampledCases) { + if (sampledCases) { sampledCounts <- sccsData$eras %>% filter(.data$eraType == "hoi") %>% inner_join(sccsData$cases, by = "caseId") %>% group_by(.data$eraId) %>% - summarise(outcomeSubjects = n_distinct(.data$personId), - outcomeEvents = count(), - outcomeObsPeriods = n_distinct(.data$observationPeriodId), - .groups = "drop_last") %>% + summarise( + outcomeSubjects = n_distinct(.data$personId), + outcomeEvents = count(), + outcomeObsPeriods = n_distinct(.data$observationPeriodId), + .groups = "drop_last" + ) %>% rename(outcomeId = .data$eraId) %>% mutate(description = "Random sample") %>% collect() @@ -353,9 +379,11 @@ getDbSccsData <- function(connectionDetails, } } - attr(sccsData, "metaData") <- list(exposureIds = exposureIds, - outcomeIds = outcomeIds, - attrition = outcomeCounts) + attr(sccsData, "metaData") <- list( + exposureIds = exposureIds, + outcomeIds = outcomeIds, + attrition = outcomeCounts + ) class(sccsData) <- "SccsData" attr(class(sccsData), "package") <- "SelfControlledCaseSeries" diff --git a/R/Diagnostics.R b/R/Diagnostics.R index b966453..ab67e9e 100644 --- a/R/Diagnostics.R +++ b/R/Diagnostics.R @@ -17,15 +17,17 @@ computeOutcomeRatePerMonth <- function(studyPopulation) { observationPeriodCounts <- computeObservedPerMonth(studyPopulation) outcomeCounts <- studyPopulation$outcomes %>% - inner_join(studyPopulation$cases , by = "caseId") %>% + inner_join(studyPopulation$cases, by = "caseId") %>% transmute(month = convertDateToMonth(.data$startDate + .data$outcomeDay)) %>% group_by(.data$month) %>% summarise(outcomeCount = n()) data <- observationPeriodCounts %>% inner_join(outcomeCounts, by = "month") %>% mutate(rate = .data$outcomeCount / .data$observationPeriodCount) %>% - mutate(monthStartDate = convertMonthToStartDate(.data$month), - monthEndDate = convertMonthToEndDate(.data$month)) + mutate( + monthStartDate = convertMonthToStartDate(.data$month), + monthEndDate = convertMonthToEndDate(.data$month) + ) return(data) } @@ -41,14 +43,14 @@ adjustOutcomeRatePerMonth <- function(data, sccsModel) { calendarTime[calendarTime < calendarTimeKnots[1]] <- calendarTimeKnots[1] calendarTime[calendarTime > calendarTimeKnots[length(calendarTimeKnots)]] <- calendarTimeKnots[length(calendarTimeKnots)] calendarTimeDesignMatrix <- splines::bs(calendarTime, - knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], - Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))]) + knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], + Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))] + ) logRr <- apply(calendarTimeDesignMatrix %*% splineCoefs, 1, sum) logRr <- logRr - mean(logRr) data$calendarTimeRr <- exp(logRr) data <- data %>% mutate(adjustedRate = .data$adjustedRate / .data$calendarTimeRr) - } if (hasSeasonality(sccsModel)) { @@ -63,9 +65,12 @@ adjustOutcomeRatePerMonth <- function(data, sccsModel) { data <- data %>% mutate(monthOfYear = .data$month %% 12 + 1) %>% - inner_join(tibble(monthOfYear = season, - seasonRr = exp(logRr)), - by = "monthOfYear") + inner_join(tibble( + monthOfYear = season, + seasonRr = exp(logRr) + ), + by = "monthOfYear" + ) data <- data %>% mutate(adjustedRate = .data$adjustedRate / .data$seasonRr) @@ -105,17 +110,19 @@ computeTimeStability <- function(studyPopulation, sccsModel = NULL, maxRatio = 1 data <- adjustOutcomeRatePerMonth(data, sccsModel) } computeTwoSidedP <- function(observed, expected) { - pUpperBound = 1 - ppois(observed, expected * maxRatio, lower.tail = TRUE) - pLowerBound = 1 - ppois(observed, expected / maxRatio, lower.tail = FALSE) + pUpperBound <- 1 - ppois(observed, expected * maxRatio, lower.tail = TRUE) + pLowerBound <- 1 - ppois(observed, expected / maxRatio, lower.tail = FALSE) return(min(1, 2 * pmin(pUpperBound, pLowerBound))) } # Season and calendar time splines lack intercept, so need to compute expected count in indirect way: - meanAdjustedRate <- sum(data$adjustedRate * data$observationPeriodCount ) / sum(data$observationPeriodCount) + meanAdjustedRate <- sum(data$adjustedRate * data$observationPeriodCount) / sum(data$observationPeriodCount) data <- data %>% mutate(expected = .data$outcomeCount * meanAdjustedRate / .data$adjustedRate) %>% - mutate(p = computeTwoSidedP(.data$outcomeCount, .data$expected), - alpha = !!alpha / n()) %>% + mutate( + p = computeTwoSidedP(.data$outcomeCount, .data$expected), + alpha = !!alpha / n() + ) %>% mutate(stable = .data$p >= .data$alpha) # print(data[50:100, ], n = 35) # sum(!data$stable) diff --git a/R/EventDepObservation.R b/R/EventDepObservation.R index 22557b3..e032bfe 100644 --- a/R/EventDepObservation.R +++ b/R/EventDepObservation.R @@ -4,15 +4,14 @@ # One major modification: removed possibility to specify covariates for censoring models fitModelsAndPickBest <- function(data) { - - fitCensorModel <- function(model, data){ + fitCensorModel <- function(model, data) { # This function gives a matrix created by multiplying # (Pointwise multiplication) a Matrix M by each column of Matrix S - Yproduct <- function(S, M){ - product <- matrix(NA, nrow(S), ncol(S)*ncol(M)) + Yproduct <- function(S, M) { + product <- matrix(NA, nrow(S), ncol(S) * ncol(M)) for (i in 1:ncol(S)) { - product[,(1 + ncol(M)*(i-1)):(ncol(M)*i)] <- S[,i]*M + product[, (1 + ncol(M) * (i - 1)):(ncol(M) * i)] <- S[, i] * M } return(product) } @@ -20,7 +19,7 @@ fitModelsAndPickBest <- function(data) { #--------------------------------------------------------# # Exponential- Weibull (Age) mixture Model # #--------------------------------------------------------# - mod_ewad2<-function(p, astart, aevent, aend, present){ + mod_ewad2 <- function(p, astart, aevent, aend, present) { # Dmatrixevent <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(aevent))) # Dmatrixeventlog <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(log(aevent)))) # @@ -36,24 +35,24 @@ fitModelsAndPickBest <- function(data) { # gamma0 <- Dmatrixstartlog%*%p[((5*(ncol(Dmatrix))) + 1):(7*(ncol(Dmatrix)))] # log(nu(t,y)) gamma0 <- p[6] + p[7] * log(astart) - lamA<-exp(-thetaA) # 1/rho in the paper - lamB<-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0<-exp(gamma0) # nu - - lik<- ((1-present)*log(pi0*lamA*exp(-lamA*(aend-aevent))+ - (1-pi0)*nu0*lamB*((aend*lamB)^(nu0-1))*exp(-((aend*lamB)^nu0-(aevent*lamB)^nu0))) + - present *log(pi0*exp(-lamA*(aend-aevent))+ - (1-pi0)*exp(-((aend*lamB)^nu0-(aevent*lamB)^nu0)))) - l<-(-2)*sum(lik) - #writeLines(paste(paste(p, collapse = ","), " L =", l)) - return (l) + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu + + lik <- ((1 - present) * log(pi0 * lamA * exp(-lamA * (aend - aevent)) + + (1 - pi0) * nu0 * lamB * ((aend * lamB)^(nu0 - 1)) * exp(-((aend * lamB)^nu0 - (aevent * lamB)^nu0))) + + present * log(pi0 * exp(-lamA * (aend - aevent)) + + (1 - pi0) * exp(-((aend * lamB)^nu0 - (aevent * lamB)^nu0)))) + l <- (-2) * sum(lik) + # writeLines(paste(paste(p, collapse = ","), " L =", l)) + return(l) } #--------------------------------------------------------# # Exponential- Weibull (Interval) mixture Model # #--------------------------------------------------------# - mod_ewid2 <-function(p, aevent, aend, present, Dmatrix){ + mod_ewid2 <- function(p, aevent, aend, present, Dmatrix) { # Dmatrixevent <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(aevent))) # Dmatrixeventlog <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(log(aevent)))) @@ -68,28 +67,27 @@ fitModelsAndPickBest <- function(data) { gamma0 <- p[6] + p[7] * log(aevent) - lamA<-exp(-thetaA) # 1/rho in the paper - lamB<-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0<-exp(gamma0) # nu - - int <- aend-aevent - lik<- - - ((1-present)*log(pi0*lamA*exp(-lamA*int)+ - (1-pi0)*nu0*lamB*((int*lamB)^(nu0-1))*exp(-((int*lamB)^nu0))) + + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu - present *log(pi0*exp(-lamA*int)+ - (1-pi0)*exp(-((int*lamB)^nu0)))) - l<-(-2)*sum(lik) - #writeLines(paste(paste(p, collapse = ","), " L =", l)) + int <- aend - aevent + lik <- + ((1 - present) * log(pi0 * lamA * exp(-lamA * int) + + (1 - pi0) * nu0 * lamB * ((int * lamB)^(nu0 - 1)) * exp(-((int * lamB)^nu0))) + + + present * log(pi0 * exp(-lamA * int) + + (1 - pi0) * exp(-((int * lamB)^nu0)))) + l <- (-2) * sum(lik) + # writeLines(paste(paste(p, collapse = ","), " L =", l)) l } #--------------------------------------------------------# # Exponential- Gamma (Age) mixture Model # #--------------------------------------------------------# - mod_egad2<-function(p, astart, aevent, aend, present, Dmatrix){ + mod_egad2 <- function(p, astart, aevent, aend, present, Dmatrix) { # Dmatrixevent <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(aevent))) # Dmatrixeventlog <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(log(aevent)))) @@ -107,27 +105,27 @@ fitModelsAndPickBest <- function(data) { gamma0 <- p[6] + p[7] * log(astart) - lamA <-exp(-thetaA) # 1/rho in the paper - lamB <-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0 <-exp(gamma0) # nu + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu - rate0 <-nu0*lamB + rate0 <- nu0 * lamB - lik<-((1-present)*log(pi0*lamA*exp(-lamA*(aend-aevent))+ - (1-pi0)*dgamma(aend,shape=nu0,rate=rate0)/ifelse(pgamma(aevent,shape=nu0,rate=rate0,lower.tail=F)==0,0.000000001, pgamma(aevent,shape=nu0,rate=rate0,lower.tail=F))) + - present *log(pi0*exp(-lamA*(aend-aevent))+ - (1-pi0)*pgamma(aend,shape=nu0,rate=rate0,lower.tail=F)/ifelse(pgamma(aevent,shape=nu0,rate=rate0,lower.tail=F)==0, 0.000000001, pgamma(aevent,shape=nu0,rate=rate0,lower.tail=F)))) + lik <- ((1 - present) * log(pi0 * lamA * exp(-lamA * (aend - aevent)) + + (1 - pi0) * dgamma(aend, shape = nu0, rate = rate0) / ifelse(pgamma(aevent, shape = nu0, rate = rate0, lower.tail = F) == 0, 0.000000001, pgamma(aevent, shape = nu0, rate = rate0, lower.tail = F))) + + present * log(pi0 * exp(-lamA * (aend - aevent)) + + (1 - pi0) * pgamma(aend, shape = nu0, rate = rate0, lower.tail = F) / ifelse(pgamma(aevent, shape = nu0, rate = rate0, lower.tail = F) == 0, 0.000000001, pgamma(aevent, shape = nu0, rate = rate0, lower.tail = F)))) - l <-(-2)*sum(lik) - #writeLines(paste(paste(p, collapse = ","), " L =", l)) + l <- (-2) * sum(lik) + # writeLines(paste(paste(p, collapse = ","), " L =", l)) l } #--------------------------------------------------------# # Exponential- Gamma (Interval) mixture Model # #--------------------------------------------------------# - mod_egid2<-function(p, aevent, aend, present, Dmatrix){ + mod_egid2 <- function(p, aevent, aend, present, Dmatrix) { # Dmatrixevent <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(aevent))) # Dmatrixeventlog <- cbind(Dmatrix, Yproduct(Dmatrix, as.matrix(log(aevent)))) @@ -141,52 +139,63 @@ fitModelsAndPickBest <- function(data) { # gamma0 <- Dmatrixeventlog%*%p[((5*(ncol(Dmatrix))) + 1):(7*(ncol(Dmatrix)))] # log(nu(t,y)) gamma0 <- p[6] + p[7] * log(aevent) - lamA<-exp(-thetaA) # 1/rho in the paper - lamB<-exp(-thetaB) # 1/mu - pi0 <-exp(eta)/(1+exp(eta)) # pi - nu0<-exp(gamma0) # nu + lamA <- exp(-thetaA) # 1/rho in the paper + lamB <- exp(-thetaB) # 1/mu + pi0 <- exp(eta) / (1 + exp(eta)) # pi + nu0 <- exp(gamma0) # nu - rate0 <-nu0*lamB + rate0 <- nu0 * lamB int <- aend - aevent - lik<-((1-present)*log(pi0*lamA*exp(-lamA*int)+ - (1-pi0)*dgamma(int,shape=nu0,rate=rate0)) + - present *log(pi0*exp(-lamA*int)+ - (1-pi0)*pgamma(int,shape=nu0,rate=rate0,lower.tail=F))) + lik <- ((1 - present) * log(pi0 * lamA * exp(-lamA * int) + + (1 - pi0) * dgamma(int, shape = nu0, rate = rate0)) + + present * log(pi0 * exp(-lamA * int) + + (1 - pi0) * pgamma(int, shape = nu0, rate = rate0, lower.tail = F))) - l<-(-2)*sum(lik) - #writeLines(paste(paste(p, collapse = ","), " L =", l)) + l <- (-2) * sum(lik) + # writeLines(paste(paste(p, collapse = ","), " L =", l)) l } npar <- 7 - p0 <- rep(0.1, times=npar) # inital values - result <- tryCatch({ - if (model == 1){ - fit <- nlm(mod_ewad2, p=p0, astart=data$astart/365.25, aevent=data$aevent/365.25, aend=data$aend/365.25, present=data$present, - hessian = FALSE, iterlim=1000) - } else if (model == 2){ - fit <- nlm(mod_ewid2, p=p0, aevent=data$aevent/365.25, aend=data$aend/365.25, present=data$present, - hessian = FALSE, iterlim=1000) - } else if (model == 3){ - fit <- nlm(mod_egad2, p=p0, astart=data$astart/365.25, aevent=data$aevent/365.25, aend=data$aend/365.25, present=data$present, - hessian = FALSE, iterlim=1000) - } else { - fit <- nlm(mod_egid2, p=p0, aevent=data$aevent/365.25, aend=data$aend/365.25, present=data$present, - hessian = FALSE, iterlim=1000) + p0 <- rep(0.1, times = npar) # inital values + result <- tryCatch( + { + if (model == 1) { + fit <- nlm(mod_ewad2, + p = p0, astart = data$astart / 365.25, aevent = data$aevent / 365.25, aend = data$aend / 365.25, present = data$present, + hessian = FALSE, iterlim = 1000 + ) + } else if (model == 2) { + fit <- nlm(mod_ewid2, + p = p0, aevent = data$aevent / 365.25, aend = data$aend / 365.25, present = data$present, + hessian = FALSE, iterlim = 1000 + ) + } else if (model == 3) { + fit <- nlm(mod_egad2, + p = p0, astart = data$astart / 365.25, aevent = data$aevent / 365.25, aend = data$aend / 365.25, present = data$present, + hessian = FALSE, iterlim = 1000 + ) + } else { + fit <- nlm(mod_egid2, + p = p0, aevent = data$aevent / 365.25, aend = data$aend / 365.25, present = data$present, + hessian = FALSE, iterlim = 1000 + ) + } + list(model = model, p = fit$estimate, aic = 2 * npar + fit$minimum) + }, + error = function(e) { + missing(e) # suppresses R CMD check note + list(model = model, p = rep(0, npar), aic = 999999999) } - list(model = model, p = fit$estimate, aic = 2*npar + fit$minimum) - }, error = function(e) { - missing(e) # suppresses R CMD check note - list(model = model, p = rep(0,npar), aic = 999999999) - }) + ) return(result) } ParallelLogger::logInfo("Fitting censoring models") cluster <- ParallelLogger::makeCluster(4) results <- ParallelLogger::clusterApply(cluster, 1:4, fitCensorModel, data) ParallelLogger::stopCluster(cluster) - for (i in 1:4){ + for (i in 1:4) { if (results[[i]]$aic == 999999999) { if (results[[i]]$model == 1) { warning("Could not fit exponential - Weibull (Age) mixture Model") diff --git a/R/ModelFitting.R b/R/ModelFitting.R index 632fa34..384f4fc 100644 --- a/R/ModelFitting.R +++ b/R/ModelFitting.R @@ -50,22 +50,27 @@ #' @export fitSccsModel <- function(sccsIntervalData, prior = createPrior("laplace", useCrossValidation = TRUE), - control = createControl(cvType = "auto", - selectorType = "byPid", - startingVariance = 0.1, - seed = 1, - resetCoefficients = TRUE, - noiseLevel = "quiet"), + control = createControl( + cvType = "auto", + selectorType = "byPid", + startingVariance = 0.1, + seed = 1, + resetCoefficients = TRUE, + noiseLevel = "quiet" + ), profileGrid = NULL, profileBounds = c(log(0.1), log(10))) { - if (!is.null(profileGrid) && !is.null(profileBounds)) + if (!is.null(profileGrid) && !is.null(profileBounds)) { stop("Specify either profileGrid or profileBounds") + } ParallelLogger::logTrace("Fitting SCCS model") metaData <- attr(sccsIntervalData, "metaData") if (!is.null(metaData$error)) { - result <- list(status = metaData$error, - metaData = metaData) + result <- list( + status = metaData$error, + metaData = metaData + ) class(result) <- "sccsModel" return(result) } @@ -126,16 +131,20 @@ fitSccsModel <- function(sccsIntervalData, prior$exclude <- intersect(nonRegularized, covariateIds) } cyclopsData <- Cyclops::convertToCyclopsData(sccsIntervalData$outcomes, - sccsIntervalData$covariates, - modelType = "cpr", - addIntercept = FALSE, - checkRowIds = FALSE, - quiet = TRUE) - fit <- tryCatch({ - Cyclops::fitCyclopsModel(cyclopsData, prior = prior, control = control) - }, error = function(e) { - e$message - }) + sccsIntervalData$covariates, + modelType = "cpr", + addIntercept = FALSE, + checkRowIds = FALSE, + quiet = TRUE + ) + fit <- tryCatch( + { + Cyclops::fitCyclopsModel(cyclopsData, prior = prior, control = control) + }, + error = function(e) { + e$message + } + ) if (is.character(fit)) { coefficients <- c(0) estimates <- NULL @@ -145,12 +154,14 @@ fitSccsModel <- function(sccsIntervalData, if (!is.null(profileGrid) || !is.null(profileBounds)) { covariateIds <- intersect(needProfile, as.numeric(Cyclops::getCovariateIds(cyclopsData))) getLikelihoodProfile <- function(covariateId) { - logLikelihoodProfile <- Cyclops::getCyclopsProfileLogLikelihood(object = fit, - parm = covariateId, - x = profileGrid, - bounds = profileBounds, - tolerance = 0.1, - includePenalty = TRUE) + logLikelihoodProfile <- Cyclops::getCyclopsProfileLogLikelihood( + object = fit, + parm = covariateId, + x = profileGrid, + bounds = profileBounds, + tolerance = 0.1, + includePenalty = TRUE + ) return(logLikelihoodProfile) } logLikelihoodProfiles <- lapply(covariateIds, getLikelihoodProfile) @@ -176,26 +187,31 @@ fitSccsModel <- function(sccsIntervalData, estimates$logUb95 <- NA estimates$seLogRr <- NA } else { - ci <- tryCatch({ - result <- confint(fit, parm = intersect(needCi, estimates$covariateId), includePenalty = TRUE) - attr(result, "dimnames")[[1]] <- 1:length(attr(result, "dimnames")[[1]]) - result <- as.data.frame(result) - rownames(result) <- NULL - result - }, error = function(e) { - missing(e) # suppresses R CMD check note - data.frame(covariate = 0, logLb95 = 0, logUb95 = 0) - }) + ci <- tryCatch( + { + result <- confint(fit, parm = intersect(needCi, estimates$covariateId), includePenalty = TRUE) + attr(result, "dimnames")[[1]] <- 1:length(attr(result, "dimnames")[[1]]) + result <- as.data.frame(result) + rownames(result) <- NULL + result + }, + error = function(e) { + missing(e) # suppresses R CMD check note + data.frame(covariate = 0, logLb95 = 0, logUb95 = 0) + } + ) names(ci)[names(ci) == "2.5 %"] <- "logLb95" names(ci)[names(ci) == "97.5 %"] <- "logUb95" ci$evaluations <- NULL estimates <- merge(estimates, ci, by.x = "covariateId", by.y = "covariate", all.x = TRUE) - estimates$seLogRr <- (estimates$logUb95 - estimates$logLb95)/(2*qnorm(0.975)) + estimates$seLogRr <- (estimates$logUb95 - estimates$logLb95) / (2 * qnorm(0.975)) for (param in intersect(needCi, estimates$covariateId)) { - llNull <- Cyclops::getCyclopsProfileLogLikelihood(object = fit, - parm = param, - x = 0, - includePenalty = FALSE)$value + llNull <- Cyclops::getCyclopsProfileLogLikelihood( + object = fit, + parm = param, + x = 0, + includePenalty = FALSE + )$value estimates$llr[estimates$covariateId == param] <- fit$log_likelihood - llNull } } @@ -206,12 +222,14 @@ fitSccsModel <- function(sccsIntervalData, } } } - result <- list(estimates = estimates, - priorVariance = priorVariance, - logLikelihood = logLikelihood, - logLikelihoodProfiles = logLikelihoodProfiles, - status = status, - metaData = metaData) + result <- list( + estimates = estimates, + priorVariance = priorVariance, + logLikelihood = logLikelihood, + logLikelihoodProfiles = logLikelihoodProfiles, + status = status, + metaData = metaData + ) class(result) <- "SccsModel" delta <- Sys.time() - start ParallelLogger::logInfo(paste("Fitting the model took", signif(delta, 3), attr(delta, "units"))) @@ -249,13 +267,15 @@ print.SccsModel <- function(x, ...) { } else { writeLines("Estimates:") d <- x$estimates - output <- tibble(d$covariateName, - d$covariateId, - exp(d$logRr), - exp(d$logLb95), - exp(d$logUb95), - d$logRr, - d$seLogRr) + output <- tibble( + d$covariateName, + d$covariateId, + exp(d$logRr), + exp(d$logLb95), + exp(d$logUb95), + d$logRr, + d$seLogRr + ) colnames(output) <- c("Name", "ID", "Estimate", "LB95CI", "UB95CI", "LogRr", "SeLogRr") print(output, n = 25) @@ -273,31 +293,36 @@ print.SccsModel <- function(x, ...) { #' #' @export getModel <- function(sccsModel) { - if (class(sccsModel) != "SccsModel") + if (class(sccsModel) != "SccsModel") { stop("the sccsModel argument must be of type 'sccsModel'.") + } d <- sccsModel$estimates # d$seLogRr <- (d$logUb95 - d$logRr)/qnorm(0.975) - output <- tibble(d$covariateName, - d$covariateId, - exp(d$logRr), - exp(d$logLb95), - exp(d$logUb95), - d$logRr, - d$seLogRr, - d$originalEraId, - d$originalEraType, - d$originalEraName) - colnames(output) <- c("name", - "id", - "estimate", - "lb95Ci", - "ub95Ci", - "logRr", - "seLogRr", - "originalEraId", - "originalEraType", - "originalEraName") + output <- tibble( + d$covariateName, + d$covariateId, + exp(d$logRr), + exp(d$logLb95), + exp(d$logUb95), + d$logRr, + d$seLogRr, + d$originalEraId, + d$originalEraType, + d$originalEraName + ) + colnames(output) <- c( + "name", + "id", + "estimate", + "lb95Ci", + "ub95Ci", + "logRr", + "seLogRr", + "originalEraId", + "originalEraType", + "originalEraName" + ) return(output) } @@ -311,8 +336,9 @@ getModel <- function(sccsModel) { #' #' @export hasAgeEffect <- function(sccsModel) { - if (class(sccsModel) != "SccsModel") + if (class(sccsModel) != "SccsModel") { stop("the sccsModel argument must be of type 'sccsModel'.") + } estimates <- sccsModel$estimates return(any(estimates$covariateId >= 100 & estimates$covariateId < 200)) } @@ -327,8 +353,9 @@ hasAgeEffect <- function(sccsModel) { #' #' @export hasSeasonality <- function(sccsModel) { - if (class(sccsModel) != "SccsModel") + if (class(sccsModel) != "SccsModel") { stop("the sccsModel argument must be of type 'sccsModel'.") + } estimates <- sccsModel$estimates return(any(estimates$covariateId >= 200 & estimates$covariateId < 300)) } @@ -343,8 +370,9 @@ hasSeasonality <- function(sccsModel) { #' #' @export hasCalendarTimeEffect <- function(sccsModel) { - if (class(sccsModel) != "SccsModel") + if (class(sccsModel) != "SccsModel") { stop("the sccsModel argument must be of type 'sccsModel'.") + } estimates <- sccsModel$estimates return(any(estimates$covariateId >= 300 & estimates$covariateId < 400)) } diff --git a/R/Plots.R b/R/Plots.R index 93cc8f3..30606ec 100644 --- a/R/Plots.R +++ b/R/Plots.R @@ -40,9 +40,9 @@ plotAgeSpans <- function(studyPopulation, arrange(.data$startAge, .data$endAge) %>% mutate(rank = row_number()) - ageLabels <- floor(min(cases$startAge)/365.25):ceiling(max(cases$endAge)/365.25) + ageLabels <- floor(min(cases$startAge) / 365.25):ceiling(max(cases$endAge) / 365.25) if (length(ageLabels) > 10) { - ageLabels <- 10 * (floor(min(cases$startAge)/3652.5):floor(max(cases$endAge)/3652.5)) + ageLabels <- 10 * (floor(min(cases$startAge) / 3652.5):floor(max(cases$endAge) / 3652.5)) } ageBreaks <- ageLabels * 365.25 if (nrow(cases) > maxPersons) { @@ -56,23 +56,26 @@ plotAgeSpans <- function(studyPopulation, ggplot2::geom_errorbarh(color = rgb(0, 0, 0.8), alpha = 0.8) + ggplot2::scale_x_continuous("Age (years)", breaks = ageBreaks, labels = ageLabels) + ggplot2::scale_y_continuous("Case rank") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.x = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } # fileName <- "S:/temp/plot.png" - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -104,17 +107,19 @@ plotAgeSpans <- function(studyPopulation, plotEventObservationDependence <- function(studyPopulation, title = NULL, fileName = NULL) { - - outcomes <- studyPopulation$outcomes %>% group_by(.data$caseId) %>% summarise(outcomeDay = min(.data$outcomeDay), .groups = "drop_last") %>% inner_join(studyPopulation$cases, by = "caseId") %>% - transmute(daysFromEvent = .data$endDay - .data$outcomeDay, - censoring = case_when(.data$noninformativeEndCensor == 1 ~ "Uncensored", - TRUE ~ "Censored")) + transmute( + daysFromEvent = .data$endDay - .data$outcomeDay, + censoring = case_when( + .data$noninformativeEndCensor == 1 ~ "Uncensored", + TRUE ~ "Censored" + ) + ) - ageLabels <- 0:ceiling(max(outcomes$daysFromEvent)/365.25) + ageLabels <- 0:ceiling(max(outcomes$daysFromEvent) / 365.25) ageBreaks <- ageLabels * 365.25 @@ -125,23 +130,26 @@ plotEventObservationDependence <- function(studyPopulation, ggplot2::geom_histogram(binwidth = 30.5, fill = rgb(0, 0, 0.8), alpha = 0.8) + ggplot2::scale_x_continuous("Years from event", breaks = ageBreaks, labels = ageLabels) + ggplot2::scale_y_continuous("Frequency") + - ggplot2::facet_grid(censoring~., scales = "free_y") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.y = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + ggplot2::facet_grid(censoring ~ ., scales = "free_y") + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.y = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -172,7 +180,6 @@ plotExposureCentered <- function(studyPopulation, highlightExposedEvents = TRUE, title = NULL, fileName = NULL) { - if (is.null(exposureEraId)) { exposureEraId <- attr(sccsData, "metaData")$exposureIds if (length(exposureEraId) != 1) { @@ -187,8 +194,10 @@ plotExposureCentered <- function(studyPopulation, filter(.data$eraId == exposureEraId & .data$eraType == "rx") %>% group_by(.data$caseId) %>% inner_join(cases, by = "caseId", copy = TRUE) %>% - mutate(startDay = .data$startDay - .data$offset, - endDay = .data$endDay - .data$offset) %>% + mutate( + startDay = .data$startDay - .data$offset, + endDay = .data$endDay - .data$offset + ) %>% filter(.data$startDay >= 0, .data$startDay < .data$caseEndDay) %>% collect() @@ -198,9 +207,11 @@ plotExposureCentered <- function(studyPopulation, } firstExposures <- exposures %>% group_by(.data$caseId, .data$caseEndDay) %>% - summarise(startDay = min(.data$startDay, na.rm = TRUE), - endDay = min(.data$endDay, na.rm = TRUE), - .groups = "drop_last") + summarise( + startDay = min(.data$startDay, na.rm = TRUE), + endDay = min(.data$endDay, na.rm = TRUE), + .groups = "drop_last" + ) outcomes <- studyPopulation$outcomes %>% inner_join(firstExposures, by = "caseId") %>% @@ -209,8 +220,10 @@ plotExposureCentered <- function(studyPopulation, exposedoutcomes <- exposures %>% inner_join(outcomes, by = "caseId") %>% - filter(.data$outcomeDay >= .data$startDay, - .data$outcomeDay <= .data$endDay) %>% + filter( + .data$outcomeDay >= .data$startDay, + .data$outcomeDay <= .data$endDay + ) %>% select(.data$caseId, .data$delta) %>% mutate(exposed = 1) @@ -219,50 +232,58 @@ plotExposureCentered <- function(studyPopulation, mutate(exposed = coalesce(.data$exposed, 0)) weeks <- dplyr::tibble(number = -26:25) %>% - mutate(start = .data$number*7, - end = .data$number*7 + 7) + mutate( + start = .data$number * 7, + end = .data$number * 7 + 7 + ) events <- weeks %>% full_join(select(outcomes, .data$delta, .data$exposed), by = character()) %>% filter(.data$delta >= .data$start, .data$delta < .data$end) %>% group_by(.data$number, .data$start, .data$end) %>% - summarise(eventsExposed = sum(.data$exposed), - eventsUnexposed = n() - sum(.data$exposed), - .groups = "drop_last") + summarise( + eventsExposed = sum(.data$exposed), + eventsUnexposed = n() - sum(.data$exposed), + .groups = "drop_last" + ) observed <- weeks %>% full_join(transmute(firstExposures, startDelta = -.data$startDay, endDelta = .data$caseEndDay - .data$startDay), by = character()) %>% filter(.data$endDelta >= .data$start, .data$startDelta < .data$end) %>% group_by(.data$number, .data$start, .data$end) %>% - summarise(observed = n(), - .groups = "drop_last") + summarise( + observed = n(), + .groups = "drop_last" + ) if (highlightExposedEvents) { events <- events %>% transmute(.data$start, - .data$end, - type = "Events", - count1 = .data$eventsUnexposed, - count2 = .data$eventsExposed) + .data$end, + type = "Events", + count1 = .data$eventsUnexposed, + count2 = .data$eventsExposed + ) } else { events <- events %>% transmute(.data$start, - .data$end, - type = "Events", - count1 = .data$eventsUnexposed + .data$eventsExposed, - count2 = NA) - + .data$end, + type = "Events", + count1 = .data$eventsUnexposed + .data$eventsExposed, + count2 = NA + ) } observed <- observed %>% transmute(.data$start, - .data$end, - type = "Subjects under observation", - count1 = .data$observed, - count2 = NA) + .data$end, + type = "Subjects under observation", + count1 = .data$observed, + count2 = NA + ) data <- bind_rows(events, observed) - breaks <- seq(-150,150, 30) + breaks <- seq(-150, 150, 30) theme <- ggplot2::element_text(colour = "#000000", size = 12) themeRA <- ggplot2::element_text(colour = "#000000", size = 12, hjust = 1) plot <- ggplot2::ggplot(data, ggplot2::aes(x = .data$start, xmin = .data$start, xmax = .data$end, ymax = .data$count1, ymin = 0)) + @@ -271,23 +292,26 @@ plotExposureCentered <- function(studyPopulation, ggplot2::geom_vline(xintercept = 0, colour = "#000000", lty = 1, size = 1) + ggplot2::scale_x_continuous("Days since first exposure start", breaks = breaks, labels = breaks) + ggplot2::scale_y_continuous("Count") + - ggplot2::facet_grid(type~., scales = "free_y") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_line(colour = "#AAAAAA"), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.y = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + ggplot2::facet_grid(type ~ ., scales = "free_y") + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_line(colour = "#AAAAAA"), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.y = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -337,23 +361,26 @@ plotEventToCalendarTime <- function(studyPopulation, ggplot2::scale_x_date("Calendar time") + ggplot2::scale_y_continuous("Count", limits = c(0, NA)) + ggplot2::facet_grid(.data$type ~ ., scales = "free_y") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_line(colour = "#AAAAAA"), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.y = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_line(colour = "#AAAAAA"), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.y = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) # plot if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 1 + (2 * length(levels)), dpi = 400) + } return(plot) } @@ -377,8 +404,9 @@ plotAgeEffect <- function(sccsModel, rrLim = c(0.1, 10), title = NULL, fileName = NULL) { - if (!hasAgeEffect(sccsModel)) + if (!hasAgeEffect(sccsModel)) { stop("The model does not contain an age effect.") + } estimates <- sccsModel$estimates estimates <- estimates[estimates$covariateId >= 100 & estimates$covariateId < 200, ] @@ -386,16 +414,17 @@ plotAgeEffect <- function(sccsModel, ageKnots <- sccsModel$metaData$age$ageKnots age <- seq(min(ageKnots), max(ageKnots), length.out = 100) ageDesignMatrix <- splines::bs(age, - knots = ageKnots[2:(length(ageKnots) - 1)], - Boundary.knots = ageKnots[c(1, length(ageKnots))]) + knots = ageKnots[2:(length(ageKnots) - 1)], + Boundary.knots = ageKnots[c(1, length(ageKnots))] + ) logRr <- apply(ageDesignMatrix %*% splineCoefs, 1, sum) logRr <- logRr - mean(logRr) rr <- exp(logRr) data <- data.frame(age = age, rr = rr) breaks <- c(0.1, 0.25, 0.5, 1, 2, 4, 6, 8, 10) - ageLabels <- floor(min(ageKnots)/365.25):floor(max(ageKnots)/365.25) + ageLabels <- floor(min(ageKnots) / 365.25):floor(max(ageKnots) / 365.25) if (length(ageLabels) > 10) { - ageLabels <- 10 * (floor(min(ageKnots)/3652.5):floor(max(ageKnots)/3652.5)) + ageLabels <- 10 * (floor(min(ageKnots) / 3652.5):floor(max(ageKnots) / 3652.5)) } ageBreaks <- ageLabels * 365.25 theme <- ggplot2::element_text(colour = "#000000", size = 12) @@ -405,26 +434,30 @@ plotAgeEffect <- function(sccsModel, ggplot2::geom_line(color = rgb(0, 0, 0.8), alpha = 0.8, lwd = 1) + ggplot2::scale_x_continuous("Age", breaks = ageBreaks, labels = ageLabels) + ggplot2::scale_y_continuous("Relative risk", - limits = rrLim, - trans = "log10", - breaks = breaks, - labels = breaks) + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + limits = rrLim, + trans = "log10", + breaks = breaks, + labels = breaks + ) + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.x = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -448,8 +481,9 @@ plotSeasonality <- function(sccsModel, rrLim = c(0.1, 10), title = NULL, fileName = NULL) { - if (!hasSeasonality(sccsModel)) + if (!hasSeasonality(sccsModel)) { stop("The model does not contain seasonality.") + } estimates <- sccsModel$estimates estimates <- estimates[estimates$covariateId >= 200 & estimates$covariateId < 300, ] @@ -471,26 +505,30 @@ plotSeasonality <- function(sccsModel, ggplot2::geom_line(color = rgb(0, 0, 0.8), alpha = 0.8, lwd = 1) + ggplot2::scale_x_continuous("Month", breaks = seasonBreaks, labels = seasonBreaks) + ggplot2::scale_y_continuous("Relative risk", - limits = rrLim, - trans = "log10", - breaks = breaks, - labels = breaks) + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + limits = rrLim, + trans = "log10", + breaks = breaks, + labels = breaks + ) + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.x = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -530,24 +568,27 @@ plotCalendarTimeSpans <- function(studyPopulation, ggplot2::geom_errorbarh(color = rgb(0, 0, 0.8)) + ggplot2::scale_x_date("Calendar time") + ggplot2::scale_y_continuous("Case rank") + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major.x = ggplot2::element_line(colour = "#AAAAAA", size = 0.2), - panel.grid.major.y = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major.x = ggplot2::element_line(colour = "#AAAAAA", size = 0.2), + panel.grid.major.y = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.x = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } # fileName <- "S:/temp/plot.png" - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } @@ -571,8 +612,9 @@ plotCalendarTimeEffect <- function(sccsModel, rrLim = c(0.1, 10), title = NULL, fileName = NULL) { - if (!hasCalendarTimeEffect(sccsModel)) + if (!hasCalendarTimeEffect(sccsModel)) { stop("The model does not contain a calendar time effect.") + } estimates <- sccsModel$estimates estimates <- estimates[estimates$covariateId >= 300 & estimates$covariateId < 400, ] @@ -580,8 +622,9 @@ plotCalendarTimeEffect <- function(sccsModel, calendarTimeKnots <- sccsModel$metaData$calendarTime$calendarTimeKnots calendarTime <- seq(min(calendarTimeKnots), max(calendarTimeKnots), length.out = 100) calendarTimeDesignMatrix <- splines::bs(calendarTime, - knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], - Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))]) + knots = calendarTimeKnots[2:(length(calendarTimeKnots) - 1)], + Boundary.knots = calendarTimeKnots[c(1, length(calendarTimeKnots))] + ) logRr <- apply(calendarTimeDesignMatrix %*% splineCoefs, 1, sum) logRr <- logRr - mean(logRr) rr <- exp(logRr) @@ -594,25 +637,29 @@ plotCalendarTimeEffect <- function(sccsModel, ggplot2::geom_line(color = rgb(0, 0, 0.8), alpha = 0.8, lwd = 1) + ggplot2::scale_x_date("Calendar Time") + ggplot2::scale_y_continuous("Relative risk", - limits = rrLim, - trans = "log10", - breaks = breaks, - labels = breaks) + - ggplot2::theme(panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), - panel.grid.major = ggplot2::element_line(colour = "#AAAAAA", size = 0.2), - axis.ticks = ggplot2::element_blank(), - axis.text.y = themeRA, - axis.text.x = theme, - strip.text.x = theme, - strip.background = ggplot2::element_blank(), - plot.title = ggplot2::element_text(hjust = 0.5), - legend.title = ggplot2::element_blank(), - legend.position = "top") + limits = rrLim, + trans = "log10", + breaks = breaks, + labels = breaks + ) + + ggplot2::theme( + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA), + panel.grid.major = ggplot2::element_line(colour = "#AAAAAA", size = 0.2), + axis.ticks = ggplot2::element_blank(), + axis.text.y = themeRA, + axis.text.x = theme, + strip.text.x = theme, + strip.background = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + legend.title = ggplot2::element_blank(), + legend.position = "top" + ) if (!is.null(title)) { plot <- plot + ggplot2::ggtitle(title) } - if (!is.null(fileName)) + if (!is.null(fileName)) { ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400) + } return(plot) } diff --git a/R/Power.R b/R/Power.R index 6442ccb..1e871ac 100644 --- a/R/Power.R +++ b/R/Power.R @@ -47,8 +47,9 @@ computeMdrr <- function(sccsIntervalData, power = 0.8, twoSided = TRUE, method = "SRL1") { - if (!method %in% c("proportion", "binomial", "SRL1", "SRL2", "ageEffects")) + if (!method %in% c("proportion", "binomial", "SRL1", "SRL2", "ageEffects")) { stop("Method must be either 'proportion', 'binomial', 'SRL1', 'SRL2', or 'ageEffects'.") + } # Check if there is anyone with the exposure at all; nExposed <- sccsIntervalData$covariates %>% @@ -56,12 +57,14 @@ computeMdrr <- function(sccsIntervalData, count() %>% pull() if (nExposed == 0) { - result <- tibble(timeExposed = 0, - timeTotal = 0, - propTimeExposed = 0, - propPopulationExposed = 0, - events = 0, - mdrr = Inf) + result <- tibble( + timeExposed = 0, + timeTotal = 0, + propTimeExposed = 0, + propPopulationExposed = 0, + events = 0, + mdrr = Inf + ) return(result) } @@ -76,9 +79,11 @@ computeMdrr <- function(sccsIntervalData, overall <- sccsIntervalData$outcomes %>% filter(.data$stratumId %in% exposedStratumIds) %>% - summarise(time = sum(.data$time, na.rm = TRUE), - observationPeriods = n_distinct(.data$stratumId), - events = sum(.data$y, na.rm = TRUE)) %>% + summarise( + time = sum(.data$time, na.rm = TRUE), + observationPeriods = n_distinct(.data$stratumId), + events = sum(.data$y, na.rm = TRUE) + ) %>% collect() exposed <- sccsIntervalData$outcomes %>% @@ -101,57 +106,57 @@ computeMdrr <- function(sccsIntervalData, if (method == "distribution") { # expression 5 - computePower <- function(p, z, r, n, alpha) - { + computePower <- function(p, z, r, n, alpha) { zbnum <- log(p) * sqrt(n * p * r * (1 - r)) - z * sqrt(p) zbden <- p * r + 1 - r zb <- zbnum / zbden power <- pnorm(zb) - if (power < alpha | n < 1) + if (power < alpha | n < 1) { power <- alpha + } return(power) } } if (method == "binomial") { # expression 6 - computePower <- function(p, z, r, n, alpha) - { - pi <- p*r/(p*r + 1 - r) + computePower <- function(p, z, r, n, alpha) { + pi <- p * r / (p * r + 1 - r) tAlt <- asin(sqrt(pi)) tNull <- asin(sqrt(r)) zb <- sqrt(n * 4 * (tAlt - tNull)^2) - z power <- pnorm(zb) - if (power < alpha | n < 1) + if (power < alpha | n < 1) { power <- alpha + } return(power) } } if (method == "SRL1") { # expression 7 - computePowerSrl <- function(b, z, r, n, alpha) - { + computePowerSrl <- function(b, z, r, n, alpha) { A <- 2 * ((exp(b) * r / (exp(b) * r + 1 - r)) * b - log(exp(b) * r + 1 - r)) B <- b^2 / A * exp(b) * r * (1 - r) / (exp(b) * r + 1 - r)^2 zb <- (sqrt(n * A) - z) / sqrt(B) power <- pnorm(zb) - if (power < alpha | n < 1) + if (power < alpha | n < 1) { power <- alpha + } return(power) } } if (method == "SRL2") { # expression 8 - computePowerSrl <- function(b, z, r, n, alpha) - { + computePowerSrl <- function(b, z, r, n, alpha) { A <- 2 * pr * (exp(b) * r + 1 - r) / (1 + pr * r * (exp(b) - 1)) * ((exp(b) * r / (exp(b) * r + 1 - r)) * b - log(exp(b) * r + 1 - r)) B <- b^2 / A * pr * (exp(b) * r + 1 - r) / (1 + pr * r * (exp(b) - 1)) * exp(b) * r * (1 - r) / (exp(b) * r + 1 - r)^2 zb <- (sqrt(n * A) - z) / sqrt(B) power <- pnorm(zb) - if (power < alpha | n < 1) + if (power < alpha | n < 1) { power <- alpha + } return(power) } } @@ -167,8 +172,7 @@ computeMdrr <- function(sccsIntervalData, M <- L + (H - L) / 2 if (method %in% c("SRL1", "SRL2")) { powerM <- computePowerSrl(M, z, r, n, alpha) - } - else { + } else { powerM <- computePower(exp(M), z, r, n, alpha) } d <- powerM - power @@ -179,18 +183,21 @@ computeMdrr <- function(sccsIntervalData, } else { return(M) } - if (M == 0 || M == 10) + if (M == 0 || M == 10) { return(M) + } } } mdLogRr <- binarySearch(z, r, n, power, alpha) mdrr <- exp(mdLogRr) - result <- tibble(timeExposed = timeExposed, - timeTotal = timeTotal, - propTimeExposed = round(r, 4), - propPopulationExposed = round(pr, 4), - events = n, - mdrr = round(mdrr, 4)) + result <- tibble( + timeExposed = timeExposed, + timeTotal = timeTotal, + propTimeExposed = round(r, 4), + propPopulationExposed = round(pr, 4), + events = n, + mdrr = round(mdrr, 4) + ) return(result) } diff --git a/R/RunAnalyses.R b/R/RunAnalyses.R index 1b1a459..708545e 100644 --- a/R/RunAnalyses.R +++ b/R/RunAnalyses.R @@ -133,30 +133,39 @@ runSccsAnalyses <- function(connectionDetails, fitSccsModelThreads = 1, cvThreads = 1, analysesToExclude = NULL) { - for (exposureOutcome in exposureOutcomeList) + for (exposureOutcome in exposureOutcomeList) { stopifnot(class(exposureOutcome) == "exposureOutcome") - for (sccsAnalysis in sccsAnalysisList) + } + for (sccsAnalysis in sccsAnalysisList) { stopifnot(class(sccsAnalysis) == "sccsAnalysis") + } if (!is.null(oracleTempSchema) && oracleTempSchema != "") { warning("The 'oracleTempSchema' argument is deprecated. Use 'tempEmulationSchema' instead.") tempEmulationSchema <- oracleTempSchema } - uniqueExposureOutcomeList <- unique(ParallelLogger::selectFromList(exposureOutcomeList, - c("exposureId", "outcomeId"))) - if (length(uniqueExposureOutcomeList) != length(exposureOutcomeList)) + uniqueExposureOutcomeList <- unique(ParallelLogger::selectFromList( + exposureOutcomeList, + c("exposureId", "outcomeId") + )) + if (length(uniqueExposureOutcomeList) != length(exposureOutcomeList)) { stop("Duplicate exposure-outcomes pairs are not allowed") + } uniqueAnalysisIds <- unlist(unique(ParallelLogger::selectFromList(sccsAnalysisList, "analysisId"))) - if (length(uniqueAnalysisIds) != length(sccsAnalysisList)) + if (length(uniqueAnalysisIds) != length(sccsAnalysisList)) { stop("Duplicate analysis IDs are not allowed") + } - if (!file.exists(outputFolder)) + if (!file.exists(outputFolder)) { dir.create(outputFolder) + } - referenceTable <- createReferenceTable(sccsAnalysisList, - exposureOutcomeList, - outputFolder, - combineDataFetchAcrossOutcomes, - analysesToExclude) + referenceTable <- createReferenceTable( + sccsAnalysisList, + exposureOutcomeList, + outputFolder, + combineDataFetchAcrossOutcomes, + analysesToExclude + ) sccsAnalysisPerRow <- attr(referenceTable, "sccsAnalysisPerRow") instantiatedExposureOutcomePerRow <- attr(referenceTable, "instantiatedExposureOutcomePerRow") @@ -171,38 +180,44 @@ runSccsAnalyses <- function(connectionDetails, head(1) loadConcepts <- loadConceptsPerLoad[[referenceRow$loadId]] - if (length(loadConcepts$exposureIds) == 1 && loadConcepts$exposureIds[1] == "all") + if (length(loadConcepts$exposureIds) == 1 && loadConcepts$exposureIds[1] == "all") { loadConcepts$exposureIds <- c() + } useCustomCovariates <- (length(loadConcepts$customCovariateIds) > 0) - if (length(loadConcepts$customCovariateIds) == 1 && loadConcepts$customCovariateIds[1] == "all") + if (length(loadConcepts$customCovariateIds) == 1 && loadConcepts$customCovariateIds[1] == "all") { loadConcepts$customCovariateIds <- c() + } outcomeIds <- unique(loadConcepts$outcomeIds) exposureIds <- unique(loadConcepts$exposureIds) customCovariateIds <- unique(loadConcepts$customCovariateIds) - args <- list(connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - tempEmulationSchema = tempEmulationSchema, - exposureDatabaseSchema = exposureDatabaseSchema, - exposureTable = exposureTable, - outcomeDatabaseSchema = outcomeDatabaseSchema, - outcomeTable = outcomeTable, - customCovariateDatabaseSchema = customCovariateDatabaseSchema, - customCovariateTable = customCovariateTable, - nestingCohortDatabaseSchema = nestingCohortDatabaseSchema, - nestingCohortTable = nestingCohortTable, - cdmVersion = cdmVersion, - exposureIds = exposureIds, - outcomeIds = outcomeIds, - useCustomCovariates = useCustomCovariates, - customCovariateIds = customCovariateIds, - useNestingCohort = loadConcepts$nestingCohortId != -1, - nestingCohortId = loadConcepts$nestingCohortId, - deleteCovariatesSmallCount = loadConcepts$deleteCovariatesSmallCount, - studyStartDate = loadConcepts$studyStartDate, - studyEndDate = loadConcepts$studyEndDate, - maxCasesPerOutcome = loadConcepts$maxCasesPerOutcome) - sccsDataObjectsToCreate[[length(sccsDataObjectsToCreate) + 1]] <- list(args = args, - sccsDataFileName = file.path(outputFolder, sccsDataFileName)) + args <- list( + connectionDetails = connectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + tempEmulationSchema = tempEmulationSchema, + exposureDatabaseSchema = exposureDatabaseSchema, + exposureTable = exposureTable, + outcomeDatabaseSchema = outcomeDatabaseSchema, + outcomeTable = outcomeTable, + customCovariateDatabaseSchema = customCovariateDatabaseSchema, + customCovariateTable = customCovariateTable, + nestingCohortDatabaseSchema = nestingCohortDatabaseSchema, + nestingCohortTable = nestingCohortTable, + cdmVersion = cdmVersion, + exposureIds = exposureIds, + outcomeIds = outcomeIds, + useCustomCovariates = useCustomCovariates, + customCovariateIds = customCovariateIds, + useNestingCohort = loadConcepts$nestingCohortId != -1, + nestingCohortId = loadConcepts$nestingCohortId, + deleteCovariatesSmallCount = loadConcepts$deleteCovariatesSmallCount, + studyStartDate = loadConcepts$studyStartDate, + studyEndDate = loadConcepts$studyEndDate, + maxCasesPerOutcome = loadConcepts$maxCasesPerOutcome + ) + sccsDataObjectsToCreate[[length(sccsDataObjectsToCreate) + 1]] <- list( + args = args, + sccsDataFileName = file.path(outputFolder, sccsDataFileName) + ) } } @@ -215,9 +230,11 @@ runSccsAnalyses <- function(connectionDetails, analysisRow <- sccsAnalysisPerRow[[refRow$rowId]] args <- analysisRow$createStudyPopulationArgs args$outcomeId <- refRow$outcomeId - studyPopFilesToCreate[[length(studyPopFilesToCreate) + 1]] <- list(args = args, - sccsDataFile = file.path(outputFolder, refRow$sccsDataFile), - studyPopFile = file.path(outputFolder, refRow$studyPopFile)) + studyPopFilesToCreate[[length(studyPopFilesToCreate) + 1]] <- list( + args = args, + sccsDataFile = file.path(outputFolder, refRow$sccsDataFile), + studyPopFile = file.path(outputFolder, refRow$studyPopFile) + ) } # Create arguments for interval data objects --------------------------------------------------- @@ -236,8 +253,9 @@ runSccsAnalyses <- function(connectionDetails, args <- analysisRow$createScriIntervalDataArgs } covariateSettings <- args$eraCovariateSettings - if (is(covariateSettings, "EraCovariateSettings")) + if (is(covariateSettings, "EraCovariateSettings")) { covariateSettings <- list(covariateSettings) + } if (!sccs) { covariateSettings[[length(covariateSettings) + 1]] <- args$controlIntervalSettings } @@ -247,8 +265,9 @@ runSccsAnalyses <- function(connectionDetails, if (length(settings$includeEraIds) != 0) { for (includeEraId in settings$includeEraIds) { if (is.character(includeEraId)) { - if (is.null(exposureOutcome[[includeEraId]])) + if (is.null(exposureOutcome[[includeEraId]])) { stop(paste("Variable", includeEraId, " not found in exposure-outcome pair")) + } includeEraIds <- c(includeEraIds, exposureOutcome[[includeEraId]]) } else { includeEraIds <- c(includeEraIds, includeEraId) @@ -259,8 +278,9 @@ runSccsAnalyses <- function(connectionDetails, if (length(settings$excludeEraIds) != 0) { for (excludeEraId in settings$excludeEraIds) { if (is.character(excludeEraId)) { - if (is.null(exposureOutcome[[excludeEraId]])) + if (is.null(exposureOutcome[[excludeEraId]])) { stop(paste("Variable", excludeEraId, " not found in exposure-outcome pair")) + } excludeEraIds <- c(excludeEraIds, exposureOutcome[[excludeEraId]]) } else { excludeEraIds <- c(excludeEraIds, excludeEraId) @@ -279,11 +299,13 @@ runSccsAnalyses <- function(connectionDetails, } sccsDataFileName <- refRow$sccsDataFile studyPopFile <- refRow$studyPopFile - sccsIntervalDataObjectsToCreate[[length(sccsIntervalDataObjectsToCreate) + 1]] <- list(args = args, - sccs = sccs, - sccsDataFileName = file.path(outputFolder, sccsDataFileName), - studyPopFile = file.path(outputFolder, studyPopFile), - sccsIntervalDataFileName = file.path(outputFolder, sccsIntervalDataFile)) + sccsIntervalDataObjectsToCreate[[length(sccsIntervalDataObjectsToCreate) + 1]] <- list( + args = args, + sccs = sccs, + sccsDataFileName = file.path(outputFolder, sccsDataFileName), + studyPopFile = file.path(outputFolder, studyPopFile), + sccsIntervalDataFileName = file.path(outputFolder, sccsIntervalDataFile) + ) } # Create arguments for model objects --------------------------------------------- @@ -295,9 +317,11 @@ runSccsAnalyses <- function(connectionDetails, analysisRow <- sccsAnalysisPerRow[[refRow$rowId]] args <- analysisRow$fitSccsModelArgs args$control$threads <- cvThreads - sccsModelObjectsToCreate[[length(sccsModelObjectsToCreate) + 1]] <- list(args = args, - sccsIntervalDataFileName = file.path(outputFolder, refRow$sccsIntervalDataFile), - sccsModelFileName = file.path(outputFolder, sccsModelFile)) + sccsModelObjectsToCreate[[length(sccsModelObjectsToCreate) + 1]] <- list( + args = args, + sccsIntervalDataFileName = file.path(outputFolder, refRow$sccsIntervalDataFile), + sccsModelFileName = file.path(outputFolder, sccsModelFile) + ) } referenceTable$loadId <- NULL @@ -358,10 +382,12 @@ createReferenceTable <- function(sccsAnalysisList, instantiatedExposureOutcome <- exposureOutcome instantiatedExposureOutcome$exposureId <- .selectByType(sccsAnalysis$exposureType, exposureOutcome$exposureId, "exposure") instantiatedExposureOutcome$outcomeId <- .selectByType(sccsAnalysis$outcomeType, exposureOutcome$outcomeId, "outcome") - row <- tibble(rowId = rowId, - exposureId = instantiatedExposureOutcome$exposureId, - outcomeId = instantiatedExposureOutcome$outcomeId, - analysisId = sccsAnalysis$analysisId) + row <- tibble( + rowId = rowId, + exposureId = instantiatedExposureOutcome$exposureId, + outcomeId = instantiatedExposureOutcome$outcomeId, + analysisId = sccsAnalysis$analysisId + ) referenceTable <- rbind(referenceTable, row) sccsAnalysisPerRow[[rowId]] <- sccsAnalysis instantiatedExposureOutcomePerRow[[rowId]] <- instantiatedExposureOutcome @@ -384,8 +410,9 @@ createReferenceTable <- function(sccsAnalysisList, } else { for (exposureId in sccsAnalysis$getDbSccsDataArgs$exposureIds) { if (suppressWarnings(is.na(as.numeric(exposureId)))) { - if (is.null(exposureOutcome[[exposureId]])) + if (is.null(exposureOutcome[[exposureId]])) { stop(paste("Variable", exposureId, " not found in exposure-outcome pair")) + } exposureIds <- c(exposureIds, exposureOutcome[[exposureId]]) } else { exposureIds <- c(exposureIds, as.numeric(exposureId)) @@ -400,8 +427,9 @@ createReferenceTable <- function(sccsAnalysisList, } else { for (customCovariateId in sccsAnalysis$getDbSccsDataArgs$customCovariateIds) { if (is.character(customCovariateId)) { - if (is.null(exposureOutcome[[customCovariateId]])) + if (is.null(exposureOutcome[[customCovariateId]])) { stop(paste("Variable", customCovariateId, " not found in exposure-outcome pair")) + } customCovariateIds <- c(customCovariateIds, exposureOutcome[[customCovariateId]]) } else { customCovariateIds <- c(customCovariateIds, customCovariateId) @@ -413,15 +441,17 @@ createReferenceTable <- function(sccsAnalysisList, if (sccsAnalysis$getDbSccsDataArgs$useNestingCohort) { nestingCohortId <- sccsAnalysis$getDbSccsDataArgs$nestingCohortId } - row <- list(outcomeId = outcomeId, - exposureIds = exposureIds, - customCovariateIds = customCovariateIds, - nestingCohortId = nestingCohortId, - deleteCovariatesSmallCount = sccsAnalysis$getDbSccsDataArgs$deleteCovariatesSmallCount, - studyStartDate = sccsAnalysis$getDbSccsDataArgs$studyStartDate, - studyEndDate = sccsAnalysis$getDbSccsDataArgs$studyEndDate, - maxCasesPerOutcome = sccsAnalysis$getDbSccsDataArgs$maxCasesPerOutcome, - rowId = rowId) + row <- list( + outcomeId = outcomeId, + exposureIds = exposureIds, + customCovariateIds = customCovariateIds, + nestingCohortId = nestingCohortId, + deleteCovariatesSmallCount = sccsAnalysis$getDbSccsDataArgs$deleteCovariatesSmallCount, + studyStartDate = sccsAnalysis$getDbSccsDataArgs$studyStartDate, + studyEndDate = sccsAnalysis$getDbSccsDataArgs$studyEndDate, + maxCasesPerOutcome = sccsAnalysis$getDbSccsDataArgs$maxCasesPerOutcome, + rowId = rowId + ) conceptsPerLoad[[length(conceptsPerLoad) + 1]] <- row rowId <- rowId + 1 } @@ -429,20 +459,28 @@ createReferenceTable <- function(sccsAnalysisList, # Group loads where possible if (combineDataFetchAcrossOutcomes) { - uniqueLoads <- unique(ParallelLogger::selectFromList(conceptsPerLoad, - c("nestingCohortId", - "deleteCovariatesSmallCount", - "studyStartDate", - "studyEndDate", - "maxCasesPerOutcome"))) + uniqueLoads <- unique(ParallelLogger::selectFromList( + conceptsPerLoad, + c( + "nestingCohortId", + "deleteCovariatesSmallCount", + "studyStartDate", + "studyEndDate", + "maxCasesPerOutcome" + ) + )) } else { - uniqueLoads <- unique(ParallelLogger::selectFromList(conceptsPerLoad, - c("nestingCohortId", - "deleteCovariatesSmallCount", - "studyStartDate", - "studyEndDate", - "maxCasesPerOutcome", - "outcomeId"))) + uniqueLoads <- unique(ParallelLogger::selectFromList( + conceptsPerLoad, + c( + "nestingCohortId", + "deleteCovariatesSmallCount", + "studyStartDate", + "studyEndDate", + "maxCasesPerOutcome", + "outcomeId" + ) + )) } # Compute unions of concept sets @@ -474,14 +512,16 @@ createReferenceTable <- function(sccsAnalysisList, } rowIds <- c(rowIds, groupable$rowId) } - loadConceptsPerLoad[[loadId]] <- list(exposureIds = exposureIds, - outcomeIds = outcomeIds, - customCovariateIds = customCovariateIds, - nestingCohortId = groupables[[1]]$nestingCohortId, - deleteCovariatesSmallCount = groupables[[1]]$deleteCovariatesSmallCount, - studyStartDate = groupables[[1]]$studyStartDate, - studyEndDate = groupables[[1]]$studyEndDate, - maxCasesPerOutcome = groupables[[1]]$maxCasesPerOutcome) + loadConceptsPerLoad[[loadId]] <- list( + exposureIds = exposureIds, + outcomeIds = outcomeIds, + customCovariateIds = customCovariateIds, + nestingCohortId = groupables[[1]]$nestingCohortId, + deleteCovariatesSmallCount = groupables[[1]]$deleteCovariatesSmallCount, + studyStartDate = groupables[[1]]$studyStartDate, + studyEndDate = groupables[[1]]$studyEndDate, + maxCasesPerOutcome = groupables[[1]]$maxCasesPerOutcome + ) sccsDataFileName <- .createSccsDataFileName(loadId) referenceTable$loadId[rowIds] <- loadId referenceTable$sccsDataFile[rowIds] <- sccsDataFileName @@ -491,16 +531,26 @@ createReferenceTable <- function(sccsAnalysisList, # Add study population filenames -------------------------- analysisIds <- unlist(ParallelLogger::selectFromList(sccsAnalysisList, "analysisId")) uniqueStudyPopArgs <- unique(ParallelLogger::selectFromList(sccsAnalysisList, "createStudyPopulationArgs")) - uniqueStudyPopArgs <- lapply(uniqueStudyPopArgs, function(x) return(x[[1]])) - studyPopId <- sapply(sccsAnalysisList, - function(sccsAnalysis, uniqueStudyPopArgs) return(which.list(uniqueStudyPopArgs, - sccsAnalysis$createStudyPopulationArgs)), - uniqueStudyPopArgs) + uniqueStudyPopArgs <- lapply(uniqueStudyPopArgs, function(x) { + return(x[[1]]) + }) + studyPopId <- sapply( + sccsAnalysisList, + function(sccsAnalysis, uniqueStudyPopArgs) { + return(which.list( + uniqueStudyPopArgs, + sccsAnalysis$createStudyPopulationArgs + )) + }, + uniqueStudyPopArgs + ) analysisIdToStudyPopId <- tibble(analysisId = analysisIds, studyPopId = studyPopId) referenceTable <- inner_join(referenceTable, analysisIdToStudyPopId, by = "analysisId") - referenceTable$studyPopFile <- .createStudyPopulationFileName(loadId = referenceTable$loadId, - studyPopId = referenceTable$studyPopId, - outcomeId = referenceTable$outcomeId) + referenceTable$studyPopFile <- .createStudyPopulationFileName( + loadId = referenceTable$loadId, + studyPopId = referenceTable$studyPopId, + outcomeId = referenceTable$outcomeId + ) # Add interval data and model filenames ----------------------------------------------------- for (sccsAnalysis in sccsAnalysisList) { @@ -511,16 +561,20 @@ createReferenceTable <- function(sccsAnalysisList, } generateFileName <- function(i) { - return(.createSccsIntervalDataFileName(paste("Analysis_", referenceTable$analysisId[i], sep = ""), - referenceTable$exposureId[i], - referenceTable$outcomeId[i])) + return(.createSccsIntervalDataFileName( + paste("Analysis_", referenceTable$analysisId[i], sep = ""), + referenceTable$exposureId[i], + referenceTable$outcomeId[i] + )) } referenceTable$sccsIntervalDataFile <- generateFileName(1:nrow(referenceTable)) generateFileName <- function(i) { - return(.createSccsModelFileName(paste("Analysis_", referenceTable$analysisId[i], sep = ""), - referenceTable$exposureId[i], - referenceTable$outcomeId[i])) + return(.createSccsModelFileName( + paste("Analysis_", referenceTable$analysisId[i], sep = ""), + referenceTable$exposureId[i], + referenceTable$outcomeId[i] + )) } referenceTable$sccsModelFile <- generateFileName(1:nrow(referenceTable)) @@ -535,9 +589,11 @@ createReferenceTable <- function(sccsAnalysisList, referenceTable <- referenceTable %>% anti_join(analysesToExclude, by = matchingColumns) countAfter <- nrow(referenceTable) - ParallelLogger::logInfo(sprintf("Removed %d of the %d exposure-outcome-analysis combinations as specified by the user.", - countBefore - countAfter, - countBefore)) + ParallelLogger::logInfo(sprintf( + "Removed %d of the %d exposure-outcome-analysis combinations as specified by the user.", + countBefore - countAfter, + countBefore + )) } return(referenceTable) @@ -545,7 +601,11 @@ createReferenceTable <- function(sccsAnalysisList, which.list <- function(list, object) { return(do.call("c", lapply(1:length(list), function(i, list, object) { - if (identical(list[[i]], object)) return(i) else return(c()) + if (identical(list[[i]], object)) { + return(i) + } else { + return(c()) + } }, list, object))) } @@ -596,9 +656,11 @@ createSccsModelObject <- function(params) { sccsIntervalData <- loadSccsIntervalData(params$sccsIntervalDataFileName) params$args$sccsIntervalData <- sccsIntervalData # sccsModel <- do.call("fitSccsModel", params$args) - sccsModel <- fitSccsModel(sccsIntervalData = sccsIntervalData, - prior = params$args$prior, - control = params$args$control) + sccsModel <- fitSccsModel( + sccsIntervalData = sccsIntervalData, + prior = params$args$prior, + control = params$args$control + ) saveRDS(sccsModel, params$sccsModelFileName) return(NULL) } @@ -633,9 +695,10 @@ createSccsModelObject <- function(params) { if (is.null(type)) { if (is.list(value)) { stop(paste("Multiple ", - label, - "s specified, but none selected in analyses (comparatorType).", - sep = "")) + label, + "s specified, but none selected in analyses (comparatorType).", + sep = "" + )) } return(value) } else { @@ -672,25 +735,31 @@ summarizeSccsAnalyses <- function(referenceTable, outputFolder) { estimates <- sccsModel$estimates[sccsModel$estimates$originalEraId == referenceTable$exposureId[i], ] if (!is.null(estimates) && nrow(estimates) != 0) { for (j in 1:nrow(estimates)) { - estimatesToInsert <- c(rr = exp(estimates$logRr[j]), - ci95lb = exp(estimates$logLb95[j]), - ci95ub = exp(estimates$logUb95[j]), - logRr = estimates$logRr[j], - seLogRr = estimates$seLogRr[j], - llr = estimates$llr) + estimatesToInsert <- c( + rr = exp(estimates$logRr[j]), + ci95lb = exp(estimates$logLb95[j]), + ci95ub = exp(estimates$logUb95[j]), + logRr = estimates$logRr[j], + seLogRr = estimates$seLogRr[j], + llr = estimates$llr + ) if (grepl(".*, day -?[0-9]+--?[0-9]*$", estimates$covariateName[j])) { name <- as.character(estimates$covariateName[j]) - pos1 <- attr(regexpr("^[^:]*:", name),"match.length") - 1 + pos1 <- attr(regexpr("^[^:]*:", name), "match.length") - 1 pos2 <- regexpr(", day -?[0-9]+--?[0-9]*$", name) + 2 - label <- paste(substr(name, 1, pos1), - substr(name, pos2, nchar(name))) + label <- paste( + substr(name, 1, pos1), + substr(name, pos2, nchar(name)) + ) } else { label <- sub(":.*$", "", estimates$covariateName[j]) } - names(estimatesToInsert) <- paste0(names(estimatesToInsert), - "(", - label, - ")") + names(estimatesToInsert) <- paste0( + names(estimatesToInsert), + "(", + label, + ")" + ) for (colName in names(estimatesToInsert)) { if (!(colName %in% colnames(result))) { result$newVar <- as.numeric(NA) diff --git a/R/SccsData.R b/R/SccsData.R index c1ee7d1..aa2e16f 100644 --- a/R/SccsData.R +++ b/R/SccsData.R @@ -47,12 +47,15 @@ setClass("SccsData", contains = "Andromeda") #' #' @export saveSccsData <- function(SccsData, file) { - if (missing(SccsData)) + if (missing(SccsData)) { stop("Must specify SccsData") - if (missing(file)) + } + if (missing(file)) { stop("Must specify file") - if (!inherits(SccsData, "SccsData")) + } + if (!inherits(SccsData, "SccsData")) { stop("Data not of class SccsData") + } Andromeda::saveAndromeda(SccsData, file) } @@ -69,10 +72,12 @@ saveSccsData <- function(SccsData, file) { #' #' @export loadSccsData <- function(file) { - if (!file.exists(file)) + if (!file.exists(file)) { stop("Cannot find file ", file) - if (file.info(file)$isdir) - stop(file , " is a folder, but should be a file") + } + if (file.info(file)$isdir) { + stop(file, " is a folder, but should be a file") + } SccsData <- Andromeda::loadAndromeda(file) class(SccsData) <- "SccsData" attr(class(SccsData), "package") <- "SelfControlledCaseSeries" @@ -91,11 +96,15 @@ setMethod("show", "SccsData", function(object) { if (length(metaData$exposureIds) == 0) { cli::cat_line("All exposures") } else { - cli::cat_line(paste("Exposure cohort ID(s):", - paste(metaData$exposureIds, collapse = ","))) + cli::cat_line(paste( + "Exposure cohort ID(s):", + paste(metaData$exposureIds, collapse = ",") + )) } - cli::cat_line(paste("Outcome cohort ID(s):", - paste(metaData$outcomeIds, collapse = ","))) + cli::cat_line(paste( + "Outcome cohort ID(s):", + paste(metaData$outcomeIds, collapse = ",") + )) cli::cat_line("") cli::cat_line(pillar::style_subtle("Inherits from Andromeda:")) class(object) <- "Andromeda" @@ -109,8 +118,9 @@ setMethod("show", "SccsData", function(object) { #' @export #' @rdname SccsData-class setMethod("summary", "SccsData", function(object) { - if (!Andromeda::isValidAndromeda(object)) + if (!Andromeda::isValidAndromeda(object)) { stop("Object is not valid. Probably the Andromeda object was closed.") + } caseCount <- object$cases %>% count() %>% pull() @@ -120,17 +130,21 @@ setMethod("summary", "SccsData", function(object) { filter(.data$eraType == "hoi") %>% inner_join(object$cases, by = "caseId") %>% group_by(.data$eraId) %>% - summarise(outcomeSubjects = n_distinct(.data$personId), - outcomeEvents = count(), - outcomeObsPeriods = n_distinct(.data$caseId)) %>% + summarise( + outcomeSubjects = n_distinct(.data$personId), + outcomeEvents = count(), + outcomeObsPeriods = n_distinct(.data$caseId) + ) %>% rename(outcomeId = .data$eraId) %>% collect() - result <- list(metaData = attr(object, "metaData"), - caseCount = caseCount, - outcomeCounts = outcomeCounts, - eraTypeCount = object$eraRef %>% count() %>% pull(), - eraCount = object$eras %>% count() %>% pull()) + result <- list( + metaData = attr(object, "metaData"), + caseCount = caseCount, + outcomeCounts = outcomeCounts, + eraTypeCount = object$eraRef %>% count() %>% pull(), + eraCount = object$eras %>% count() %>% pull() + ) class(result) <- "summary.SccsData" return(result) }) @@ -143,11 +157,15 @@ print.summary.SccsData <- function(x, ...) { if (length(metaData$exposureIds) == 0) { writeLines("All exposures") } else { - writeLines(paste("Exposure cohort ID(s):", - paste(x$metaData$exposureIds, collapse = ","))) + writeLines(paste( + "Exposure cohort ID(s):", + paste(x$metaData$exposureIds, collapse = ",") + )) } - writeLines(paste("Outcome cohort ID(s):", - paste(metaData$outcomeIds, collapse = ","))) + writeLines(paste( + "Outcome cohort ID(s):", + paste(metaData$outcomeIds, collapse = ",") + )) writeLines("") writeLines("Outcome counts:") outcomeCounts <- as.data.frame(x$outcomeCounts) diff --git a/R/SccsIntervalData.R b/R/SccsIntervalData.R index 26d0542..a38d209 100644 --- a/R/SccsIntervalData.R +++ b/R/SccsIntervalData.R @@ -48,12 +48,15 @@ setClass("SccsIntervalData", contains = "Andromeda") #' #' @export saveSccsIntervalData <- function(SccsIntervalData, file) { - if (missing(SccsIntervalData)) + if (missing(SccsIntervalData)) { stop("Must specify SccsIntervalData") - if (missing(file)) + } + if (missing(file)) { stop("Must specify file") - if (!inherits(SccsIntervalData, "SccsIntervalData")) + } + if (!inherits(SccsIntervalData, "SccsIntervalData")) { stop("Data not of class SccsIntervalData") + } Andromeda::saveAndromeda(SccsIntervalData, file) } @@ -70,10 +73,12 @@ saveSccsIntervalData <- function(SccsIntervalData, file) { #' #' @export loadSccsIntervalData <- function(file) { - if (!file.exists(file)) + if (!file.exists(file)) { stop("Cannot find file ", file) - if (file.info(file)$isdir) - stop(file , " is a folder, but should be a file") + } + if (file.info(file)$isdir) { + stop(file, " is a folder, but should be a file") + } SccsIntervalData <- Andromeda::loadAndromeda(file) class(SccsIntervalData) <- "SccsIntervalData" attr(class(SccsIntervalData), "package") <- "SelfControlledCaseSeries" @@ -103,21 +108,34 @@ setMethod("show", "SccsIntervalData", function(object) { #' @export #' @rdname SccsIntervalData-class setMethod("summary", "SccsIntervalData", function(object) { - if (!Andromeda::isValidAndromeda(object)) + if (!Andromeda::isValidAndromeda(object)) { stop("Object is not valid. Probably the Andromeda object was closed.") + } - caseCount <- object$outcomes %>% summarise(n = n_distinct(.data$stratumId)) %>% pull() - eraCount <- object$outcomes %>% count() %>% pull() - outcomeCount <- object$outcomes %>% summarise(n = sum(.data$y, na.rm = TRUE)) %>% pull() - covariateCount <- object$covariateRef %>% count() %>% pull() - covariateValueCount <- object$covariates %>% count() %>% pull() + caseCount <- object$outcomes %>% + summarise(n = n_distinct(.data$stratumId)) %>% + pull() + eraCount <- object$outcomes %>% + count() %>% + pull() + outcomeCount <- object$outcomes %>% + summarise(n = sum(.data$y, na.rm = TRUE)) %>% + pull() + covariateCount <- object$covariateRef %>% + count() %>% + pull() + covariateValueCount <- object$covariates %>% + count() %>% + pull() - result <- list(metaData = attr(object, "metaData"), - caseCount = caseCount, - eraCount = eraCount, - outcomeCount = outcomeCount, - covariateCount = covariateCount, - covariateValueCount = covariateValueCount) + result <- list( + metaData = attr(object, "metaData"), + caseCount = caseCount, + eraCount = eraCount, + outcomeCount = outcomeCount, + covariateCount = covariateCount, + covariateValueCount = covariateValueCount + ) class(result) <- "summary.SccsIntervalData" return(result) diff --git a/R/ScriDataConversion.R b/R/ScriDataConversion.R index c7fa5c4..2aff510 100644 --- a/R/ScriDataConversion.R +++ b/R/ScriDataConversion.R @@ -46,8 +46,9 @@ createScriIntervalData <- function(studyPopulation, sccsData, eraCovariateSettings, controlIntervalSettings) { - if (class(controlIntervalSettings) != "ControlIntervalSettings") + if (class(controlIntervalSettings) != "ControlIntervalSettings") { stop("The controlIntervalSettings argument should be of type 'ControlIntervalSettings'") + } start <- Sys.time() if (nrow(studyPopulation$outcomes) == 0) { @@ -70,7 +71,7 @@ createScriIntervalData <- function(studyPopulation, } else { covariateSettings <- list(eraCovariateSettings) } - covariateSettings[[length(covariateSettings) + 1]] <- controlIntervalSettings + covariateSettings[[length(covariateSettings) + 1]] <- controlIntervalSettings settings <- addEraCovariateSettings(settings, covariateSettings, sccsData) settings$metaData$covariateSettingsList <- cleanCovariateSettingsList(settings$covariateSettingsList) metaData <- append(studyPopulation$metaData, settings$metaData) @@ -83,23 +84,25 @@ createScriIntervalData <- function(studyPopulation, arrange(.data$caseId) controlIntervalId <- settings$covariateSettingsList[sapply(settings$covariateSettingsList, function(x) x$isControlInterval)][[1]]$outputIds[1, 1] - data <- convertToSccs(cases = cases, - outcomes = outcomes, - eras = eras, - includeAge = FALSE, - ageOffset = 0, - ageDesignMatrix = matrix(), - includeSeason = FALSE, - seasonDesignMatrix = matrix(), - includeCalendarTime = FALSE, - calendarTimeOffset = 0, - calendarTimeDesignMatrix = matrix(), - timeCovariateCases = numeric(0), - covariateSettingsList = settings$covariateSettingsList, - eventDependentObservation = FALSE, - censorModel = list(model = 0, p = c(0)), - scri = TRUE, - controlIntervalId = controlIntervalId) + data <- convertToSccs( + cases = cases, + outcomes = outcomes, + eras = eras, + includeAge = FALSE, + ageOffset = 0, + ageDesignMatrix = matrix(), + includeSeason = FALSE, + seasonDesignMatrix = matrix(), + includeCalendarTime = FALSE, + calendarTimeOffset = 0, + calendarTimeDesignMatrix = matrix(), + timeCovariateCases = numeric(0), + covariateSettingsList = settings$covariateSettingsList, + eventDependentObservation = FALSE, + censorModel = list(model = 0, p = c(0)), + scri = TRUE, + controlIntervalId = controlIntervalId + ) if (is.null(data$outcomes) || is.null(data$covariates)) { warning("Conversion resulted in empty data set. Perhaps no one with the outcome had any exposure of interest?") @@ -108,14 +111,12 @@ createScriIntervalData <- function(studyPopulation, if (nrow(settings$covariateRef) > 0) { data$covariateRef <- settings$covariateRef } - } else { metaData$covariateStatistics <- collect(data$covariateStatistics) metaData$daysObserved <- pull(data$observedDays, .data$observedDays) data$covariateStatistics <- NULL data$observedDays <- NULL data$covariateRef <- settings$covariateRef - } attr(data, "metaData") <- metaData class(data) <- "SccsIntervalData" @@ -129,5 +130,8 @@ createScriIntervalData <- function(studyPopulation, cleanCovariateSettingsList <- function(covariateSettingsList) { # Remove control interval settings and field: noCi <- covariateSettingsList[!sapply(covariateSettingsList, function(x) x$isControlInterval)] - return(lapply(noCi, function(x) {x$isControlInterval <- NULL; return(x)})) + return(lapply(noCi, function(x) { + x$isControlInterval <- NULL + return(x) + })) } diff --git a/R/Simulation.R b/R/Simulation.R index 3fa4255..5199115 100644 --- a/R/Simulation.R +++ b/R/Simulation.R @@ -51,8 +51,9 @@ createSimulationRiskWindow <- function(start = 0, # Second: overwrite defaults with actual values: values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3))) for (name in names(values)) { - if (name %in% names(analysis)) + if (name %in% names(analysis)) { analysis[[name]] <- values[[name]] + } } class(analysis) <- "simulationRiskWindow" return(analysis) @@ -105,8 +106,10 @@ createSccsSimulationSettings <- function(meanPatientTime = 4 * 365, usageRate = c(0.01, 0.01), meanPrescriptionDurations = c(14, 30), sdPrescriptionDurations = c(7, 14), - simulationRiskWindows = list(createSimulationRiskWindow(relativeRisks = 1), - createSimulationRiskWindow(relativeRisks = 1.5)), + simulationRiskWindows = list( + createSimulationRiskWindow(relativeRisks = 1), + createSimulationRiskWindow(relativeRisks = 1.5) + ), includeAgeEffect = TRUE, ageKnots = 5, includeSeasonality = TRUE, @@ -122,8 +125,9 @@ createSccsSimulationSettings <- function(meanPatientTime = 4 * 365, # Second: overwrite defaults with actual values: values <- lapply(as.list(match.call())[-1], function(x) eval(x, envir = sys.frame(-3))) for (name in names(values)) { - if (name %in% names(analysis)) + if (name %in% names(analysis)) { analysis[[name]] <- values[[name]] + } } class(analysis) <- "sccsSimulationSettings" return(analysis) @@ -140,50 +144,59 @@ simulateBatch <- function(settings, ageFun, seasonFun, calendarTimeFun, caseIdOf maxCalendarDays <- as.numeric(settings$maxCalendarTime) - as.numeric(settings$minCalendarTime) observationDays[observationDays > maxCalendarDays] <- maxCalendarDays ageInDays <- round(runif(n, settings$minAge, settings$maxAge - observationDays)) - startDate <- round(runif(n, - rep(as.numeric(settings$minCalendarTime), n), - as.numeric(settings$maxCalendarTime) - observationDays)) + startDate <- round(runif( + n, + rep(as.numeric(settings$minCalendarTime), n), + as.numeric(settings$maxCalendarTime) - observationDays + )) startDate <- as.Date(startDate, origin = "1970-01-01") startYear <- as.numeric(format(startDate, format = "%Y")) startMonth <- as.numeric(format(startDate, format = "%m")) startDay <- as.numeric(format(startDate, format = "%d")) - cases <- tibble(observationPeriodId = 1:n, - caseId = 1:n, - personId = 1:n, - observationDays = observationDays, - ageInDays = ageInDays, - startYear = startYear, - startMonth = startMonth, - startDay = startDay, - startDate = as.numeric(startDate), - censoredDays = 0, - noninformativeEndCensor = 0) + cases <- tibble( + observationPeriodId = 1:n, + caseId = 1:n, + personId = 1:n, + observationDays = observationDays, + ageInDays = ageInDays, + startYear = startYear, + startMonth = startMonth, + startDay = startDay, + startDate = as.numeric(startDate), + censoredDays = 0, + noninformativeEndCensor = 0 + ) ### Generate eras ### eras <- tibble() for (i in 1:length(settings$eraIds)) { # i <- 1 patientsOnDrug <- sample.int(nrow(cases), - settings$patientUsages[i] * nrow(cases), - replace = FALSE) + settings$patientUsages[i] * nrow(cases), + replace = FALSE + ) patientsOnDrug <- patientsOnDrug[order(patientsOnDrug)] count <- rpois(length(patientsOnDrug), observationDays[patientsOnDrug] * settings$usageRate[i]) observationPeriodId <- rep(patientsOnDrug, count) patientsOnDrug <- patientsOnDrug[count != 0] startDay <- round(runif(sum(count), 0, cases$observationDays[observationPeriodId])) - duration <- round(rnorm(sum(count), - settings$meanPrescriptionDurations[i], - settings$sdPrescriptionDurations[i])) + duration <- round(rnorm( + sum(count), + settings$meanPrescriptionDurations[i], + settings$sdPrescriptionDurations[i] + )) duration[duration < 1] <- 1 endDay <- startDay + duration endDay[endDay > cases$observationDays[observationPeriodId]] <- cases$observationDays[observationPeriodId][endDay > - cases$observationDays[observationPeriodId]] - newEras <- tibble(eraType = "rx", - caseId = observationPeriodId, - eraId = settings$eraIds[i], - value = 1, - startDay = startDay, - endDay = endDay) + cases$observationDays[observationPeriodId]] + newEras <- tibble( + eraType = "rx", + caseId = observationPeriodId, + eraId = settings$eraIds[i], + value = 1, + startDay = startDay, + endDay = endDay + ) eras <- rbind(eras, newEras) } eras <- eras[order(eras$caseId, eras$eraId), ] @@ -227,12 +240,14 @@ simulateBatch <- function(settings, ageFun, seasonFun, calendarTimeFun, caseIdOf truncatedEnds <- riskEnds truncatedEnds[truncatedEnds > end] <- end filteredIndex <- truncatedEnds >= start - riskEras <- tibble(eraType = "rx", - caseId = sourceEras$caseId[filteredIndex], - eraId = eraId, - value = 1, - startDay = sourceEras$startDay[filteredIndex] + start, - endDay = sourceEras$startDay[filteredIndex] + truncatedEnds[filteredIndex]) + riskEras <- tibble( + eraType = "rx", + caseId = sourceEras$caseId[filteredIndex], + eraId = eraId, + value = 1, + startDay = sourceEras$startDay[filteredIndex] + start, + endDay = sourceEras$startDay[filteredIndex] + truncatedEnds[filteredIndex] + ) newEras <- rbind(newEras, riskEras) eraIds <- c(eraIds, eraId) rrs <- c(rrs, simulationRiskWindow$relativeRisks[j]) @@ -242,24 +257,28 @@ simulateBatch <- function(settings, ageFun, seasonFun, calendarTimeFun, caseIdOf } newEras <- newEras[order(newEras$caseId, newEras$eraId), ] eraRrs <- tibble(eraId = eraIds, rr = rrs) - outcomes <- simulateSccsOutcomes(cases, - newEras, - baselineRates, - eraRrs, - settings$includeAgeEffect, - settings$minAge, - ageRrs, - settings$includeSeasonality, - seasonRrs, - settings$includeCalendarTimeEffect, - as.numeric(settings$minCalendarTime), - calendarTimeRrs) - outcomes <- tibble(eraType = "hoi", - caseId = outcomes$caseId, - eraId = settings$outcomeId, - value = 1, - startDay = outcomes$startDay, - endDay = outcomes$startDay) + outcomes <- simulateSccsOutcomes( + cases, + newEras, + baselineRates, + eraRrs, + settings$includeAgeEffect, + settings$minAge, + ageRrs, + settings$includeSeasonality, + seasonRrs, + settings$includeCalendarTimeEffect, + as.numeric(settings$minCalendarTime), + calendarTimeRrs + ) + outcomes <- tibble( + eraType = "hoi", + caseId = outcomes$caseId, + eraId = settings$outcomeId, + value = 1, + startDay = outcomes$startDay, + endDay = outcomes$startDay + ) # ** Remove non-cases *** caseIds <- unique(outcomes$caseId) @@ -325,29 +344,39 @@ simulateSccsData <- function(nCases, settings) { lastCaseId <- max(batch$cases$caseId) } else { cases <- rbind(cases, batch$cases[1:need, ]) - eras <- rbind(eras, - batch$eras[batch$eras$caseId %in% batch$cases$caseId[1:need],]) + eras <- rbind( + eras, + batch$eras[batch$eras$caseId %in% batch$cases$caseId[1:need], ] + ) } } cases$observationPeriodId <- as.character(cases$observationPeriodId) cases$personId <- as.character(cases$personId) - data <- Andromeda::andromeda(cases = cases, - eras = eras, - eraRef = tibble(eraId = settings$eraIds, - eraType = "", - eraName = "")) + data <- Andromeda::andromeda( + cases = cases, + eras = eras, + eraRef = tibble( + eraId = settings$eraIds, + eraType = "", + eraName = "" + ) + ) - attr(data, "metaData") <- list(sccsSimulationSettings = settings, - ageFun = ageFun, - seasonFun = seasonFun, - calendarTimeFun = calendarTimeFun, - exposureIds = settings$eraIds, - outcomeIds = settings$outcomeId, - attrition = tibble(outcomeId = settings$outcomeId, - description = "Outcomes", - outcomeSubjects = 0, - outcomeEvents = 0, - outcomeObsPeriods = 0)) + attr(data, "metaData") <- list( + sccsSimulationSettings = settings, + ageFun = ageFun, + seasonFun = seasonFun, + calendarTimeFun = calendarTimeFun, + exposureIds = settings$eraIds, + outcomeIds = settings$outcomeId, + attrition = tibble( + outcomeId = settings$outcomeId, + description = "Outcomes", + outcomeSubjects = 0, + outcomeEvents = 0, + outcomeObsPeriods = 0 + ) + ) class(data) <- "SccsData" attr(class(data), "package") <- "SelfControlledCaseSeries" diff --git a/R/StudyPopulation.R b/R/StudyPopulation.R index f8b3586..1e381d4 100644 --- a/R/StudyPopulation.R +++ b/R/StudyPopulation.R @@ -74,18 +74,24 @@ createStudyPopulation <- function(sccsData, filter(row_number(.data$startDay) == 1) %>% ungroup() - attrition <- bind_rows(attrition, - countOutcomes(outcomes, cases, "First outcome only")) + attrition <- bind_rows( + attrition, + countOutcomes(outcomes, cases, "First outcome only") + ) } cases <- cases %>% - mutate(startAgeInDays = .data$ageInDays, - endAgeInDays = .data$ageInDays + .data$observationDays - 1) + mutate( + startAgeInDays = .data$ageInDays, + endAgeInDays = .data$ageInDays + .data$observationDays - 1 + ) if (naivePeriod != 0) { cases <- cases %>% - mutate(startAgeInDays = case_when(naivePeriod > .data$censoredDays ~ .data$startAgeInDays + naivePeriod - .data$censoredDays, - TRUE ~ .data$startAgeInDays)) %>% + mutate(startAgeInDays = case_when( + naivePeriod > .data$censoredDays ~ .data$startAgeInDays + naivePeriod - .data$censoredDays, + TRUE ~ .data$startAgeInDays + )) %>% filter(.data$endAgeInDays > .data$startAgeInDays) outcomes <- outcomes %>% @@ -96,8 +102,10 @@ createStudyPopulation <- function(sccsData, cases <- cases %>% filter(.data$caseId %in% unique(outcomes$caseId)) - attrition <- bind_rows(attrition, - countOutcomes(outcomes, cases, sprintf("%s days naive period", naivePeriod))) + attrition <- bind_rows( + attrition, + countOutcomes(outcomes, cases, sprintf("%s days naive period", naivePeriod)) + ) } if (!is.null(minAge) || !is.null(maxAge)) { @@ -105,18 +113,26 @@ createStudyPopulation <- function(sccsData, if (!is.null(minAge)) { minAgeInDays <- minAge * 365.25 cases <- cases %>% - mutate(startAgeInDays = case_when(.data$startAgeInDays < minAgeInDays ~ minAgeInDays, - TRUE ~ .data$startAgeInDays)) %>% + mutate(startAgeInDays = case_when( + .data$startAgeInDays < minAgeInDays ~ minAgeInDays, + TRUE ~ .data$startAgeInDays + )) %>% filter(.data$endAgeInDays > .data$startAgeInDays) labels <- c(labels, sprintf("Age >= %s", minAge)) } if (!is.null(maxAge)) { maxAgeInDays <- round((maxAge + 1) * 365.25) cases <- cases %>% - mutate(noninformativeEndCensor = case_when(.data$endAgeInDays > maxAgeInDays ~ 1, - TRUE ~ noninformativeEndCensor), - endAgeInDays = case_when(.data$endAgeInDays > maxAgeInDays ~ maxAgeInDays, - TRUE ~ .data$endAgeInDays)) %>% + mutate( + noninformativeEndCensor = case_when( + .data$endAgeInDays > maxAgeInDays ~ 1, + TRUE ~ noninformativeEndCensor + ), + endAgeInDays = case_when( + .data$endAgeInDays > maxAgeInDays ~ maxAgeInDays, + TRUE ~ .data$endAgeInDays + ) + ) %>% filter(.data$endAgeInDays > .data$startAgeInDays) labels <- c(labels, sprintf("Age <= %s", maxAge)) } @@ -124,22 +140,28 @@ createStudyPopulation <- function(sccsData, outcomes <- outcomes %>% inner_join(select(cases, .data$observationPeriodId, .data$caseId, .data$startAgeInDays, .data$endAgeInDays, .data$ageInDays), by = "caseId") %>% filter(.data$startDay >= .data$startAgeInDays - .data$ageInDays & - .data$startDay <= .data$endAgeInDays - .data$ageInDays) %>% + .data$startDay <= .data$endAgeInDays - .data$ageInDays) %>% select(-.data$startAgeInDays, -.data$endAgeInDays, -.data$ageInDays) - attrition <- bind_rows(attrition, - countOutcomes(outcomes, cases, paste(labels, collapse = " & "))) + attrition <- bind_rows( + attrition, + countOutcomes(outcomes, cases, paste(labels, collapse = " & ")) + ) } - metaData <- list(exposureIds = attr(sccsData, "metaData")$exposureIds, - outcomeId = unique(outcomes$eraId), - attrition = attrition) + metaData <- list( + exposureIds = attr(sccsData, "metaData")$exposureIds, + outcomeId = unique(outcomes$eraId), + attrition = attrition + ) - cases$startDate <- as.Date(paste(cases$startYear, cases$startMonth, cases$startDay, sep = "-"), format="%Y-%m-%d") + cases$startDate <- as.Date(paste(cases$startYear, cases$startMonth, cases$startDay, sep = "-"), format = "%Y-%m-%d") cases <- cases %>% - mutate(offset = .data$startAgeInDays - .data$ageInDays, - startDate = .data$startDate + .data$startAgeInDays - .data$ageInDays, - endDay = .data$endAgeInDays - .data$startAgeInDays) %>% + mutate( + offset = .data$startAgeInDays - .data$ageInDays, + startDate = .data$startDate + .data$startAgeInDays - .data$ageInDays, + endDay = .data$endAgeInDays - .data$startAgeInDays + ) %>% mutate(ageInDays = .data$startAgeInDays) %>% select(.data$observationPeriodId, .data$caseId, .data$personId, .data$startDate, .data$endDay, .data$ageInDays, .data$offset, .data$noninformativeEndCensor) @@ -159,10 +181,12 @@ countOutcomes <- function(outcomes, cases, description) { counts <- outcomes %>% inner_join(cases, by = "caseId") %>% group_by(.data$eraId) %>% - summarise(outcomeSubjects = n_distinct(.data$personId), - outcomeEvents = n(), - outcomeObsPeriods = n_distinct(.data$caseId), - .groups = "drop_last") %>% + summarise( + outcomeSubjects = n_distinct(.data$personId), + outcomeEvents = n(), + outcomeObsPeriods = n_distinct(.data$caseId), + .groups = "drop_last" + ) %>% rename(outcomeId = .data$eraId) %>% mutate(description = description) return(counts) diff --git a/docs/404.html b/docs/404.html index 1ae74d7..d10ebbe 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,66 +1,27 @@ - - -
- + + + + -