From 95bfa2b7d8d9cf37dba148ef051d475a0ca71067 Mon Sep 17 00:00:00 2001 From: Marc Suchard Date: Mon, 25 Sep 2023 09:37:39 -0700 Subject: [PATCH] counting process reformating --- NAMESPACE | 3 + R/TimeEffects.R | 132 ++++++++++++++++++++++++++++++++ man/Cyclops-package.Rd | 41 ++++++++++ man/convertToTimeVaryingCoef.Rd | 39 ++++++++++ man/splitTime.Rd | 26 +++++++ 5 files changed, 241 insertions(+) create mode 100644 R/TimeEffects.R create mode 100644 man/Cyclops-package.Rd create mode 100644 man/convertToTimeVaryingCoef.Rd create mode 100644 man/splitTime.Rd diff --git a/NAMESPACE b/NAMESPACE index d632ced3..4fc36ad5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ S3method(survfit,cyclopsFit) S3method(vcov,cyclopsFit) export(Multitype) export(convertToCyclopsData) +export(convertToTimeVaryingCoef) export(coverage) export(createAutoGridCrossValidationControl) export(createControl) @@ -40,6 +41,7 @@ export(meanLinearPredictor) export(mse) export(readCyclopsData) export(simulateCyclopsData) +export(splitTime) import(Matrix) import(Rcpp) import(dplyr) @@ -72,5 +74,6 @@ importFrom(stats,terms) importFrom(stats,time) importFrom(stats,vcov) importFrom(survival,Surv) +importFrom(survival,survSplit) importFrom(survival,survfit) useDynLib(Cyclops, .registration = TRUE) diff --git a/R/TimeEffects.R b/R/TimeEffects.R new file mode 100644 index 00000000..5ae7aa0e --- /dev/null +++ b/R/TimeEffects.R @@ -0,0 +1,132 @@ +#' @title Split the analysis time into several intervals for time-varying coefficients. +#' +#' @description +#' \code{splitTime} split the analysis time into several intervals for time-varying coefficients +#' +#' @param shortOut A data frame containing the outcomes with predefined columns (see below). +#' @param cut Numeric: Time points to cut at +#' +#' These columns are expected in the shortOut object: +#' \tabular{lll}{ +#' \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr +#' \verb{y} \tab(real) \tab Observed event status \cr +#' \verb{time} \tab(real) \tab Observed event time \cr +#' } +#' +#' @return A long outcome table for time-varying coefficients. +#' @importFrom survival survfit survSplit +#' @export +splitTime <- function(shortOut, cut) { + + if (!"time" %in% colnames(shortOut)) stop("Must provide observed event time.") + if (!"y" %in% colnames(shortOut)) stop("Must provide observed event status.") + if ("rowId" %in% colnames(shortOut)) { + shortOut <- shortOut %>% + rename(subjectId = rowId) %>% + arrange(subjectId) + } else { + shortOut <- shortOut %>% + mutate(subjectId = row_number()) + } + + shortOut <- collect(shortOut) + longOut <- do.call('survSplit', list(formula = Surv(shortOut$time, shortOut$y)~., + data = shortOut, + cut = cut, + episode = "stratumId", + id = "newSubjectId")) + longOut <- longOut %>% + rename(y = event) %>% + mutate(time = tstop - tstart) %>% + select(-c(newSubjectId, tstart, tstop)) %>% + arrange(stratumId, subjectId) + + # Restore rowIds + SubjectIds <- shortOut$subjectId + newSubjectId <- max(SubjectIds)+1 + longOut$rowId <-c(SubjectIds, # rowId = subjectId at 1st stratum + newSubjectId:(newSubjectId+(nrow(longOut)-length(SubjectIds))-1)) # create new distinct rowIds for other strata + + # Reorder columns + longOut <- longOut %>% + select(rowId, everything()) %>% + select(subjectId, everything()) %>% + select(stratumId, everything()) + + return(longOut) +} + +#' @title Convert short sparse covariate table to long sparse covariate table for time-varying coefficients. +#' +#' @description +#' \code{convertToTimeVaryingCoef} convert short sparse covariate table to long sparse covariate table for time-varying coefficients. +#' +#' @param shortCov A data frame containing the covariate with predefined columns (see below). +#' @param longOut A data frame containing the outcomes with predefined columns (see below), output of \code{splitTime}. +#' @param timeVaryCoefId Integer: A numeric identifier of a time-varying coefficient +#' +#' @details +#' These columns are expected in the shortCov object: +#' \tabular{lll}{ +#' \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr +#' \verb{covariateId} \tab(integer) \tab A numeric identifier of a covariate \cr +#' \verb{covariateValue} \tab(real) \tab The value of the specified covariate \cr +#' } +#' +#' These columns are expected in the longOut object: +#' \tabular{lll}{ +#' \verb{stratumId} \tab(integer) \tab Stratum ID for time-varying models \cr +#' \verb{subjectId} \tab(integer) \tab Subject ID is used to link multiple covariates (x) at different time intervals to a single subject \cr +#' \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr +#' \verb{y} \tab(real) \tab The outcome variable \cr +#' \verb{time} \tab(real) \tab For models that use time (e.g. Poisson or Cox regression) this contains time \cr +#' \tab \tab(e.g. number of days) \cr +#' } +#' @return A long sparse covariate table for time-varying coefficients. +#' @export +convertToTimeVaryingCoef <- function(shortCov, longOut, timeVaryCoefId) { + + # Process time-varying coefficients + timeVaryCoefId <- sort(unique(timeVaryCoefId)) + numTime <- length(timeVaryCoefId) # number of time-varying covariates + maxCovId <- max(shortCov$covariateId) + + # First stratum + longCov <- shortCov + longCov$stratumId <- 1 + colnames(longCov)[which(names(longCov) == "rowId")] <- "subjectId" + colnames(shortCov)[which(names(shortCov) == "rowId")] <- "subjectId" + + # Rest of strata + maxStrata <- max(longOut$stratumId) + for (st in 2:maxStrata) { + + # get valid subjects in current stratum + subId <- longOut[longOut$stratumId == st, ]$subjectId + + # get valid sparse covariates information in current stratum + curStrata <- shortCov[shortCov$subjectId %in% subId, ] + + if (any(curStrata$covariateId %in% timeVaryCoefId)) { # skip when valid subjects only have non-zero time-indep covariates + curStrata$stratumId <- st # assign current stratumId + + # recode covariateId for time-varying coefficients + # TODO update label + for (i in 1:numTime) { + curStrata[curStrata$covariateId == timeVaryCoefId[i], "covariateId"] <- maxCovId + numTime * (st - 2) + i + } + + # bind current stratum to longCov + longCov <- rbind(longCov, curStrata) + } + } + + # match rowId in longCov + longCov$rowId <- NA + for (i in 1:nrow(longCov)) { + longCov$rowId[i] <- longOut[with(longOut, subjectId == longCov$subjectId[i] & stratumId == longCov$stratumId[i]), "rowId"] + } + + return(longCov) +} + diff --git a/man/Cyclops-package.Rd b/man/Cyclops-package.Rd new file mode 100644 index 00000000..d726da88 --- /dev/null +++ b/man/Cyclops-package.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Cyclops-package.R +\docType{package} +\name{Cyclops-package} +\alias{Cyclops} +\alias{Cyclops-package} +\title{Cyclops: Cyclic Coordinate Descent for Logistic, Poisson and Survival Analysis} +\description{ +This model fitting tool incorporates cyclic coordinate descent and majorization-minimization approaches to fit a variety of regression models found in large-scale observational healthcare data. Implementations focus on computational optimization and fine-scale parallelization to yield efficient inference in massive datasets. Please see: Suchard, Simpson, Zorych, Ryan and Madigan (2013) \doi{10.1145/2414416.2414791}. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/ohdsi/cyclops} + \item Report bugs at \url{https://github.com/ohdsi/cyclops/issues} +} + +} +\author{ +\strong{Maintainer}: Marc A. Suchard \email{msuchard@ucla.edu} + +Authors: +\itemize{ + \item Martijn J. Schuemie + \item Trevor R. Shaddox + \item Yuxi Tian + \item Jianxiao Yang + \item Eric Kawaguchi +} + +Other contributors: +\itemize{ + \item Sushil Mittal [contributor] + \item Observational Health Data Sciences and Informatics [copyright holder] + \item Marcus Geelnard (provided the TinyThread library) [copyright holder, contributor] + \item Rutgers University (provided the HParSearch routine) [copyright holder, contributor] + \item R Development Core Team (provided the ZeroIn routine) [copyright holder, contributor] +} + +} +\keyword{internal} diff --git a/man/convertToTimeVaryingCoef.Rd b/man/convertToTimeVaryingCoef.Rd new file mode 100644 index 00000000..b587f8f2 --- /dev/null +++ b/man/convertToTimeVaryingCoef.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TimeEffects.R +\name{convertToTimeVaryingCoef} +\alias{convertToTimeVaryingCoef} +\title{Convert short sparse covariate table to long sparse covariate table for time-varying coefficients.} +\usage{ +convertToTimeVaryingCoef(shortCov, longOut, timeVaryCoefId) +} +\arguments{ +\item{shortCov}{A data frame containing the covariate with predefined columns (see below).} + +\item{longOut}{A data frame containing the outcomes with predefined columns (see below), output of \code{splitTime}.} + +\item{timeVaryCoefId}{Integer: A numeric identifier of a time-varying coefficient} +} +\value{ +A long sparse covariate table for time-varying coefficients. +} +\description{ +\code{convertToTimeVaryingCoef} convert short sparse covariate table to long sparse covariate table for time-varying coefficients. +} +\details{ +These columns are expected in the shortCov object: +\tabular{lll}{ + \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr + \verb{covariateId} \tab(integer) \tab A numeric identifier of a covariate \cr + \verb{covariateValue} \tab(real) \tab The value of the specified covariate \cr +} + +These columns are expected in the longOut object: +\tabular{lll}{ + \verb{stratumId} \tab(integer) \tab Stratum ID for time-varying models \cr + \verb{subjectId} \tab(integer) \tab Subject ID is used to link multiple covariates (x) at different time intervals to a single subject \cr + \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr + \verb{y} \tab(real) \tab The outcome variable \cr + \verb{time} \tab(real) \tab For models that use time (e.g. Poisson or Cox regression) this contains time \cr + \tab \tab(e.g. number of days) \cr +} +} diff --git a/man/splitTime.Rd b/man/splitTime.Rd new file mode 100644 index 00000000..fedc4a7e --- /dev/null +++ b/man/splitTime.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TimeEffects.R +\name{splitTime} +\alias{splitTime} +\title{Split the analysis time into several intervals for time-varying coefficients.} +\usage{ +splitTime(shortOut, cut) +} +\arguments{ +\item{shortOut}{A data frame containing the outcomes with predefined columns (see below).} + +\item{cut}{Numeric: Time points to cut at + +These columns are expected in the shortOut object: +\tabular{lll}{ + \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr + \verb{y} \tab(real) \tab Observed event status \cr + \verb{time} \tab(real) \tab Observed event time \cr +}} +} +\value{ +A long outcome table for time-varying coefficients. +} +\description{ +\code{splitTime} split the analysis time into several intervals for time-varying coefficients +}