Skip to content

estimate censoring probabilities #855

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Feb 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: parsnip
Title: A Common API to Modeling and Analysis Functions
Version: 1.0.3.9002
Version: 1.0.3.9003
Authors@R: c(
person("Max", "Kuhn", , "[email protected]", role = c("aut", "cre")),
person("Davis", "Vaughan", , "[email protected]", role = "aut"),
Expand Down Expand Up @@ -54,6 +54,7 @@ Suggests:
mgcv,
modeldata,
nlme,
prodlim,
ranger (>= 0.12.0),
remotes,
rmarkdown,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ S3method(nullmodel,default)
S3method(predict,"_elnet")
S3method(predict,"_lognet")
S3method(predict,"_multnet")
S3method(predict,censoring_model_reverse_km)
S3method(predict,model_fit)
S3method(predict,model_spec)
S3method(predict,nullmodel)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# parsnip (development version)

* For censored regression models, a "reverse Kaplan-Meier" curve is computed for the censoring distribution. This can be used when evaluating this type of model (#855).

# parsnip 1.0.3

* Adds documentation and tuning infrastructure for the new `flexsurvspline` engine for the `survival_reg()` model specification from the `censored` package (@mattwarkentin, #831).
Expand Down
106 changes: 106 additions & 0 deletions R/censoring_probs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# nocov start
# tested in tidymodels/extratests#67

new_reverse_km_fit <-
function(formula,
object,
pkgs = character(0),
label = character(0),
extra_cls = character(0)) {
res <- list(formula = formula, fit = object, label = label, required_pkgs = pkgs)
class(res) <- c(paste0("censoring_model_", label), "censoring_model", extra_cls)
res
}

# ------------------------------------------------------------------------------
# estimate the reverse km curve for censored regression models

reverse_km <- function(obj, eval_env) {
if (obj$mode != "censored regression") {
return(list())
}
rlang::check_installed("prodlim")

# Note: even when fit_xy() is called, eval_env will still have
# objects data and formula in them
f <- eval_env$formula
km_form <- stats::update(f, ~ 1)
cl <-
rlang::call2(
"prodlim",
formula = km_form,
.ns = "prodlim",
reverse = TRUE,
type = "surv",
x = FALSE,
data = rlang::expr(eval_env$data)
)

if (!is.null(eval_env$weights)) {
cl <- rlang::call_modify(cl, caseweights = rlang::expr(eval_env$weights))
}
rkm <- try(rlang::eval_tidy(cl), silent = TRUE)
new_reverse_km_fit(f, object = rkm, label = "reverse_km", pkgs = "prodlim")
}

# ------------------------------------------------------------------------------
# Basic S3 methods

print.censoring_model <- function(x, ...) {
cat(x$label, "model for predicting the probability of censoring\n")
invisible(x)
}

predict.censoring_model <- function(object, ...) {
rlang::abort(
paste("Don't know how to predict with a censoring model of type:", object$label)
)
invisible(NULL)
}

#' @export
predict.censoring_model_reverse_km <- function(object, new_data = NULL, time, as_vector = FALSE, ...) {
rlang::check_installed("prodlim")

res <- rep(NA_real_, length(time))
if (length(time) == 0) {
return(res)
}

# Some time values might be NA (for Graf category 2)
is_na <- which(is.na(time))
if (length(is_na) > 0) {
time <- time[-is_na]
}

if (is.null(new_data)) {
tmp <-
purrr::map_dbl(time, ~ predict(object$fit, times = .x, type = "surv"))
} else {
tmp <-
purrr::map_dbl(time, ~ predict(object$fit, newdata = new_data, times = .x, type = "surv"))
}

zero_prob <- purrr::map_lgl(tmp, ~ !is.na(.x) && .x == 0)
if (any(zero_prob)) {
# Don't want censoring probabilities of zero so add an epsilon
# Either use 1/n or half of the minimum survival probability
n <- max(object$fit$n.risk)
half_min_surv_prob <- min(object$fit$surv[object$fit$surv > 0]) / 2
eps <- min(1 / n, half_min_surv_prob)
tmp[zero_prob] <- eps
}

if (length(is_na) > 0) {
res[-is_na] <- tmp
} else {
res <- tmp
}

if (!as_vector) {
res <- tibble::tibble(.prob_censored = unname(res))
}
res
}

# nocov end
8 changes: 8 additions & 0 deletions R/fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@
#' `options(contrasts = c(unordered = "contr.helmert", ordered = "contr.poly"))`.
#' See the help page for [stats::contr.treatment()] for more possible contrast
#' types.
#'
#' For models with `"censored regression"` modes, an additional computation is
#' executed and saved in the parsnip object. The `censor_probs` element contains
#' a "reverse Kaplan-Meier" curve that models the probability of censoring. This
#' may be used later to compute inverse probability censoring weights for
#' performance measures.
#' @examples
#' # Although `glm()` only has a formula interface, different
#' # methods for specifying the model can be used
Expand Down Expand Up @@ -206,6 +212,7 @@ fit.model_spec <-

rlang::abort(glue::glue("{interfaces} is unknown."))
)
res$censor_probs <- reverse_km(object, eval_env)
model_classes <- class(res$fit)
class(res) <- c(paste0("_", model_classes[1]), "model_fit")
res
Expand Down Expand Up @@ -317,6 +324,7 @@ fit_xy.model_spec <-
),
rlang::abort(glue::glue("{interfaces} is unknown."))
)
res$censor_probs <- reverse_km(object, eval_env)
model_classes <- class(res$fit)
class(res) <- c(paste0("_", model_classes[1]), "model_fit")
res
Expand Down
2 changes: 0 additions & 2 deletions R/fit_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -197,5 +197,3 @@ xy_form <- function(object, env, control, ...) {
res$preproc <- data_obj[c("x_var", "y_var")]
res
}


6 changes: 6 additions & 0 deletions man/fit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions tests/testthat/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Note that some functionality in parsnip is tested outside of the package. Due to a high degree of dependencies, many additional tests are in the [extratexts](https://github.com/tidymodels/extratests/tree/main/tests/testthat) repository. These are run nightly with CRAN and Github versions of parsnip as well as other tidymodels packages.