Skip to content

Commit 9060711

Browse files
topepohfrick
andauthored
estimate censoring probabilities (#855)
* add reverse KM curve estimate * documention uodate * fix typos * fix formula name * Apply suggestions from code review Co-authored-by: Hannah Frick <[email protected]> * put censoring functions in their own file * generalize the censoring model interface for future use * nocov and fix logic when NAs are there * updated on reviewer code * testing notes * updated number --------- Co-authored-by: Hannah Frick <[email protected]>
1 parent 6cb9225 commit 9060711

File tree

8 files changed

+127
-3
lines changed

8 files changed

+127
-3
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: parsnip
22
Title: A Common API to Modeling and Analysis Functions
3-
Version: 1.0.3.9002
3+
Version: 1.0.3.9003
44
Authors@R: c(
55
person("Max", "Kuhn", , "[email protected]", role = c("aut", "cre")),
66
person("Davis", "Vaughan", , "[email protected]", role = "aut"),
@@ -54,6 +54,7 @@ Suggests:
5454
mgcv,
5555
modeldata,
5656
nlme,
57+
prodlim,
5758
ranger (>= 0.12.0),
5859
remotes,
5960
rmarkdown,

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ S3method(nullmodel,default)
2929
S3method(predict,"_elnet")
3030
S3method(predict,"_lognet")
3131
S3method(predict,"_multnet")
32+
S3method(predict,censoring_model_reverse_km)
3233
S3method(predict,model_fit)
3334
S3method(predict,model_spec)
3435
S3method(predict,nullmodel)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# parsnip (development version)
22

3+
* For censored regression models, a "reverse Kaplan-Meier" curve is computed for the censoring distribution. This can be used when evaluating this type of model (#855).
4+
35
# parsnip 1.0.3
46

57
* Adds documentation and tuning infrastructure for the new `flexsurvspline` engine for the `survival_reg()` model specification from the `censored` package (@mattwarkentin, #831).

R/censoring_probs.R

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# nocov start
2+
# tested in tidymodels/extratests#67
3+
4+
new_reverse_km_fit <-
5+
function(formula,
6+
object,
7+
pkgs = character(0),
8+
label = character(0),
9+
extra_cls = character(0)) {
10+
res <- list(formula = formula, fit = object, label = label, required_pkgs = pkgs)
11+
class(res) <- c(paste0("censoring_model_", label), "censoring_model", extra_cls)
12+
res
13+
}
14+
15+
# ------------------------------------------------------------------------------
16+
# estimate the reverse km curve for censored regression models
17+
18+
reverse_km <- function(obj, eval_env) {
19+
if (obj$mode != "censored regression") {
20+
return(list())
21+
}
22+
rlang::check_installed("prodlim")
23+
24+
# Note: even when fit_xy() is called, eval_env will still have
25+
# objects data and formula in them
26+
f <- eval_env$formula
27+
km_form <- stats::update(f, ~ 1)
28+
cl <-
29+
rlang::call2(
30+
"prodlim",
31+
formula = km_form,
32+
.ns = "prodlim",
33+
reverse = TRUE,
34+
type = "surv",
35+
x = FALSE,
36+
data = rlang::expr(eval_env$data)
37+
)
38+
39+
if (!is.null(eval_env$weights)) {
40+
cl <- rlang::call_modify(cl, caseweights = rlang::expr(eval_env$weights))
41+
}
42+
rkm <- try(rlang::eval_tidy(cl), silent = TRUE)
43+
new_reverse_km_fit(f, object = rkm, label = "reverse_km", pkgs = "prodlim")
44+
}
45+
46+
# ------------------------------------------------------------------------------
47+
# Basic S3 methods
48+
49+
print.censoring_model <- function(x, ...) {
50+
cat(x$label, "model for predicting the probability of censoring\n")
51+
invisible(x)
52+
}
53+
54+
predict.censoring_model <- function(object, ...) {
55+
rlang::abort(
56+
paste("Don't know how to predict with a censoring model of type:", object$label)
57+
)
58+
invisible(NULL)
59+
}
60+
61+
#' @export
62+
predict.censoring_model_reverse_km <- function(object, new_data = NULL, time, as_vector = FALSE, ...) {
63+
rlang::check_installed("prodlim")
64+
65+
res <- rep(NA_real_, length(time))
66+
if (length(time) == 0) {
67+
return(res)
68+
}
69+
70+
# Some time values might be NA (for Graf category 2)
71+
is_na <- which(is.na(time))
72+
if (length(is_na) > 0) {
73+
time <- time[-is_na]
74+
}
75+
76+
if (is.null(new_data)) {
77+
tmp <-
78+
purrr::map_dbl(time, ~ predict(object$fit, times = .x, type = "surv"))
79+
} else {
80+
tmp <-
81+
purrr::map_dbl(time, ~ predict(object$fit, newdata = new_data, times = .x, type = "surv"))
82+
}
83+
84+
zero_prob <- purrr::map_lgl(tmp, ~ !is.na(.x) && .x == 0)
85+
if (any(zero_prob)) {
86+
# Don't want censoring probabilities of zero so add an epsilon
87+
# Either use 1/n or half of the minimum survival probability
88+
n <- max(object$fit$n.risk)
89+
half_min_surv_prob <- min(object$fit$surv[object$fit$surv > 0]) / 2
90+
eps <- min(1 / n, half_min_surv_prob)
91+
tmp[zero_prob] <- eps
92+
}
93+
94+
if (length(is_na) > 0) {
95+
res[-is_na] <- tmp
96+
} else {
97+
res <- tmp
98+
}
99+
100+
if (!as_vector) {
101+
res <- tibble::tibble(.prob_censored = unname(res))
102+
}
103+
res
104+
}
105+
106+
# nocov end

R/fit.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@
5454
#' `options(contrasts = c(unordered = "contr.helmert", ordered = "contr.poly"))`.
5555
#' See the help page for [stats::contr.treatment()] for more possible contrast
5656
#' types.
57+
#'
58+
#' For models with `"censored regression"` modes, an additional computation is
59+
#' executed and saved in the parsnip object. The `censor_probs` element contains
60+
#' a "reverse Kaplan-Meier" curve that models the probability of censoring. This
61+
#' may be used later to compute inverse probability censoring weights for
62+
#' performance measures.
5763
#' @examples
5864
#' # Although `glm()` only has a formula interface, different
5965
#' # methods for specifying the model can be used
@@ -206,6 +212,7 @@ fit.model_spec <-
206212

207213
rlang::abort(glue::glue("{interfaces} is unknown."))
208214
)
215+
res$censor_probs <- reverse_km(object, eval_env)
209216
model_classes <- class(res$fit)
210217
class(res) <- c(paste0("_", model_classes[1]), "model_fit")
211218
res
@@ -317,6 +324,7 @@ fit_xy.model_spec <-
317324
),
318325
rlang::abort(glue::glue("{interfaces} is unknown."))
319326
)
327+
res$censor_probs <- reverse_km(object, eval_env)
320328
model_classes <- class(res$fit)
321329
class(res) <- c(paste0("_", model_classes[1]), "model_fit")
322330
res

R/fit_helpers.R

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,5 +197,3 @@ xy_form <- function(object, env, control, ...) {
197197
res$preproc <- data_obj[c("x_var", "y_var")]
198198
res
199199
}
200-
201-

man/fit.Rd

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Note that some functionality in parsnip is tested outside of the package. Due to a high degree of dependencies, many additional tests are in the [extratexts](https://github.com/tidymodels/extratests/tree/main/tests/testthat) repository. These are run nightly with CRAN and Github versions of parsnip as well as other tidymodels packages.
2+

0 commit comments

Comments
 (0)