Skip to content

Commit 070f1b2

Browse files
topepohfrick
andauthored
Survival helpers (#893)
* helper functions * doc, version, and test update * small doc update * update name change * convert to a portable standalone document with unexported functions * Apply suggestions from code review Co-authored-by: Hannah Frick <[email protected]> * updates from reviewer feedback * update snapshot --------- Co-authored-by: Hannah Frick <[email protected]>
1 parent 1b18612 commit 070f1b2

File tree

7 files changed

+176
-7
lines changed

7 files changed

+176
-7
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: parsnip
22
Title: A Common API to Modeling and Analysis Functions
3-
Version: 1.0.4.9000
3+
Version: 1.0.4.9001
44
Authors@R: c(
55
person("Max", "Kuhn", , "[email protected]", role = c("aut", "cre")),
66
person("Davis", "Vaughan", , "[email protected]", role = "aut"),
@@ -76,4 +76,4 @@ Config/testthat/edition: 3
7676
Encoding: UTF-8
7777
LazyData: true
7878
Roxygen: list(markdown = TRUE)
79-
RoxygenNote: 7.2.3.9000
79+
RoxygenNote: 7.2.3

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
* Fixed bug with prediction from a boosted tree model fitted with `"xgboost"` using a custom objective function (#875).
99

10+
* Several internal functions (to help work with `Surv` objects) were added as a standalone file that can be used in other packages via `usethis::use_standalone("tidymodels/parsnip")`.
11+
1012

1113
# parsnip 1.0.4
1214

R/fit.R

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
# General TODOs
2-
# Q: think about case weights in each instance below
3-
4-
# TODO write a better deparser for calls to avoid off-screen text and tabs
5-
61
#' Fit a Model Specification to a Dataset
72
#'
83
#' `fit()` and `fit_xy()` take a model specification, translate the required

R/ipcw.R

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# ------------------------------------------------------------------------------
2+
# Functions for using inverse probability of censoring weights (IPCW) in
3+
# censored regression models
4+
5+
# ------------------------------------------------------------------------------
6+
# Simple helpers for computing the probability of censoring
7+
8+
# For avoiding extremely large, outlier weights
9+
trunc_probs <- function(probs, trunc = 0.01) {
10+
is_complt_prob <- !is.na(probs)
11+
complt_prob <- probs[is_complt_prob]
12+
non_zero_min <- min(complt_prob[complt_prob > 0])
13+
if (non_zero_min < trunc) {
14+
trunc <- non_zero_min / 2
15+
}
16+
probs[is_complt_prob] <-
17+
ifelse(probs[is_complt_prob] <= trunc, trunc, probs[is_complt_prob])
18+
probs
19+
}
20+
21+
.filter_eval_time <- function(eval_time, fail = TRUE) {
22+
# will still propagate nulls:
23+
eval_time <- eval_time[!is.na(eval_time)]
24+
eval_time <- unique(eval_time)
25+
eval_time <- sort(eval_time)
26+
eval_time <- eval_time[eval_time >= 0 & is.finite(eval_time)]
27+
if (fail && identical(eval_time, numeric(0))) {
28+
rlang::abort(
29+
"There were no usable evaluation times (finite, non-missing, and >= 0).",
30+
call = NULL
31+
)
32+
}
33+
eval_time
34+
}

R/standalone-survival.R

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# ---
2+
# repo: tidymodels/parsnip
3+
# file: standalone-survival.R
4+
# last-updated: 2023-02-28
5+
# license: https://unlicense.org
6+
# ---
7+
8+
# This file provides a portable set of helper functions for Surv objects
9+
10+
# ## Changelog
11+
12+
# 2023-02-28:
13+
# * Initial version
14+
15+
16+
# @param surv A [survival::Surv()] object
17+
# @details
18+
# `.is_censored_right()` always returns a logical while
19+
# `.check_censored_right()` will fail if `FALSE`.
20+
#
21+
# `.extract_status()` will return the data as 0/1 even if the original object
22+
# used the legacy encoding of 1/2. See [survival::Surv()].
23+
# @return
24+
# - `.extract_surv_status()` returns a vector.
25+
# - `.extract_surv_time()` returns a vector when the type is `"right"` or `"left"`
26+
# and a tibble otherwise.
27+
# - Functions starting with `.is_` or `.check_` return logicals although the
28+
# latter will fail when `FALSE`.
29+
30+
# nocov start
31+
# These are tested in the extratests repo since it would require a dependency
32+
# on the survival package. https://github.com/tidymodels/extratests/pull/78
33+
.is_censored_right <- function(surv) {
34+
.check_cens_type(surv, fail = FALSE)
35+
}
36+
37+
.check_censored_right <- function(surv) {
38+
.check_cens_type(surv, fail = TRUE)
39+
} # will add more as we need them
40+
41+
.extract_surv_time <- function(surv) {
42+
.is_surv(surv)
43+
keepers <- c("time", "start", "stop", "time1", "time2")
44+
res <- surv[, colnames(surv) %in% keepers]
45+
if (NCOL(res) > 1) {
46+
res <- tibble::tibble(as.data.frame(res))
47+
}
48+
res
49+
}
50+
51+
.extract_surv_status <- function(surv) {
52+
.is_surv(surv)
53+
res <- surv[, "status"]
54+
un_vals <- sort(unique(res))
55+
event_type_to_01 <- !(.extract_surv_type(surv) %in% c("interval", "interval2", "mstate"))
56+
if (
57+
event_type_to_01 &&
58+
(identical(un_vals, 1:2) | identical(un_vals, c(1.0, 2.0))) ) {
59+
res <- res - 1
60+
}
61+
res
62+
}
63+
64+
.is_surv <- function(surv, fail = TRUE) {
65+
is_surv <- inherits(surv, "Surv")
66+
if (!is_surv && fail) {
67+
rlang::abort("The object does not have class `Surv`.", call = NULL)
68+
}
69+
is_surv
70+
}
71+
72+
.extract_surv_type <- function(surv) {
73+
attr(surv, "type")
74+
}
75+
76+
.check_cens_type <- function(surv, type = "right", fail = TRUE) {
77+
.is_surv(surv)
78+
obj_type <- .extract_surv_type(surv)
79+
good_type <- all(obj_type %in% type)
80+
if (!good_type && fail) {
81+
c_list <- paste0("'", type, "'")
82+
msg <- cli::format_inline("For this usage, the allowed censoring type{?s} {?is/are}: {c_list}")
83+
rlang::abort(msg, call = NULL)
84+
}
85+
good_type
86+
}
87+
88+
# nocov end

tests/testthat/_snaps/ipcw.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# time filtering
2+
3+
Code
4+
parsnip:::.filter_eval_time(-1)
5+
Condition
6+
Error:
7+
! There were no usable evaluation times (finite, non-missing, and >= 0).
8+

tests/testthat/test-ipcw.R

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
test_that('probability truncation', {
2+
probs <- seq(0, 1, length.out = 5)
3+
4+
expect_equal(
5+
min(parsnip:::trunc_probs(probs, .4)),
6+
min(probs[probs > 0]) / 2
7+
)
8+
expect_equal(
9+
min(parsnip:::trunc_probs(c(NA, probs), .4), na.rm = TRUE),
10+
min(probs[probs > 0]) / 2
11+
)
12+
expect_equal(
13+
min(parsnip:::trunc_probs(probs)),
14+
0.01
15+
)
16+
expect_equal(
17+
min(parsnip:::trunc_probs((1:200)/200)),
18+
1 / 200
19+
)
20+
})
21+
22+
23+
test_that('time filtering', {
24+
times_1 <- 0:10
25+
times_2 <- c(Inf, NA, -3, times_1, times_1)
26+
27+
expect_equal(
28+
parsnip:::.filter_eval_time(times_1),
29+
times_1
30+
)
31+
expect_equal(
32+
parsnip:::.filter_eval_time(times_1),
33+
times_1
34+
)
35+
expect_snapshot(error = TRUE, parsnip:::.filter_eval_time(-1))
36+
expect_null(parsnip:::.filter_eval_time(NULL))
37+
})
38+
39+
40+
41+
42+

0 commit comments

Comments
 (0)