Skip to content

Commit 9bd40d4

Browse files
committed
Handle data points outside of bounds.
1 parent 9c5eeef commit 9bd40d4

File tree

1 file changed

+25
-1
lines changed

1 file changed

+25
-1
lines changed

R/stat-density.r

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
#' @param bounds Known lower and upper bounds for estimated data. Default
1919
#' `c(-Inf, Inf)` means that there are no (finite) bounds. If any bound is
2020
#' finite, boundary effect of default density estimation will be corrected by
21-
#' reflecting tails outside `bounds` around their closest edge.
21+
#' reflecting tails outside `bounds` around their closest edge. Data points
22+
#' outside of `bounds` are removed with a warning.
2223
#' @section Computed variables:
2324
#' \describe{
2425
#' \item{density}{density estimate}
@@ -118,6 +119,12 @@ compute_density <- function(x, w, from, to, bw = "nrd0", adjust = 1,
118119
w <- w / sum(w)
119120
}
120121

122+
# Adjust data points and weights to all fit inside bounds
123+
sample_data <- fit_data_to_bounds(bounds, x, w)
124+
x <- sample_data$x
125+
w <- sample_data$w
126+
nx <- length(x)
127+
121128
# if less than 2 points return data frame of NAs and a warning
122129
if (nx < 2) {
123130
warn("Groups with fewer than two data points have been dropped.")
@@ -152,6 +159,23 @@ compute_density <- function(x, w, from, to, bw = "nrd0", adjust = 1,
152159
), n = length(dens$x))
153160
}
154161

162+
# Drop data points (and their weights) that lie outside `bounds`.
#
# `bounds` is indexed as `bounds[1]` (lower) and `bounds[2]` (upper); both
# edges are inclusive. `x` holds the data points and `w` the weights, which
# are subset with the same mask as `x`.
#
# If at least one point is outside the bounds, a warning is signalled, the
# offending points are removed, and the remaining weights are rescaled to sum
# to one (unless they sum to zero, in which case they are left as they are).
# If nothing is outside the bounds, `x` and `w` are returned unchanged.
#
# Returns a list with elements `x` and `w`.
fit_data_to_bounds <- function(bounds, x, w) {
  # A missing value is not "outside" the bounds: comparing it yields NA, which
  # would make the `if ()` below fail and would inject NA entries when used as
  # a subsetting index. Missing values are therefore passed through untouched
  # and only points that are demonstrably out of range are dropped.
  is_outside_bounds <- !is.na(x) & (x < bounds[1] | x > bounds[2])

  if (any(is_outside_bounds)) {
    warn("Some data points are outside of `bounds`. Removing them.")
    keep <- !is_outside_bounds
    x <- x[keep]
    w <- w[keep]

    # Renormalise so the surviving weights still describe a full distribution.
    w_sum <- sum(w)
    if (w_sum > 0) {
      w <- w / w_sum
    }
  }

  list(x = x, w = w)
}
178+
155179
# Update density estimation to mitigate boundary effect at known `bounds`:
156180
# - All x values will lie inside `bounds`.
157181
# - All y-values will be updated to have total probability of `bounds` be

0 commit comments

Comments
 (0)