|
18 | 18 | #' @param bounds Known lower and upper bounds for estimated data. Default
|
19 | 19 | #' `c(-Inf, Inf)` means that there are no (finite) bounds. If any bound is
|
20 | 20 | #' finite, boundary effect of default density estimation will be corrected by
|
21 |
| -#' reflecting tails outside `bounds` around their closest edge. |
| 21 | +#' reflecting tails outside `bounds` around their closest edge. Data points |
| 22 | +#' outside of bounds are removed with a warning. |
22 | 23 | #' @section Computed variables:
|
23 | 24 | #' \describe{
|
24 | 25 | #' \item{density}{density estimate}
|
@@ -118,6 +119,12 @@ compute_density <- function(x, w, from, to, bw = "nrd0", adjust = 1,
|
118 | 119 | w <- w / sum(w)
|
119 | 120 | }
|
120 | 121 |
|
| 122 | + # Adjust data points and weights to all fit inside bounds |
| 123 | + sample_data <- fit_data_to_bounds(bounds, x, w) |
| 124 | + x <- sample_data$x |
| 125 | + w <- sample_data$w |
| 126 | + nx <- length(x) |
| 127 | + |
121 | 128 | # if less than 2 points return data frame of NAs and a warning
|
122 | 129 | if (nx < 2) {
|
123 | 130 | warn("Groups with fewer than two data points have been dropped.")
|
@@ -152,6 +159,23 @@ compute_density <- function(x, w, from, to, bw = "nrd0", adjust = 1,
|
152 | 159 | ), n = length(dens$x))
|
153 | 160 | }
|
154 | 161 |
|
| 162 | +# Check if all data points are inside bounds. If not, warn and remove them. |
| 163 | +fit_data_to_bounds <- function(bounds, x, w) { |
| 164 | + is_inside_bounds <- (bounds[1] <= x) & (x <= bounds[2]) |
| 165 | + |
| 166 | + if (any(!is_inside_bounds)) { |
| 167 | + warn("Some data points are outside of `bounds`. Removing them.") |
| 168 | + x <- x[is_inside_bounds] |
| 169 | + w <- w[is_inside_bounds] |
| 170 | + w_sum <- sum(w) |
| 171 | + if (w_sum > 0) { |
| 172 | + w <- w / w_sum |
| 173 | + } |
| 174 | + } |
| 175 | + |
| 176 | + return(list(x = x, w = w)) |
| 177 | +} |
| 178 | + |
155 | 179 | # Update density estimation to mitigate boundary effect at known `bounds`:
|
156 | 180 | # - All x values will lie inside `bounds`.
|
157 | 181 | # - All y-values will be updated to have total probability of `bounds` be
|
|
0 commit comments