12
12
# ' and one of them must be unused. The ECDF will be calculated on the given aesthetic
13
13
# ' and will be output on the unused one.
14
14
# '
15
+ # ' If the `weight` aesthetic is provided, a weighted ECDF will be computed. In
16
+ # ' this case, the ECDF is incremented by `weight / sum(weight)` instead of
17
+ # ' `1 / length(x)` for each observation.
18
+ # '
15
19
# ' @inheritParams layer
16
20
# ' @inheritParams geom_point
17
21
# ' @param na.rm If `FALSE` (the default), removes missing values with
20
24
# ' of points to interpolate with.
21
25
# ' @param pad If `TRUE`, pad the ecdf with additional points (-Inf, 0)
22
26
# ' and (Inf, 1)
27
+ # ' @eval rd_aesthetics("stat", "ecdf")
23
28
# ' @eval rd_computed_vars(
24
29
# ' ecdf = "Cumulative density corresponding to `x`.",
25
30
# ' y = "`r lifecycle::badge('superseded')` For backward compatibility."
26
31
# ' )
32
+ # ' @section Dropped variables:
33
+ # ' \describe{
34
+ # ' \item{weight}{After calculation, weights of individual observations (if
35
+ # ' supplied), are no longer available.}
36
+ # ' }
27
37
# ' @export
28
38
# ' @examples
29
39
# ' set.seed(1)
41
51
# ' # Multiple ECDFs
42
52
# ' ggplot(df, aes(x, colour = g)) +
43
53
# ' stat_ecdf()
54
+ # '
55
+ # ' # Using weighted eCDF
56
+ # ' weighted <- data.frame(x = 1:10, weights = c(1:5, 5:1))
57
+ # ' plain <- data.frame(x = rep(weighted$x, weighted$weights))
58
+ # '
59
+ # ' ggplot(plain, aes(x)) +
60
+ # ' stat_ecdf(linewidth = 1) +
61
+ # ' stat_ecdf(
62
+ # ' aes(weight = weights),
63
+ # ' data = weighted, colour = "green"
64
+ # ' )
44
65
stat_ecdf <- function (mapping = NULL , data = NULL ,
45
66
geom = " step" , position = " identity" ,
46
67
... ,
@@ -74,7 +95,7 @@ stat_ecdf <- function(mapping = NULL, data = NULL,
74
95
StatEcdf <- ggproto(" StatEcdf" , Stat ,
75
96
required_aes = c(" x|y" ),
76
97
77
- default_aes = aes(x = after_stat(ecdf ), y = after_stat(ecdf )),
98
+ default_aes = aes(x = after_stat(ecdf ), y = after_stat(ecdf ), weight = NULL ),
78
99
79
100
setup_params = function (self , data , params ) {
80
101
params $ flipped_aes <- has_flipped_aes(data , params , main_is_orthogonal = FALSE , main_is_continuous = TRUE )
@@ -100,7 +121,7 @@ StatEcdf <- ggproto("StatEcdf", Stat,
100
121
if (pad ) {
101
122
x <- c(- Inf , x , Inf )
102
123
}
103
- data_ecdf <- stats :: ecdf (data $ x )(x )
124
+ data_ecdf <- wecdf (data $ x , data $ weight )(x )
104
125
105
126
df_ecdf <- data_frame0(
106
127
x = x ,
@@ -110,6 +131,63 @@ StatEcdf <- ggproto("StatEcdf", Stat,
110
131
)
111
132
df_ecdf $ flipped_aes <- flipped_aes
112
133
flip_data(df_ecdf , flipped_aes )
113
- }
134
+ },
135
+
136
+ dropped_aes = " weight"
114
137
)
115
138
139
+ # Weighted eCDF function
140
+ wecdf <- function (x , weights = NULL ) {
141
+
142
+ weights <- weights %|| % 1
143
+ weights <- vec_recycle(weights , length(x ))
144
+
145
+ # Sort vectors
146
+ ord <- order(x , na.last = NA )
147
+ x <- x [ord ]
148
+ weights <- weights [ord ]
149
+
150
+ if (any(! is.finite(weights ))) {
151
+ cli :: cli_warn(c(paste0(
152
+ " The {.field weight} aesthetic does not support non-finite or " ,
153
+ " {.code NA} values."
154
+ ), " i" = " These weights were replaced by {.val 0}." ))
155
+ weights [! is.finite(weights )] <- 0
156
+ }
157
+
158
+ # `total` replaces `length(x)`
159
+ total <- sum(weights )
160
+
161
+ if (abs(total ) < 1000 * .Machine $ double.eps ) {
162
+ if (total == 0 ) {
163
+ cli :: cli_abort(paste0(
164
+ " Cannot compute eCDF when the {.field weight} aesthetic sums up to " ,
165
+ " {.val 0}."
166
+ ))
167
+ }
168
+ cli :: cli_warn(c(
169
+ " The sum of the {.field weight} aesthetic is close to {.val 0}." ,
170
+ " i" = " Computed eCDF might be unstable."
171
+ ))
172
+ }
173
+
174
+ # Link each observation to unique value
175
+ vals <- unique0(x )
176
+ matched <- match(x , vals )
177
+
178
+ # Instead of tabulating `matched`, as we would for unweighted `ecdf(x)`,
179
+ # we sum weights per unique value of `x`
180
+ agg_weights <- vapply(
181
+ split(weights , matched ),
182
+ sum , numeric (1 )
183
+ )
184
+
185
+ # Like `ecdf(x)`, we return an approx function
186
+ approxfun(
187
+ vals ,
188
+ cumsum(agg_weights ) / total ,
189
+ method = " constant" ,
190
+ yleft = 0 , yright = 1 ,
191
+ f = 0 , ties = " ordered"
192
+ )
193
+ }
0 commit comments