15
15
# ' not line-up, and hence you won't be able to stack density values.
16
16
# ' This parameter only matters if you are displaying multiple densities in
17
17
# ' one plot or if you are manually adjusting the scale limits.
18
+ # ' @param bounds Known lower and upper bounds for estimated data. Default
19
+ # ' `c(-Inf, Inf)` means that there are no (finite) bounds. If any bound is
20
+ # ' finite, boundary effect of default density estimation will be corrected by
21
+ # ' reflecting tails outside `bounds` around their closest edge. Data points
22
+ # ' outside of bounds are removed with a warning.
18
23
# ' @section Computed variables:
19
24
# ' \describe{
20
25
# ' \item{density}{density estimate}
@@ -36,6 +41,7 @@ stat_density <- function(mapping = NULL, data = NULL,
36
41
n = 512 ,
37
42
trim = FALSE ,
38
43
na.rm = FALSE ,
44
+ bounds = c(- Inf , Inf ),
39
45
orientation = NA ,
40
46
show.legend = NA ,
41
47
inherit.aes = TRUE ) {
@@ -55,6 +61,7 @@ stat_density <- function(mapping = NULL, data = NULL,
55
61
n = n ,
56
62
trim = trim ,
57
63
na.rm = na.rm ,
64
+ bounds = bounds ,
58
65
orientation = orientation ,
59
66
...
60
67
)
@@ -70,6 +77,8 @@ StatDensity <- ggproto("StatDensity", Stat,
70
77
71
78
default_aes = aes(x = after_stat(density ), y = after_stat(density ), fill = NA , weight = NULL ),
72
79
80
+ dropped_aes = " weight" ,
81
+
73
82
setup_params = function (self , data , params ) {
74
83
params $ flipped_aes <- has_flipped_aes(data , params , main_is_orthogonal = FALSE , main_is_continuous = TRUE )
75
84
@@ -85,7 +94,8 @@ StatDensity <- ggproto("StatDensity", Stat,
85
94
extra_params = c(" na.rm" , " orientation" ),
86
95
87
96
compute_group = function (data , scales , bw = " nrd0" , adjust = 1 , kernel = " gaussian" ,
88
- n = 512 , trim = FALSE , na.rm = FALSE , flipped_aes = FALSE ) {
97
+ n = 512 , trim = FALSE , na.rm = FALSE , bounds = c(- Inf , Inf ),
98
+ flipped_aes = FALSE ) {
89
99
data <- flip_data(data , flipped_aes )
90
100
if (trim ) {
91
101
range <- range(data $ x , na.rm = TRUE )
@@ -94,22 +104,30 @@ StatDensity <- ggproto("StatDensity", Stat,
94
104
}
95
105
96
106
density <- compute_density(data $ x , data $ weight , from = range [1 ],
97
- to = range [2 ], bw = bw , adjust = adjust , kernel = kernel , n = n )
107
+ to = range [2 ], bw = bw , adjust = adjust , kernel = kernel , n = n ,
108
+ bounds = bounds )
98
109
density $ flipped_aes <- flipped_aes
99
110
flip_data(density , flipped_aes )
100
111
}
101
112
102
113
)
103
114
104
115
compute_density <- function (x , w , from , to , bw = " nrd0" , adjust = 1 ,
105
- kernel = " gaussian" , n = 512 ) {
116
+ kernel = " gaussian" , n = 512 ,
117
+ bounds = c(- Inf , Inf )) {
106
118
nx <- length(x )
107
119
if (is.null(w )) {
108
120
w <- rep(1 / nx , nx )
109
121
} else {
110
122
w <- w / sum(w )
111
123
}
112
124
125
+ # Adjust data points and weights to all fit inside bounds
126
+ sample_data <- fit_data_to_bounds(bounds , x , w )
127
+ x <- sample_data $ x
128
+ w <- sample_data $ w
129
+ nx <- length(x )
130
+
113
131
# if less than 2 points return data frame of NAs and a warning
114
132
if (nx < 2 ) {
115
133
cli :: cli_warn(" Groups with fewer than two data points have been dropped." )
@@ -124,8 +142,16 @@ compute_density <- function(x, w, from, to, bw = "nrd0", adjust = 1,
124
142
))
125
143
}
126
144
127
- dens <- stats :: density(x , weights = w , bw = bw , adjust = adjust ,
128
- kernel = kernel , n = n , from = from , to = to )
145
+ # Decide whether to use boundary correction
146
+ if (any(is.finite(bounds ))) {
147
+ dens <- stats :: density(x , weights = w , bw = bw , adjust = adjust ,
148
+ kernel = kernel , n = n )
149
+
150
+ dens <- reflect_density(dens = dens , bounds = bounds , from = from , to = to )
151
+ } else {
152
+ dens <- stats :: density(x , weights = w , bw = bw , adjust = adjust ,
153
+ kernel = kernel , n = n , from = from , to = to )
154
+ }
129
155
130
156
data_frame0(
131
157
x = dens $ x ,
@@ -137,3 +163,57 @@ compute_density <- function(x, w, from, to, bw = "nrd0", adjust = 1,
137
163
.size = length(dens $ x )
138
164
)
139
165
}
166
+
167
+ # Check if all data points are inside bounds. If not, warn and remove them.
168
+ fit_data_to_bounds <- function (bounds , x , w ) {
169
+ is_inside_bounds <- (bounds [1 ] < = x ) & (x < = bounds [2 ])
170
+
171
+ if (any(! is_inside_bounds )) {
172
+ cli :: cli_warn(" Some data points are outside of `bounds`. Removing them." )
173
+ x <- x [is_inside_bounds ]
174
+ w <- w [is_inside_bounds ]
175
+ w_sum <- sum(w )
176
+ if (w_sum > 0 ) {
177
+ w <- w / w_sum
178
+ }
179
+ }
180
+
181
+ return (list (x = x , w = w ))
182
+ }
183
+
184
+ # Update density estimation to mitigate boundary effect at known `bounds`:
185
+ # - All x values will lie inside `bounds`.
186
+ # - All y-values will be updated to have total probability of `bounds` be
187
+ # closer to 1. This is done by reflecting tails outside of `bounds` around
188
+ # their closest edge. This leads to those tails lie inside of `bounds`
189
+ # (completely, if they are not wider than `bounds` itself, which is a common
190
+ # situation) and correct boundary effect of default density estimation.
191
+ #
192
+ # `dens` - output of `stats::density`.
193
+ # `bounds` - two-element vector with left and right known (user supplied)
194
+ # bounds of x values.
195
+ # `from`, `to` - numbers used as corresponding arguments of `stats::density()`
196
+ # in case of no boundary correction.
197
+ reflect_density <- function (dens , bounds , from , to ) {
198
+ # No adjustment is needed if no finite bounds are supplied
199
+ if (all(is.infinite(bounds ))) {
200
+ return (dens )
201
+ }
202
+
203
+ # Estimate linearly with zero tails (crucial to account for infinite bound)
204
+ f_dens <- stats :: approxfun(
205
+ x = dens $ x , y = dens $ y , method = " linear" , yleft = 0 , yright = 0
206
+ )
207
+
208
+ # Create a uniform x-grid inside `bounds`
209
+ left <- max(from , bounds [1 ])
210
+ right <- min(to , bounds [2 ])
211
+ out_x <- seq(from = left , to = right , length.out = length(dens $ x ))
212
+
213
+ # Update density estimation by adding reflected tails from outside `bounds`
214
+ left_reflection <- f_dens(bounds [1 ] + (bounds [1 ] - out_x ))
215
+ right_reflection <- f_dens(bounds [2 ] + (bounds [2 ] - out_x ))
216
+ out_y <- f_dens(out_x ) + left_reflection + right_reflection
217
+
218
+ list (x = out_x , y = out_y )
219
+ }
0 commit comments