Skip to content

Commit 06509f7

Browse files
committed
Auto merge of #2495 - jtgeibel:web-capacity-report-only, r=pietroalbini
Add a report-only mode to the `BalanceCapacity` middleware This adds a single environment variable that can be quickly set or removed to toggle capacity enforcement. If this variable is set to any value (including the empty string) then no requests will be rejected due to load. This will allow us to evolve the heuristics used in this middleware to find the right balance before turning on enforcement. This also increases the default logging percentage to a higher default. r? @pietroalbini
2 parents 47e65bb + 6494ac6 commit 06509f7

File tree

1 file changed

+43
-29
lines changed

1 file changed

+43
-29
lines changed

src/middleware/balance_capacity.rs

Lines changed: 43 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
//! we should avoid dropping download requests even if that means rejecting some legitimate
99
//! requests to other endpoints.
1010
11+
use std::env;
1112
use std::sync::atomic::{AtomicUsize, Ordering};
1213

1314
use super::prelude::*;
@@ -17,7 +18,8 @@ use conduit::{RequestExt, StatusCode};
1718
pub(super) struct BalanceCapacity {
1819
handler: Option<Box<dyn Handler>>,
1920
capacity: usize,
20-
in_flight_requests: AtomicUsize,
21+
in_flight_non_dl_requests: AtomicUsize,
22+
report_only: bool,
2123
log_at_percentage: usize,
2224
throttle_at_percentage: usize,
2325
dl_only_at_percentage: usize,
@@ -28,12 +30,39 @@ impl BalanceCapacity {
2830
Self {
2931
handler: None,
3032
capacity,
31-
in_flight_requests: AtomicUsize::new(0),
32-
log_at_percentage: read_env_percentage("WEB_CAPACITY_LOG_PCT", 20),
33+
in_flight_non_dl_requests: AtomicUsize::new(0),
34+
report_only: env::var("WEB_CAPACITY_REPORT_ONLY").ok().is_some(),
35+
log_at_percentage: read_env_percentage("WEB_CAPACITY_LOG_PCT", 50),
3336
throttle_at_percentage: read_env_percentage("WEB_CAPACITY_THROTTLE_PCT", 70),
3437
dl_only_at_percentage: read_env_percentage("WEB_CAPACITY_DL_ONLY_PCT", 80),
3538
}
3639
}
40+
41+
/// Handle a request normally
42+
fn handle(&self, request: &mut dyn RequestExt) -> AfterResult {
43+
self.handler.as_ref().unwrap().call(request)
44+
}
45+
46+
/// Handle a request when load exceeds a threshold
47+
///
48+
/// In report-only mode, log metadata is added but the request is still served. Otherwise,
49+
/// the request is rejected with a service unavailable response.
50+
fn handle_high_load(&self, request: &mut dyn RequestExt, note: &str) -> AfterResult {
51+
if self.report_only {
52+
// In report-only mode we serve all requests but add log metadata
53+
super::log_request::add_custom_metadata(request, "would_reject", note);
54+
self.handle(request)
55+
} else {
56+
// Reject the request
57+
super::log_request::add_custom_metadata(request, "cause", note);
58+
let body = "Service temporarily unavailable";
59+
Response::builder()
60+
.status(StatusCode::SERVICE_UNAVAILABLE)
61+
.header(header::CONTENT_LENGTH, body.len())
62+
.body(Body::from_static(body.as_bytes()))
63+
.map_err(box_error)
64+
}
65+
}
3766
}
3867

3968
impl AroundMiddleware for BalanceCapacity {
@@ -44,58 +73,43 @@ impl AroundMiddleware for BalanceCapacity {
4473

4574
impl Handler for BalanceCapacity {
4675
fn call(&self, request: &mut dyn RequestExt) -> AfterResult {
76+
// Download requests are always accepted and do not affect the request count
77+
if request.path().starts_with("/api/v1/crates/") && request.path().ends_with("/download") {
78+
return self.handle(request);
79+
}
80+
4781
// The _drop_on_exit ensures the counter is decremented for all exit paths (including panics)
48-
let (_drop_on_exit, count) = RequestCounter::add_one(&self.in_flight_requests);
49-
let handler = self.handler.as_ref().unwrap();
82+
let (_drop_on_exit, count) = RequestCounter::add_one(&self.in_flight_non_dl_requests);
5083
let load = 100 * count / self.capacity;
5184

5285
// Begin logging request count so early stages of load increase can be located
5386
if load >= self.log_at_percentage {
54-
super::log_request::add_custom_metadata(request, "in_flight_requests", count);
55-
}
56-
57-
// Download requests are always accepted
58-
if request.path().starts_with("/api/v1/crates/") && request.path().ends_with("/download") {
59-
return handler.call(request);
87+
super::log_request::add_custom_metadata(request, "in_flight_non_dl_requests", count);
6088
}
6189

6290
// Reject read-only requests as load nears capacity. Bots are likely to send only safe
6391
// requests and this helps prioritize requests that users may be reluctant to retry.
6492
if load >= self.throttle_at_percentage && request.method().is_safe() {
65-
return over_capacity_response(request);
93+
return self.handle_high_load(request, "over capacity (throttle)");
6694
}
6795

6896
// As load reaches capacity, all non-download requests are rejected
6997
if load >= self.dl_only_at_percentage {
70-
return over_capacity_response(request);
98+
return self.handle_high_load(request, "over capacity (download only)");
7199
}
72100

73-
handler.call(request)
101+
self.handle(request)
74102
}
75103
}
76104

77-
fn over_capacity_response(request: &mut dyn RequestExt) -> AfterResult {
78-
// TODO: Generate an alert so we can investigate
79-
super::log_request::add_custom_metadata(request, "cause", "over capacity");
80-
let body = "Service temporarily unavailable";
81-
Response::builder()
82-
.status(StatusCode::SERVICE_UNAVAILABLE)
83-
.header(header::CONTENT_LENGTH, body.len())
84-
.body(Body::from_static(body.as_bytes()))
85-
.map_err(box_error)
86-
}
87-
88105
fn read_env_percentage(name: &str, default: usize) -> usize {
89-
if let Ok(value) = std::env::var(name) {
106+
if let Ok(value) = env::var(name) {
90107
value.parse().unwrap_or(default)
91108
} else {
92109
default
93110
}
94111
}
95112

96-
// FIXME(JTG): I've copied the following from my `conduit-hyper` crate. Once we transition from
97-
// `civet`, we could pass the in_flight_request count from `condut-hyper` via a request extension.
98-
99113
/// A struct that stores a reference to an atomic counter so it can be decremented when dropped
100114
struct RequestCounter<'a> {
101115
counter: &'a AtomicUsize,

0 commit comments

Comments
 (0)