Skip to content

Commit 9a29dbe

Browse files
committed
Send variance to frontend
1 parent 463f30b commit 9a29dbe

File tree

4 files changed

+213
-120
lines changed

4 files changed

+213
-120
lines changed

site/src/api.rs

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -70,25 +70,6 @@ pub struct CommitResponse {
7070
pub commit: Option<String>,
7171
}
7272

73-
pub mod data {
74-
use crate::comparison::ArtifactData;
75-
use collector::Bound;
76-
use serde::{Deserialize, Serialize};
77-
78-
#[derive(Debug, Clone, Serialize, Deserialize)]
79-
pub struct Request {
80-
pub start: Bound,
81-
pub end: Bound,
82-
83-
/// Which statistic to return data for
84-
pub stat: String,
85-
}
86-
87-
/// List of DateData's from oldest to newest
88-
#[derive(Debug, Clone, Serialize)]
89-
pub struct Response(pub Vec<ArtifactData>);
90-
}
91-
9273
pub mod graph {
9374
use collector::Bound;
9475
use serde::{Deserialize, Serialize};
@@ -157,6 +138,7 @@ pub mod bootstrap {
157138
}
158139

159140
pub mod comparison {
141+
use crate::comparison;
160142
use collector::Bound;
161143
use database::Date;
162144
use serde::{Deserialize, Serialize};
@@ -171,6 +153,8 @@ pub mod comparison {
171153

172154
#[derive(Debug, Clone, Serialize)]
173155
pub struct Response {
156+
/// The variance data for each benchmark, if any.
157+
pub variance: Option<HashMap<String, comparison::BenchmarkVariance>>,
174158
/// The names for the previous artifact before `a`, if any.
175159
pub prev: Option<String>,
176160

site/src/comparison.rs

Lines changed: 196 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::load::SiteCtxt;
88
use crate::selector::{self, Tag};
99

1010
use collector::Bound;
11+
use log::debug;
1112
use serde::Serialize;
1213

1314
use std::collections::HashMap;
@@ -100,6 +101,7 @@ pub async fn handle_compare(
100101
let is_contiguous = comparison.is_contiguous(&*conn, &master_commits).await;
101102

102103
Ok(api::comparison::Response {
104+
variance: comparison.benchmark_variances.map(|b| b.data),
103105
prev,
104106
a: comparison.a.into(),
105107
b: comparison.b.into(),
@@ -237,21 +239,6 @@ async fn compare_given_commits(
237239
Some(b) => b,
238240
None => return Ok(None),
239241
};
240-
let mut prevs = Vec::with_capacity(10);
241-
let mut commit = a.clone();
242-
while prevs.len() < 100 {
243-
match prev_commit(&commit, master_commits) {
244-
Some(c) => {
245-
let new = ArtifactId::Commit(database::Commit {
246-
sha: c.sha.clone(),
247-
date: database::Date(c.time),
248-
});
249-
commit = new.clone();
250-
prevs.push(new);
251-
}
252-
None => break,
253-
}
254-
}
255242
let aids = Arc::new(vec![a.clone(), b.clone()]);
256243

257244
// get all crates, cache, and profile combinations for the given stat
@@ -264,58 +251,40 @@ async fn compare_given_commits(
264251
// `responses` contains series iterators. The first element in the iterator is the data
265252
// for `a` and the second is the data for `b`
266253
let mut responses = ctxt.query::<Option<f64>>(query.clone(), aids).await?;
267-
let mut rps = ctxt
268-
.query::<Option<f64>>(query, Arc::new(prevs.clone()))
269-
.await?;
270254

271255
let conn = ctxt.conn().await;
272-
273-
let mut others = Vec::new();
274-
for aid in prevs {
275-
others.push(ArtifactData::consume_one(&*conn, aid, &mut rps, master_commits).await)
276-
}
277-
// println!("{:#?}", others.iter().map(|o| &o.data).collect::<Vec<_>>());
278-
let mut h: HashMap<String, Vec<f64>> = HashMap::new();
279-
for o in others {
280-
let data = &o.data;
281-
for (k, v) in data {
282-
for (c, val) in v {
283-
h.entry(format!("{}-{}", k, c)).or_default().push(*val);
284-
}
285-
}
286-
}
287-
for (bench, results) in h {
288-
println!("Bechmark: {}", bench);
289-
let results_len = results.len();
290-
let results_mean = results.iter().sum::<f64>() / results.len() as f64;
291-
let mut deltas = results
292-
.windows(2)
293-
.map(|window| (window[0] - window[1]).abs())
294-
.collect::<Vec<_>>();
295-
deltas.sort_by(|d1, d2| d1.partial_cmp(d2).unwrap_or(std::cmp::Ordering::Equal));
296-
let non_significant = deltas
297-
.iter()
298-
.zip(results)
299-
.take_while(|(&d, r)| d / r < 0.05)
300-
.collect::<Vec<_>>();
301-
println!(
302-
"Significant changes: {}",
303-
results_len - non_significant.len()
304-
);
305-
let mean =
306-
non_significant.iter().map(|(&d, _)| d).sum::<f64>() / (non_significant.len() as f64);
307-
let coefficient_of_variance = (mean / results_mean) * 100.0;
308-
println!("\tCoefficient of variance: {:.3}%", coefficient_of_variance);
309-
}
310-
311256
Ok(Some(Comparison {
257+
benchmark_variances: BenchmarkVariances::calculate(ctxt, a.clone(), master_commits, stat)
258+
.await?,
312259
a: ArtifactData::consume_one(&*conn, a.clone(), &mut responses, master_commits).await,
313260
b: ArtifactData::consume_one(&*conn, b.clone(), &mut responses, master_commits).await,
314261
}))
315262
}
316263

264+
fn previous_commits(
265+
mut from: ArtifactId,
266+
n: usize,
267+
master_commits: &[collector::MasterCommit],
268+
) -> Vec<ArtifactId> {
269+
let mut prevs = Vec::with_capacity(n);
270+
while prevs.len() < n {
271+
match prev_commit(&from, master_commits) {
272+
Some(c) => {
273+
let new = ArtifactId::Commit(database::Commit {
274+
sha: c.sha.clone(),
275+
date: database::Date(c.time),
276+
});
277+
from = new.clone();
278+
prevs.push(new);
279+
}
280+
None => break,
281+
}
282+
}
283+
prevs
284+
}
285+
317286
/// Data associated with a specific artifact
318-
#[derive(Debug, Clone, Serialize)]
287+
#[derive(Debug, Clone)]
319288
pub struct ArtifactData {
320289
/// The artifact in question
321290
pub artifact: ArtifactId,
@@ -345,25 +314,7 @@ impl ArtifactData {
345314
where
346315
T: Iterator<Item = (ArtifactId, Option<f64>)>,
347316
{
348-
let mut data = HashMap::new();
349-
350-
for response in series {
351-
let (id, point) = response.series.next().expect("must have element");
352-
assert_eq!(artifact, id);
353-
354-
let point = if let Some(pt) = point {
355-
pt
356-
} else {
357-
continue;
358-
};
359-
data.entry(format!(
360-
"{}-{}",
361-
response.path.get::<Crate>().unwrap(),
362-
response.path.get::<Profile>().unwrap(),
363-
))
364-
.or_insert_with(Vec::new)
365-
.push((response.path.get::<Cache>().unwrap().to_string(), point));
366-
}
317+
let data = data_from_series(series);
367318

368319
let bootstrap = conn
369320
.get_bootstrap(&[conn.artifact_id(&artifact).await])
@@ -405,6 +356,32 @@ impl ArtifactData {
405356
}
406357
}
407358

359+
fn data_from_series<T>(
360+
series: &mut [selector::SeriesResponse<T>],
361+
) -> HashMap<String, Vec<(String, f64)>>
362+
where
363+
T: Iterator<Item = (ArtifactId, Option<f64>)>,
364+
{
365+
let mut data = HashMap::new();
366+
for response in series {
367+
let (_, point) = response.series.next().expect("must have element");
368+
369+
let point = if let Some(pt) = point {
370+
pt
371+
} else {
372+
continue;
373+
};
374+
data.entry(format!(
375+
"{}-{}",
376+
response.path.get::<Crate>().unwrap(),
377+
response.path.get::<Profile>().unwrap(),
378+
))
379+
.or_insert_with(Vec::new)
380+
.push((response.path.get::<Cache>().unwrap().to_string(), point));
381+
}
382+
data
383+
}
384+
408385
impl From<ArtifactData> for api::comparison::ArtifactData {
409386
fn from(data: ArtifactData) -> Self {
410387
api::comparison::ArtifactData {
@@ -426,6 +403,10 @@ impl From<ArtifactData> for api::comparison::ArtifactData {
426403

427404
// A comparison of two artifacts
428405
pub struct Comparison {
406+
/// Data on how variable benchmarks have historically been
407+
///
408+
/// Is `None` if we cannot determine historical variance
409+
pub benchmark_variances: Option<BenchmarkVariances>,
429410
pub a: ArtifactData,
430411
pub b: ArtifactData,
431412
}
@@ -482,6 +463,144 @@ impl Comparison {
482463
}
483464
}
484465

466+
/// A description of the amount of variance a certain benchmark is historically
467+
/// experiencing at a given point in time.
468+
pub struct BenchmarkVariances {
469+
/// Variance data on a per benchmark basis
470+
/// Key: $benchmark-$profile-$cache
471+
/// Value: `BenchmarkVariance`
472+
pub data: HashMap<String, BenchmarkVariance>,
473+
}
474+
475+
impl BenchmarkVariances {
476+
async fn calculate(
477+
ctxt: &SiteCtxt,
478+
from: ArtifactId,
479+
master_commits: &[collector::MasterCommit],
480+
stat: String,
481+
) -> Result<Option<Self>, BoxedError> {
482+
// get all crates, cache, and profile combinations for the given stat
483+
let query = selector::Query::new()
484+
.set::<String>(Tag::Crate, selector::Selector::All)
485+
.set::<String>(Tag::Cache, selector::Selector::All)
486+
.set::<String>(Tag::Profile, selector::Selector::All)
487+
.set(Tag::ProcessStatistic, selector::Selector::One(stat));
488+
489+
let num_commits = 100;
490+
let previous_commits = Arc::new(previous_commits(from, num_commits, master_commits));
491+
if previous_commits.len() < num_commits {
492+
return Ok(None);
493+
}
494+
let mut previous_commit_series = ctxt
495+
.query::<Option<f64>>(query, previous_commits.clone())
496+
.await?;
497+
498+
let mut variance_data: HashMap<String, BenchmarkVariance> = HashMap::new();
499+
for _ in previous_commits.iter() {
500+
let series_data = data_from_series(&mut previous_commit_series);
501+
for (bench_and_profile, data) in series_data {
502+
for (cache, val) in data {
503+
variance_data
504+
.entry(format!("{}-{}", bench_and_profile, cache))
505+
.or_default()
506+
.push(val);
507+
}
508+
}
509+
}
510+
511+
for (bench, results) in variance_data.iter_mut() {
512+
debug!("Calculating variance for: {}", bench);
513+
results.calculate_description();
514+
}
515+
Ok(Some(Self {
516+
data: variance_data,
517+
}))
518+
}
519+
}
520+
521+
#[derive(Debug, Default, Clone, Serialize)]
522+
pub struct BenchmarkVariance {
523+
data: Vec<f64>,
524+
description: BenchmarkVarianceDescription,
525+
}
526+
527+
impl BenchmarkVariance {
528+
/// The ratio of change that we consider significant.
529+
const SIGNFICANT_DELTA_THRESHOLD: f64 = 0.02;
530+
/// The percentage of significant changes that we consider too high
531+
const SIGNFICANT_CHANGE_THRESHOLD: f64 = 5.0;
532+
/// The percentage of change that constitutes noisy data
533+
const NOISE_THRESHOLD: f64 = 0.1;
534+
535+
fn push(&mut self, value: f64) {
536+
self.data.push(value);
537+
}
538+
539+
fn mean(&self) -> f64 {
540+
self.data.iter().sum::<f64>() / self.data.len() as f64
541+
}
542+
543+
fn calculate_description(&mut self) {
544+
self.description = BenchmarkVarianceDescription::Normal;
545+
546+
let results_mean = self.mean();
547+
let mut deltas = self
548+
.data
549+
.windows(2)
550+
.map(|window| (window[0] - window[1]).abs())
551+
.collect::<Vec<_>>();
552+
deltas.sort_by(|d1, d2| d1.partial_cmp(d2).unwrap_or(std::cmp::Ordering::Equal));
553+
let non_significant = deltas
554+
.iter()
555+
.zip(self.data.iter())
556+
.take_while(|(&d, &r)| d / r < Self::SIGNFICANT_DELTA_THRESHOLD)
557+
.collect::<Vec<_>>();
558+
559+
let percent_significant_changes =
560+
((self.data.len() - non_significant.len()) as f64 / self.data.len() as f64) * 100.0;
561+
debug!(
562+
"Percent significant changes: {:.1}%",
563+
percent_significant_changes
564+
);
565+
566+
if percent_significant_changes >= Self::SIGNFICANT_CHANGE_THRESHOLD {
567+
self.description =
568+
BenchmarkVarianceDescription::HighlyVariable(percent_significant_changes);
569+
return;
570+
}
571+
572+
let delta_mean =
573+
non_significant.iter().map(|(&d, _)| d).sum::<f64>() / (non_significant.len() as f64);
574+
let percent_change = (delta_mean / results_mean) * 100.0;
575+
debug!("Percent change: {:.3}%", percent_change);
576+
if percent_change > Self::NOISE_THRESHOLD {
577+
self.description = BenchmarkVarianceDescription::Noisy(percent_change);
578+
}
579+
}
580+
}
581+
582+
#[derive(Debug, Clone, Copy, Serialize)]
583+
#[serde(tag = "type", content = "percent")]
584+
pub enum BenchmarkVarianceDescription {
585+
Normal,
586+
/// A highly variable benchmark that produces many significant changes.
587+
/// This might indicate a benchmark which is very sensitive to compiler changes.
588+
///
589+
/// Cotains the percentage of significant changes.
590+
HighlyVariable(f64),
591+
/// A noisy benchmark which is likely to see changes in performance simply between
592+
/// compiler runs.
593+
///
594+
/// Contains the percent change that happens on average
595+
Noisy(f64),
596+
}
597+
598+
impl Default for BenchmarkVarianceDescription {
599+
fn default() -> Self {
600+
Self::Normal
601+
}
602+
}
603+
485604
/// Gets the previous commit
486605
pub fn prev_commit<'a>(
487606
artifact: &ArtifactId,

site/src/server.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ use serde::de::DeserializeOwned;
2323
use serde::Serialize;
2424

2525
pub use crate::api::{
26-
self, bootstrap, comparison, dashboard, data, github, graph, info, self_profile,
27-
self_profile_raw, status, triage, CommitResponse, ServerResult, StyledBenchmarkName,
26+
self, bootstrap, comparison, dashboard, github, graph, info, self_profile, self_profile_raw,
27+
status, triage, CommitResponse, ServerResult, StyledBenchmarkName,
2828
};
2929
use crate::db::{self, ArtifactId, Cache, Crate, Lookup, Profile};
3030
use crate::interpolate::Interpolated;

0 commit comments

Comments
 (0)