@@ -8,10 +8,12 @@ use crate::load::SiteCtxt;
8
8
use crate :: selector:: { self , Tag } ;
9
9
10
10
use collector:: Bound ;
11
+ use log:: debug;
11
12
use serde:: Serialize ;
12
13
13
14
use std:: collections:: HashMap ;
14
15
use std:: error:: Error ;
16
+ use std:: hash:: Hash ;
15
17
use std:: sync:: Arc ;
16
18
17
19
type BoxedError = Box < dyn Error + Send + Sync > ;
@@ -99,6 +101,7 @@ pub async fn handle_compare(
99
101
let is_contiguous = comparison. is_contiguous ( & * conn, & master_commits) . await ;
100
102
101
103
Ok ( api:: comparison:: Response {
104
+ variance : comparison. benchmark_variances . map ( |b| b. data ) ,
102
105
prev,
103
106
a : comparison. a . into ( ) ,
104
107
b : comparison. b . into ( ) ,
@@ -222,7 +225,7 @@ pub async fn compare(
222
225
}
223
226
224
227
/// Compare two bounds on a given stat
225
- pub async fn compare_given_commits (
228
+ async fn compare_given_commits (
226
229
start : Bound ,
227
230
end : Bound ,
228
231
stat : String ,
@@ -247,18 +250,41 @@ pub async fn compare_given_commits(
247
250
248
251
// `responses` contains series iterators. The first element in the iterator is the data
249
252
// for `a` and the second is the data for `b`
250
- let mut responses = ctxt. query :: < Option < f64 > > ( query, aids) . await ?;
253
+ let mut responses = ctxt. query :: < Option < f64 > > ( query. clone ( ) , aids) . await ?;
251
254
252
255
let conn = ctxt. conn ( ) . await ;
253
-
254
256
Ok ( Some ( Comparison {
257
+ benchmark_variances : BenchmarkVariances :: calculate ( ctxt, a. clone ( ) , master_commits, stat)
258
+ . await ?,
255
259
a : ArtifactData :: consume_one ( & * conn, a. clone ( ) , & mut responses, master_commits) . await ,
256
260
b : ArtifactData :: consume_one ( & * conn, b. clone ( ) , & mut responses, master_commits) . await ,
257
261
} ) )
258
262
}
259
263
264
+ fn previous_commits (
265
+ mut from : ArtifactId ,
266
+ n : usize ,
267
+ master_commits : & [ collector:: MasterCommit ] ,
268
+ ) -> Vec < ArtifactId > {
269
+ let mut prevs = Vec :: with_capacity ( n) ;
270
+ while prevs. len ( ) < n {
271
+ match prev_commit ( & from, master_commits) {
272
+ Some ( c) => {
273
+ let new = ArtifactId :: Commit ( database:: Commit {
274
+ sha : c. sha . clone ( ) ,
275
+ date : database:: Date ( c. time ) ,
276
+ } ) ;
277
+ from = new. clone ( ) ;
278
+ prevs. push ( new) ;
279
+ }
280
+ None => break ,
281
+ }
282
+ }
283
+ prevs
284
+ }
285
+
260
286
/// Data associated with a specific artifact
261
- #[ derive( Debug , Clone , Serialize ) ]
287
+ #[ derive( Debug , Clone ) ]
262
288
pub struct ArtifactData {
263
289
/// The artifact in question
264
290
pub artifact : ArtifactId ,
@@ -288,25 +314,7 @@ impl ArtifactData {
288
314
where
289
315
T : Iterator < Item = ( ArtifactId , Option < f64 > ) > ,
290
316
{
291
- let mut data = HashMap :: new ( ) ;
292
-
293
- for response in series {
294
- let ( id, point) = response. series . next ( ) . expect ( "must have element" ) ;
295
- assert_eq ! ( artifact, id) ;
296
-
297
- let point = if let Some ( pt) = point {
298
- pt
299
- } else {
300
- continue ;
301
- } ;
302
- data. entry ( format ! (
303
- "{}-{}" ,
304
- response. path. get:: <Crate >( ) . unwrap( ) ,
305
- response. path. get:: <Profile >( ) . unwrap( ) ,
306
- ) )
307
- . or_insert_with ( Vec :: new)
308
- . push ( ( response. path . get :: < Cache > ( ) . unwrap ( ) . to_string ( ) , point) ) ;
309
- }
317
+ let data = data_from_series ( series) ;
310
318
311
319
let bootstrap = conn
312
320
. get_bootstrap ( & [ conn. artifact_id ( & artifact) . await ] )
@@ -348,6 +356,32 @@ impl ArtifactData {
348
356
}
349
357
}
350
358
359
+ fn data_from_series < T > (
360
+ series : & mut [ selector:: SeriesResponse < T > ] ,
361
+ ) -> HashMap < String , Vec < ( String , f64 ) > >
362
+ where
363
+ T : Iterator < Item = ( ArtifactId , Option < f64 > ) > ,
364
+ {
365
+ let mut data = HashMap :: new ( ) ;
366
+ for response in series {
367
+ let ( _, point) = response. series . next ( ) . expect ( "must have element" ) ;
368
+
369
+ let point = if let Some ( pt) = point {
370
+ pt
371
+ } else {
372
+ continue ;
373
+ } ;
374
+ data. entry ( format ! (
375
+ "{}-{}" ,
376
+ response. path. get:: <Crate >( ) . unwrap( ) ,
377
+ response. path. get:: <Profile >( ) . unwrap( ) ,
378
+ ) )
379
+ . or_insert_with ( Vec :: new)
380
+ . push ( ( response. path . get :: < Cache > ( ) . unwrap ( ) . to_string ( ) , point) ) ;
381
+ }
382
+ data
383
+ }
384
+
351
385
impl From < ArtifactData > for api:: comparison:: ArtifactData {
352
386
fn from ( data : ArtifactData ) -> Self {
353
387
api:: comparison:: ArtifactData {
@@ -369,20 +403,18 @@ impl From<ArtifactData> for api::comparison::ArtifactData {
369
403
370
404
// A comparison of two artifacts
371
405
pub struct Comparison {
406
+ /// Data on how variable benchmarks have historically been
407
+ ///
408
+ /// Is `None` if we cannot determine historical variance
409
+ pub benchmark_variances : Option < BenchmarkVariances > ,
372
410
pub a : ArtifactData ,
373
411
pub b : ArtifactData ,
374
412
}
375
413
376
414
impl Comparison {
377
415
/// Gets the previous commit before `a`
378
416
pub fn prev ( & self , master_commits : & [ collector:: MasterCommit ] ) -> Option < String > {
379
- match & self . a . artifact {
380
- ArtifactId :: Commit ( a) => master_commits
381
- . iter ( )
382
- . find ( |c| c. sha == a. sha )
383
- . map ( |c| c. parent_sha . clone ( ) ) ,
384
- ArtifactId :: Artifact ( _) => None ,
385
- }
417
+ prev_commit ( & self . a . artifact , master_commits) . map ( |c| c. parent_sha . clone ( ) )
386
418
}
387
419
388
420
/// Determines if `a` and `b` are contiguous
@@ -405,13 +437,7 @@ impl Comparison {
405
437
406
438
/// Gets the sha of the next commit after `b`
407
439
pub fn next ( & self , master_commits : & [ collector:: MasterCommit ] ) -> Option < String > {
408
- match & self . b . artifact {
409
- ArtifactId :: Commit ( b) => master_commits
410
- . iter ( )
411
- . find ( |c| c. parent_sha == b. sha )
412
- . map ( |c| c. sha . clone ( ) ) ,
413
- ArtifactId :: Artifact ( _) => None ,
414
- }
440
+ next_commit ( & self . a . artifact , master_commits) . map ( |c| c. parent_sha . clone ( ) )
415
441
}
416
442
417
443
fn get_benchmarks < ' a > ( & ' a self ) -> Vec < BenchmarkComparison < ' a > > {
@@ -437,6 +463,169 @@ impl Comparison {
437
463
}
438
464
}
439
465
466
+ /// A description of the amount of variance a certain benchmark is historically
467
+ /// experiencing at a given point in time.
468
+ pub struct BenchmarkVariances {
469
+ /// Variance data on a per benchmark basis
470
+ /// Key: $benchmark-$profile-$cache
471
+ /// Value: `BenchmarkVariance`
472
+ pub data : HashMap < String , BenchmarkVariance > ,
473
+ }
474
+
475
+ impl BenchmarkVariances {
476
+ async fn calculate (
477
+ ctxt : & SiteCtxt ,
478
+ from : ArtifactId ,
479
+ master_commits : & [ collector:: MasterCommit ] ,
480
+ stat : String ,
481
+ ) -> Result < Option < Self > , BoxedError > {
482
+ // get all crates, cache, and profile combinations for the given stat
483
+ let query = selector:: Query :: new ( )
484
+ . set :: < String > ( Tag :: Crate , selector:: Selector :: All )
485
+ . set :: < String > ( Tag :: Cache , selector:: Selector :: All )
486
+ . set :: < String > ( Tag :: Profile , selector:: Selector :: All )
487
+ . set ( Tag :: ProcessStatistic , selector:: Selector :: One ( stat) ) ;
488
+
489
+ let num_commits = 100 ;
490
+ let previous_commits = Arc :: new ( previous_commits ( from, num_commits, master_commits) ) ;
491
+ if previous_commits. len ( ) < num_commits {
492
+ return Ok ( None ) ;
493
+ }
494
+ let mut previous_commit_series = ctxt
495
+ . query :: < Option < f64 > > ( query, previous_commits. clone ( ) )
496
+ . await ?;
497
+
498
+ let mut variance_data: HashMap < String , BenchmarkVariance > = HashMap :: new ( ) ;
499
+ for _ in previous_commits. iter ( ) {
500
+ let series_data = data_from_series ( & mut previous_commit_series) ;
501
+ for ( bench_and_profile, data) in series_data {
502
+ for ( cache, val) in data {
503
+ variance_data
504
+ . entry ( format ! ( "{}-{}" , bench_and_profile, cache) )
505
+ . or_default ( )
506
+ . push ( val) ;
507
+ }
508
+ }
509
+ }
510
+
511
+ for ( bench, results) in variance_data. iter_mut ( ) {
512
+ debug ! ( "Calculating variance for: {}" , bench) ;
513
+ results. calculate_description ( ) ;
514
+ }
515
+ Ok ( Some ( Self {
516
+ data : variance_data,
517
+ } ) )
518
+ }
519
+ }
520
+
521
+ #[ derive( Debug , Default , Clone , Serialize ) ]
522
+ pub struct BenchmarkVariance {
523
+ data : Vec < f64 > ,
524
+ description : BenchmarkVarianceDescription ,
525
+ }
526
+
527
+ impl BenchmarkVariance {
528
+ /// The ratio of change that we consider significant.
529
+ const SIGNFICANT_DELTA_THRESHOLD : f64 = 0.01 ;
530
+ /// The percentage of significant changes that we consider too high
531
+ const SIGNFICANT_CHANGE_THRESHOLD : f64 = 5.0 ;
532
+ /// The percentage of change that constitutes noisy data
533
+ const NOISE_THRESHOLD : f64 = 0.1 ;
534
+
535
+ fn push ( & mut self , value : f64 ) {
536
+ self . data . push ( value) ;
537
+ }
538
+
539
+ fn mean ( & self ) -> f64 {
540
+ self . data . iter ( ) . sum :: < f64 > ( ) / self . data . len ( ) as f64
541
+ }
542
+
543
+ fn calculate_description ( & mut self ) {
544
+ self . description = BenchmarkVarianceDescription :: Normal ;
545
+
546
+ let results_mean = self . mean ( ) ;
547
+ let mut deltas = self
548
+ . data
549
+ . windows ( 2 )
550
+ . map ( |window| ( window[ 0 ] - window[ 1 ] ) . abs ( ) )
551
+ . collect :: < Vec < _ > > ( ) ;
552
+ deltas. sort_by ( |d1, d2| d1. partial_cmp ( d2) . unwrap_or ( std:: cmp:: Ordering :: Equal ) ) ;
553
+ let non_significant = deltas
554
+ . iter ( )
555
+ . zip ( self . data . iter ( ) )
556
+ . take_while ( |( & d, & r) | d / r < Self :: SIGNFICANT_DELTA_THRESHOLD )
557
+ . collect :: < Vec < _ > > ( ) ;
558
+
559
+ let percent_significant_changes =
560
+ ( ( deltas. len ( ) - non_significant. len ( ) ) as f64 / deltas. len ( ) as f64 ) * 100.0 ;
561
+ debug ! (
562
+ "Percent significant changes: {:.1}%" ,
563
+ percent_significant_changes
564
+ ) ;
565
+
566
+ if percent_significant_changes > Self :: SIGNFICANT_CHANGE_THRESHOLD {
567
+ self . description =
568
+ BenchmarkVarianceDescription :: HighlyVariable ( percent_significant_changes) ;
569
+ return ;
570
+ }
571
+
572
+ let delta_mean =
573
+ non_significant. iter ( ) . map ( |( & d, _) | d) . sum :: < f64 > ( ) / ( non_significant. len ( ) as f64 ) ;
574
+ let percent_change = ( delta_mean / results_mean) * 100.0 ;
575
+ debug ! ( "Percent change: {:.3}%" , percent_change) ;
576
+ if percent_change > Self :: NOISE_THRESHOLD {
577
+ self . description = BenchmarkVarianceDescription :: Noisy ( percent_change) ;
578
+ }
579
+ }
580
+ }
581
+
582
+ #[ derive( Debug , Clone , Copy , Serialize ) ]
583
+ #[ serde( tag = "type" , content = "percent" ) ]
584
+ pub enum BenchmarkVarianceDescription {
585
+ Normal ,
586
+ /// A highly variable benchmark that produces many significant changes.
587
+ /// This might indicate a benchmark which is very sensitive to compiler changes.
588
+ ///
589
+ /// Cotains the percentage of significant changes.
590
+ HighlyVariable ( f64 ) ,
591
+ /// A noisy benchmark which is likely to see changes in performance simply between
592
+ /// compiler runs.
593
+ ///
594
+ /// Contains the percent change that happens on average
595
+ Noisy ( f64 ) ,
596
+ }
597
+
598
+ impl Default for BenchmarkVarianceDescription {
599
+ fn default ( ) -> Self {
600
+ Self :: Normal
601
+ }
602
+ }
603
+
604
+ /// Gets the previous commit
605
+ pub fn prev_commit < ' a > (
606
+ artifact : & ArtifactId ,
607
+ master_commits : & ' a [ collector:: MasterCommit ] ,
608
+ ) -> Option < & ' a collector:: MasterCommit > {
609
+ match & artifact {
610
+ ArtifactId :: Commit ( a) => {
611
+ let current = master_commits. iter ( ) . find ( |c| c. sha == a. sha ) ?;
612
+ master_commits. iter ( ) . find ( |c| c. sha == current. parent_sha )
613
+ }
614
+ ArtifactId :: Artifact ( _) => None ,
615
+ }
616
+ }
617
+
618
+ /// Gets the next commit
619
+ pub fn next_commit < ' a > (
620
+ artifact : & ArtifactId ,
621
+ master_commits : & ' a [ collector:: MasterCommit ] ,
622
+ ) -> Option < & ' a collector:: MasterCommit > {
623
+ match artifact {
624
+ ArtifactId :: Commit ( b) => master_commits. iter ( ) . find ( |c| c. parent_sha == b. sha ) ,
625
+ ArtifactId :: Artifact ( _) => None ,
626
+ }
627
+ }
628
+
440
629
// A single comparison based on benchmark and cache state
441
630
#[ derive( Debug ) ]
442
631
pub struct BenchmarkComparison < ' a > {
0 commit comments