Merge pull request #313 from pv/continuous-bug

mdboom · mdboom · commit 208c2d69612b · 2015-09-04T09:33:01.000-04:00
Refactor `asv continuous` to reuse the table printing from `asv compare`
diff --git a/asv/commands/compare.py b/asv/commands/compare.py
@@ -4,6 +4,7 @@
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
 
+import six
 import itertools
 
 from . import Command
@@ -116,24 +117,39 @@ def run(cls, conf, hash_1, hash_2, factor=2, split=False, machine=None):
             raise util.UserError(
                 "Results for machine '{0} not found".format(machine))
 
+        cls.print_table(conf, hash_1, hash_2, factor=factor, machine=machine,
+                        split=split)
+
+    @classmethod
+    def print_table(cls, conf, hash_1, hash_2, factor, split,
+                    resultset_1=None, resultset_2=None, machine=None,
+                    sort_by_ratio=False, only_changed=False):
         results_1 = {}
         results_2 = {}
 
-        for result in iter_results_for_machine_and_hash(
-                conf.results_dir, machine, hash_1):
-            for key in result.results:
-                for name, value in unroll_result(key, result.results[key]):
-                    if name not in results_1:
-                        results_1[name] = []
-                    results_1[name].append(value)
-
-        for result in iter_results_for_machine_and_hash(
-                conf.results_dir, machine, hash_2):
-            for key in result.results:
-                for name, value in unroll_result(key, result.results[key]):
-                    if name not in results_2:
-                        results_2[name] = []
-                    results_2[name].append(value)
+        def results_default_iter(commit_hash):
+            for result in iter_results_for_machine_and_hash(
+                    conf.results_dir, machine, commit_hash):
+                for key, value in six.iteritems(result.results):
+                    yield key, value
+
+        if resultset_1 is None:
+            resultset_1 = results_default_iter(hash_1)
+
+        if resultset_2 is None:
+            resultset_2 = results_default_iter(hash_2)
+
+        for key, result in resultset_1:
+            for name, value in unroll_result(key, result):
+                if name not in results_1:
+                    results_1[name] = []
+                results_1[name].append(value)
+
+        for key, result in resultset_2:
+            for name, value in unroll_result(key, result):
+                if name not in results_2:
+                    results_2[name] = []
+                results_2[name].append(value)
 
         if len(results_1) == 0:
             raise util.UserError(
@@ -146,7 +162,7 @@ def run(cls, conf, hash_1, hash_2, factor=2, split=False, machine=None):
         benchmarks_1 = set(results_1.keys())
         benchmarks_2 = set(results_2.keys())
 
-        common_benchmarks = sorted(list(benchmarks_1 & benchmarks_2))
+        joint_benchmarks = sorted(list(benchmarks_1 | benchmarks_2))
 
         bench = {}
 
@@ -157,44 +173,74 @@ def run(cls, conf, hash_1, hash_2, factor=2, split=False, machine=None):
         else:
             bench['all'] = []
 
-        for benchmark in common_benchmarks:
+        worsened = False
+        improved = False
 
-            time_1 = mean(results_1[benchmark])
-            time_2 = mean(results_2[benchmark])
+        for benchmark in joint_benchmarks:
+            if benchmark in results_1:
+                time_1 = mean(results_1[benchmark])
+            else:
+                time_1 = float("nan")
+
+            if benchmark in results_2:
+                time_2 = mean(results_2[benchmark])
+            else:
+                time_2 = float("nan")
 
             if _isna(time_1) or _isna(time_2):
                 ratio = 'n/a'
+                ratio_num = 1e9
             else:
-                ratio = "{0:6.2f}".format(time_2 / time_1)
-
-            if _isna(time_1) and _isna(time_2):
+                try:
+                    ratio_num = time_2 / time_1
+                    ratio = "{0:6.2f}".format(ratio_num)
+                except ZeroDivisionError:
+                    ratio_num = 1e9
+                    ratio = "n/a"
+
+            if time_1 is not None and time_2 is None:
+                # introduced a failure
                 color = 'red'
-                mark = ' '
-            elif _isna(time_1) and not _isna(time_2):
+                mark = '!'
+                worsened = True
+            elif time_1 is None and time_2 is not None:
+                # fixed a failure
                 color = 'green'
-                mark = '-'
-            elif not _isna(time_1) and _isna(time_2):
+                mark = ' '
+                improved = True
+            elif time_1 is None and time_2 is None:
+                # both failed
                 color = 'red'
-                mark = '!'
+                mark = ' '
+            elif _isna(time_1) or _isna(time_2):
+                # either one was skipped
+                color = 'default'
+                mark = ' '
             elif time_2 < time_1 / factor:
                 color = 'green'
                 mark = '-'
+                improved = True
             elif time_2 > time_1 * factor:
                 color = 'red'
                 mark = '+'
+                worsened = True
             else:
                 color = 'default'
                 mark = ' '
+
+            if only_changed and mark == ' ':
+                continue
+
             details = "{0:1s} {1:>9s}  {2:>9s} {3:>9s}  ".format(
                 mark,
                 human_value(time_1, "seconds"),
                 human_value(time_2, "seconds"),
                 ratio)
 
             if split:
-                bench[color].append((color, details, benchmark))
+                bench[color].append((color, details, benchmark, ratio_num))
             else:
-                bench['all'].append((color, details, benchmark))
+                bench['all'].append((color, details, benchmark, ratio_num))
 
         if split:
             keys = ['green', 'default', 'red']
@@ -212,12 +258,18 @@ def run(cls, conf, hash_1, hash_2, factor=2, split=False, machine=None):
             if len(bench[key]) == 0:
                 continue
 
-            print("")
-            print(titles[key])
-            print("")
+            if not only_changed:
+                print("")
+                print(titles[key])
+                print("")
             print("    before     after       ratio")
             print("  [{0:8s}] [{1:8s}]".format(hash_1[:8], hash_2[:8]))
 
-            for color, details, benchmark in bench[key]:
+            if sort_by_ratio:
+                bench[key].sort(key=lambda v: v[3], reverse=True)
+
+            for color, details, benchmark, ratio in bench[key]:
                 color_print(details, color, end='')
                 print(benchmark)
+
+        return worsened, improved
diff --git a/asv/commands/continuous.py b/asv/commands/continuous.py
@@ -5,17 +5,15 @@
                         unicode_literals)
 
 import os
-
 import six
 
 from . import Command
 from .run import Run
-from .compare import unroll_result
+from .compare import Compare
 
-from ..console import truncate_left, color_print
 from ..repo import get_repo
+from ..console import color_print
 from .. import results
-from .. import util
 
 from . import common_args
 
@@ -75,73 +73,27 @@ def run(cls, conf, branch=None, base=None, factor=2.0, show_stderr=False, bench=
         if result:
             return result
 
-        tabulated = []
-        all_benchmarks = {}
-        for commit_hash in commit_hashes:
-            subtab = {}
-            totals = {}
-
+        def results_iter(commit_hash):
             for env in run_objs['environments']:
                 filename = results.get_filename(
                     run_objs['machine_params']['machine'], commit_hash, env.name)
                 filename = os.path.join(conf.results_dir, filename)
                 result = results.Results.load(filename)
-
-                for benchmark_name, benchmark in six.iteritems(run_objs['benchmarks']):
-                    for name, value in unroll_result(benchmark_name,
-                                                     result.results.get(benchmark_name, None)):
-                        if value is not None:
-                            all_benchmarks[name] = benchmark
-                            subtab.setdefault(name, 0.0)
-                            totals.setdefault(name, 0)
-                            subtab[name] += value
-                            totals[name] += 1
-
-            for name in totals.keys():
-                subtab[name] /= totals[name]
-
-            tabulated.append(subtab)
-
-        after, before = tabulated
-
-        table = []
-        slowed_down = False
-        for name, benchmark in six.iteritems(all_benchmarks):
-            if before[name] == 0:
-                if after[name] == 0:
-                    change = 1.0
-                else:
-                    change = float('inf')
-            else:
-                change = after[name] / before[name]
-
-            if change > factor or change < 1.0 / factor:
-                table.append(
-                    (change, before[name], after[name], name, benchmark))
-            if change > factor:
-                slowed_down = True
-
-        print("")
-
-        if not len(table):
+                for name, benchmark in six.iteritems(run_objs['benchmarks']):
+                    yield name, result.results.get(name, float("nan"))
+
+        status = Compare.print_table(conf, parent, head,
+                                     resultset_1=results_iter(parent),
+                                     resultset_2=results_iter(head),
+                                     factor=factor, split=False, only_changed=True,
+                                     sort_by_ratio=True)
+        worsened, improved = status
+
+        if worsened:
+            color_print("SOME BENCHMARKS HAVE CHANGED SIGNIFICANTLY.\n", 'red')
+        elif improved:
+            color_print("SOME BENCHMARKS HAVE CHANGED SIGNIFICANTLY.\n", 'green')
+        else:
             color_print("BENCHMARKS NOT SIGNIFICANTLY CHANGED.\n", 'green')
-            return 0
-
-        table.sort(reverse=True)
-
-        color_print(
-            "{0:40s}   {1:>8}   {2:>8}   {3:>8}\n".format("BENCHMARK", "BEFORE", "AFTER", "FACTOR"),
-            'blue')
-        for change, before, after, name, benchmark in table:
-            before_display = util.human_value(before, benchmark['unit'])
-            after_display = util.human_value(after, benchmark['unit'])
-
-            print("{0:40s}   {1:>8}   {2:>8}   {3:.8f}x".format(
-                truncate_left(name, 40),
-                before_display, after_display, change))
-
-        print("")
-        color_print(
-            "SOME BENCHMARKS HAVE CHANGED SIGNIFICANTLY.\n", 'red')
 
-        return slowed_down
+        return worsened
diff --git a/test/example_results/cheetah/fcf8c079-py2.7-Cython-numpy1.8.json b/test/example_results/cheetah/fcf8c079-py2.7-Cython-numpy1.8.json
@@ -29,7 +29,7 @@
         "time_units.time_simple_unit_parse": 0.003806138038635254, 
         "time_units.time_unit_compose": 0.0015271089076995849, 
         "time_units.time_unit_parse": 0.011466357707977295, 
-        "time_units.time_unit_to": 4.8321509361267087e-05, 
+        "time_units.time_unit_to": 1.8321509361267087e-05, 
         "time_units.time_very_simple_unit_parse": 1.3104991912841798e-05,
 	"time_other.time_parameterized": {
 	    "params": [["1", "2", "3"]],
diff --git a/test/test_compare.py b/test/test_compare.py
@@ -25,6 +25,7 @@
 
     before     after       ratio
   [22b920c6] [fcf8c079]
+!       n/a     failed       n/a  params_examples.ParamSuite.track_value
      failed     failed       n/a  time_AAA_failure
         n/a        n/a       n/a  time_AAA_skip
 !  454.03μs     failed       n/a  time_coordinates.time_latitude
@@ -40,10 +41,23 @@
 +  125.11μs     3.81ms     30.42  time_units.time_simple_unit_parse
      1.64ms     1.53ms      0.93  time_units.time_unit_compose
 +  372.11μs    11.47ms     30.81  time_units.time_unit_parse
-    69.09μs    48.32μs      0.70  time_units.time_unit_to
+-   69.09μs    18.32μs      0.27  time_units.time_unit_to
     11.87μs    13.10μs      1.10  time_units.time_very_simple_unit_parse
 """
 
+REFERENCE_ONLY_CHANGED = """
+    before     after       ratio
+  [22b920c6] [fcf8c079]
+!       n/a     failed       n/a  params_examples.ParamSuite.track_value
+!  454.03μs     failed       n/a  time_coordinates.time_latitude
+!     3.00s     failed       n/a  time_other.time_parameterized(3)
++  933.71μs   108.22ms    115.90  time_quantity.time_quantity_init_array
++    1.75ms   152.84ms     87.28  time_quantity.time_quantity_array_conversion
++  372.11μs    11.47ms     30.81  time_units.time_unit_parse
++  125.11μs     3.81ms     30.42  time_units.time_simple_unit_parse
++    1.31ms     7.75ms      5.91  time_quantity.time_quantity_ufunc_sin
+-   69.09μs    18.32μs      0.27  time_units.time_unit_to
+"""
 
 def test_compare(capsys, tmpdir):
     tmpdir = six.text_type(tmpdir)
@@ -60,3 +74,12 @@ def test_compare(capsys, tmpdir):
 
     text, err = capsys.readouterr()
     assert text.strip() == REFERENCE.strip()
+
+    # Check print_table output as called from Continuous
+    status = Compare.print_table(conf, '22b920c6', 'fcf8c079', factor=2, machine='cheetah',
+                                 split=False, only_changed=True, sort_by_ratio=True)
+    worsened, improved = status
+    assert worsened
+    assert improved
+    text, err = capsys.readouterr()
+    assert text.strip() == REFERENCE_ONLY_CHANGED.strip()
diff --git a/test/test_workflow.py b/test/test_workflow.py
@@ -141,7 +141,7 @@ def test_continuous(capfd, basic_conf):
 
     text, err = capfd.readouterr()
     assert "SOME BENCHMARKS HAVE CHANGED SIGNIFICANTLY" in text
-    assert "params_examples.track_find_test(2)              1.0        6.0   6.00000000x" in text
+    assert "+     1.00s      6.00s      6.00  params_examples.track_find_test(2)" in text
     assert "params_examples.ClassOne" in text