From 4f4fa4c3c6aa5521ed96f9d3faa4116e0115da1f Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Mon, 14 Oct 2019 15:46:38 +0200 Subject: [PATCH 1/3] PERF: Add uint64 benchmark merge_asof (GH28922) --- asv_bench/benchmarks/join_merge.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 6aa82a43a4d6a..809e5b85fd9a4 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -303,6 +303,9 @@ def setup(self, direction): df1["time32"] = np.int32(df1.time) df2["time32"] = np.int32(df2.time) + df1["timeu64"] = np.uint64(df1.time) + df2["timeu64"] = np.uint64(df2.time) + self.df1a = df1[["time", "value1"]] self.df2a = df2[["time", "value2"]] self.df1b = df1[["time", "key", "value1"]] @@ -313,6 +316,8 @@ def setup(self, direction): self.df2d = df2[["time32", "value2"]] self.df1e = df1[["time", "key", "key2", "value1"]] self.df2e = df2[["time", "key", "key2", "value2"]] + self.df1f = df1[["timeu64", "value1"]] + self.df2f = df2[["timeu64", "value2"]] def time_on_int(self, direction): merge_asof(self.df1a, self.df2a, on="time", direction=direction) @@ -320,6 +325,9 @@ def time_on_int(self, direction): def time_on_int32(self, direction): merge_asof(self.df1d, self.df2d, on="time32", direction=direction) + def time_on_uint64(self, direction): + merge_asof(self.df1f, self.df2f, on="timeu64", direction=direction) + def time_by_object(self, direction): merge_asof(self.df1b, self.df2b, on="time", by="key", direction=direction) From 54ae56b62dc481453a1bae0ed36ba9d29416dc9e Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Mon, 14 Oct 2019 16:26:10 +0200 Subject: [PATCH 2/3] PERF: Parameterized tolerance benchmark (GH28922) --- asv_bench/benchmarks/join_merge.py | 45 ++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 809e5b85fd9a4..110e14853b48c 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -273,10 +273,11 @@ def time_merge_ordered(self): class MergeAsof: - params = [["backward", "forward", "nearest"]] - param_names = ["direction"] + params = [["backward", "forward", "nearest"], + [None, 5000]] + param_names = ["direction", "tolerance"] - def setup(self, direction): + def setup(self, direction, tolerance): one_count = 200000 two_count = 1000000 @@ -319,24 +320,38 @@ def setup(self, direction): self.df1f = df1[["timeu64", "value1"]] self.df2f = df2[["timeu64", "value2"]] - def time_on_int(self, direction): - merge_asof(self.df1a, self.df2a, on="time", direction=direction) + def time_on_int(self, direction, tolerance): + merge_asof( + self.df1a, self.df2a, on="time", direction=direction, tolerance=tolerance + ) - def time_on_int32(self, direction): - merge_asof(self.df1d, self.df2d, on="time32", direction=direction) + def time_on_int32(self, direction, tolerance): + merge_asof( + self.df1d, self.df2d, on="time32", direction=direction, tolerance=tolerance + ) - def time_on_uint64(self, direction): - merge_asof(self.df1f, self.df2f, on="timeu64", direction=direction) + def time_on_uint64(self, direction, tolerance): + merge_asof( + self.df1f, self.df2f, on="timeu64", + direction=direction, tolerance=tolerance + ) - def time_by_object(self, direction): - merge_asof(self.df1b, self.df2b, on="time", by="key", direction=direction) + def time_by_object(self, direction, tolerance): + merge_asof( + self.df1b, self.df2b, on="time", by="key", + direction=direction, tolerance=tolerance + ) - def time_by_int(self, direction): - merge_asof(self.df1c, self.df2c, on="time", by="key2", direction=direction) + def time_by_int(self, direction, tolerance): + merge_asof( + self.df1c, self.df2c, on="time", by="key2", + direction=direction, tolerance=tolerance + ) - def time_multiby(self, direction): + def time_multiby(self, direction, tolerance): merge_asof( - self.df1e, self.df2e, on="time", by=["key", "key2"], direction=direction + self.df1e, self.df2e, on="time", by=["key", "key2"], + direction=direction, tolerance=tolerance ) From aa521210aefef9cdd9205bfaaf5c163543968120 Mon Sep 17 00:00:00 2001 From: Jeroen Kant Date: Mon, 14 Oct 2019 17:40:36 +0200 Subject: [PATCH 3/3] PERF: Fixed formatting of asv's (#28922) --- asv_bench/benchmarks/join_merge.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 110e14853b48c..5cf9f6336ba0c 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -273,8 +273,7 @@ def time_merge_ordered(self): class MergeAsof: - params = [["backward", "forward", "nearest"], - [None, 5000]] + params = [["backward", "forward", "nearest"], [None, 5]] param_names = ["direction", "tolerance"] def setup(self, direction, tolerance): @@ -332,26 +331,37 @@ def time_on_int32(self, direction, tolerance): def time_on_uint64(self, direction, tolerance): merge_asof( - self.df1f, self.df2f, on="timeu64", - direction=direction, tolerance=tolerance + self.df1f, self.df2f, on="timeu64", direction=direction, tolerance=tolerance ) def time_by_object(self, direction, tolerance): merge_asof( - self.df1b, self.df2b, on="time", by="key", - direction=direction, tolerance=tolerance + self.df1b, + self.df2b, + on="time", + by="key", + direction=direction, + tolerance=tolerance, ) def time_by_int(self, direction, tolerance): merge_asof( - self.df1c, self.df2c, on="time", by="key2", - direction=direction, tolerance=tolerance + self.df1c, + self.df2c, + on="time", + by="key2", + direction=direction, + tolerance=tolerance, ) def time_multiby(self, direction, tolerance): merge_asof( - self.df1e, self.df2e, on="time", by=["key", "key2"], - direction=direction, tolerance=tolerance + self.df1e, + self.df2e, + on="time", + by=["key", "key2"], + direction=direction, + tolerance=tolerance, )