From 485d7724761e653b7e4e383498a5d0bc6b2cc05e Mon Sep 17 00:00:00 2001 From: steliospetrakis02 Date: Thu, 11 May 2023 02:17:51 +0300 Subject: [PATCH 1/7] Add test for groupby with TimeGrouper --- pandas/tests/groupby/test_groupby.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2e432a768af9e..7bda7c575d994 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3058,3 +3058,27 @@ def test_groupby_selection_other_methods(df): tm.assert_frame_equal( g.filter(lambda x: len(x) == 3), g_exp.filter(lambda x: len(x) == 3) ) + + +def test_groupby_with_Time_Grouper(): + idx2 = [ + to_datetime("2016-08-31 22:08:12.000"), + to_datetime("2016-08-31 22:09:12.200"), + to_datetime("2016-08-31 22:20:12.400"), + ] + + test_data = DataFrame( + {"quant": [1.0, 1.0, 3.0], "quant2": [1.0, 1.0, 3.0], "time2": idx2} + ) + + expected_output = DataFrame( + { + "time2": date_range("2016-08-31 22:08:00", periods=13, freq="1T"), + "quant": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + "quant2": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + } + ) + + df = test_data.groupby(Grouper(key="time2", freq="1T")).count().reset_index() + + tm.assert_frame_equal(df, expected_output) From 66db1b963c402f5c89e26ce16c7dcce0abfb3263 Mon Sep 17 00:00:00 2001 From: steliospetrakis02 Date: Thu, 11 May 2023 16:03:27 +0300 Subject: [PATCH 2/7] add asv_bench for block --- pandas/tests/groupby/test_groupby.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7bda7c575d994..2e432a768af9e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3058,27 +3058,3 @@ def test_groupby_selection_other_methods(df): tm.assert_frame_equal( g.filter(lambda x: len(x) == 3), g_exp.filter(lambda x: len(x) == 3) ) - - -def 
test_groupby_with_Time_Grouper(): - idx2 = [ - to_datetime("2016-08-31 22:08:12.000"), - to_datetime("2016-08-31 22:09:12.200"), - to_datetime("2016-08-31 22:20:12.400"), - ] - - test_data = DataFrame( - {"quant": [1.0, 1.0, 3.0], "quant2": [1.0, 1.0, 3.0], "time2": idx2} - ) - - expected_output = DataFrame( - { - "time2": date_range("2016-08-31 22:08:00", periods=13, freq="1T"), - "quant": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], - "quant2": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], - } - ) - - df = test_data.groupby(Grouper(key="time2", freq="1T")).count().reset_index() - - tm.assert_frame_equal(df, expected_output) From d9937ad6154b4ec0b64b1bb212a741001123bea9 Mon Sep 17 00:00:00 2001 From: steliospetrakis02 Date: Thu, 11 May 2023 16:03:44 +0300 Subject: [PATCH 3/7] add asv_bench for block --- asv_bench/benchmarks/block.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 asv_bench/benchmarks/block.py diff --git a/asv_bench/benchmarks/block.py b/asv_bench/benchmarks/block.py new file mode 100644 index 0000000000000..2f2b0569d1236 --- /dev/null +++ b/asv_bench/benchmarks/block.py @@ -0,0 +1,29 @@ +from datetime import datetime + +import numpy as np + +import pandas as pd + + +class Block: + params = [ + (True, "True"), + (np.array(True), "np.array(True)"), + ] + + def setup(self, true_value): + self.df = pd.DataFrame( + False, + columns=np.arange(500).astype(str), + index=pd.date_range("2010-01-01", "2011-01-01"), + ) + + self.true_value = true_value + + def time_test(self, true_value): + """Test time for assigning a slice `True` and `np.array(True)`""" + tmp_df = self.df.copy() + + start = datetime(2010, 5, 1) + end = datetime(2010, 9, 1) + tmp_df.loc[start:end, :] = true_value From f990e83511b45f8960efb711ad76c7ee3b64ee27 Mon Sep 17 00:00:00 2001 From: steliospetrakis02 Date: Thu, 11 May 2023 20:15:49 +0300 Subject: [PATCH 4/7] Delete block.py , move class Block to indexing.py , as requested and delete df.copy --- 
asv_bench/benchmarks/block.py | 29 ----------------------------- asv_bench/benchmarks/indexing.py | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 29 deletions(-) delete mode 100644 asv_bench/benchmarks/block.py diff --git a/asv_bench/benchmarks/block.py b/asv_bench/benchmarks/block.py deleted file mode 100644 index 2f2b0569d1236..0000000000000 --- a/asv_bench/benchmarks/block.py +++ /dev/null @@ -1,29 +0,0 @@ -from datetime import datetime - -import numpy as np - -import pandas as pd - - -class Block: - params = [ - (True, "True"), - (np.array(True), "np.array(True)"), - ] - - def setup(self, true_value): - self.df = pd.DataFrame( - False, - columns=np.arange(500).astype(str), - index=pd.date_range("2010-01-01", "2011-01-01"), - ) - - self.true_value = true_value - - def time_test(self, true_value): - """Test time for assigning a slice `True` and `np.array(True)`""" - tmp_df = self.df.copy() - - start = datetime(2010, 5, 1) - end = datetime(2010, 9, 1) - tmp_df.loc[start:end, :] = true_value diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 53827cfcf64fb..08e59953feeeb 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -531,4 +531,29 @@ def time_chained_indexing(self, mode): df2["C"] = 1.0 +from datetime import datetime + + +class Block: + params = [ + (True, "True"), + (np.array(True), "np.array(True)"), + ] + + def setup(self, true_value): + self.df = DataFrame( + False, + columns=np.arange(500).astype(str), + index=date_range("2010-01-01", "2011-01-01"), + ) + + self.true_value = true_value + + def time_test(self, true_value): + """Test time for assigning a slice `True` and `np.array(True)`""" + start = datetime(2010, 5, 1) + end = datetime(2010, 9, 1) + self.df.loc[start:end, :] = true_value + + from .pandas_vb_common import setup # noqa: F401 isort:skip From 6a73ec510b04af64b443d72c66d84411a468c768 Mon Sep 17 00:00:00 2001 From: steliospetrakis02 Date: Fri, 12 May 2023 
00:53:17 +0300 Subject: [PATCH 5/7] Add one more parameter to signatures seems to solve the issue --- asv_bench/benchmarks/indexing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 08e59953feeeb..7db89465ffe5a 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -540,7 +540,7 @@ class Block: (np.array(True), "np.array(True)"), ] - def setup(self, true_value): + def setup(self, true_value, name): self.df = DataFrame( False, columns=np.arange(500).astype(str), @@ -549,8 +549,7 @@ def setup(self, true_value): self.true_value = true_value - def time_test(self, true_value): - """Test time for assigning a slice `True` and `np.array(True)`""" + def time_test(self, true_value, name): start = datetime(2010, 5, 1) end = datetime(2010, 9, 1) self.df.loc[start:end, :] = true_value From f322047e24509018f94c397ce3b5a546ab9528d9 Mon Sep 17 00:00:00 2001 From: steliospetrakis02 Date: Fri, 12 May 2023 00:59:28 +0300 Subject: [PATCH 6/7] change name to mode --- asv_bench/benchmarks/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 7db89465ffe5a..fea45298adaba 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -540,7 +540,7 @@ class Block: (np.array(True), "np.array(True)"), ] - def setup(self, true_value, name): + def setup(self, true_value, mode): self.df = DataFrame( False, columns=np.arange(500).astype(str), @@ -549,7 +549,7 @@ def setup(self, true_value, name): self.true_value = true_value - def time_test(self, true_value, name): + def time_test(self, true_value, mode): start = datetime(2010, 5, 1) end = datetime(2010, 9, 1) self.df.loc[start:end, :] = true_value From 5bde90cc1bea0f841e2146ab19bcf40d63cad728 Mon Sep 17 00:00:00 2001 From: steliospetrakis02 Date: Fri, 12 May 2023 20:47:46 +0300 Subject:
[PATCH 7/7] Move import to the start of the file --- asv_bench/benchmarks/indexing.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index fea45298adaba..84d95a23bd446 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -3,6 +3,7 @@ lower-level methods directly on Index and subclasses, see index_object.py, indexing_engine.py, and index_cached.py """ +from datetime import datetime import warnings import numpy as np @@ -531,9 +532,6 @@ def time_chained_indexing(self, mode): df2["C"] = 1.0 -from datetime import datetime - - class Block: params = [ (True, "True"),