From 6f8fdc047e936daa95540d6da923a8189f2d43d3 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou <47069922+alexifm@users.noreply.github.com> Date: Fri, 14 Jun 2019 13:16:41 -0700 Subject: [PATCH 01/18] fix a indices bug for categorical-datetime columns This is to fix a bug reported in #26859 --- pandas/core/groupby/ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index ee9d57a537340..3acc7e6a3447a 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -232,6 +232,7 @@ def indices(self): label_list = [ping.labels for ping in self.groupings] keys = [com.values_from_object(ping.group_index) for ping in self.groupings] + keys = [np.array(key) for key in keys] return get_indexer_dict(label_list, keys) @property From cfa0fefafbed98174eeddf45a3e3a2cc4f74f611 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou <47069922+alexifm@users.noreply.github.com> Date: Fri, 14 Jun 2019 19:04:38 -0700 Subject: [PATCH 02/18] test for DataFrameGroupby.indices Adds a test for a bug fix for DataFrameGroupby.indices in #26860 --- pandas/tests/groupby/test_groupby.py | 68 ++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 87b57b0609b36..7bbf9e5ec856a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1752,3 +1752,71 @@ def test_groupby_groups_in_BaseGrouper(): result = df.groupby(['beta', pd.Grouper(level='alpha')]) expected = df.groupby(['beta', 'alpha']) assert(result.groups == expected.groups) + + +def test_groupby_indices(): + # GH 26860 + # Test if DataFrame Groupby builds gb.indices correctly. + + int_series = pd.Series([1, 2, 3]) + int_series_cat = int_series.astype('category') + float_series = pd.Series([1., 2., 3.]) + float_series_cat = float_series.astype('category') + dt_series = pd.to_datetime(['2018Q1', '2018Q2', '2018Q3']) + dt_series_cat = dt_series.astype('category') + period_series = dt_series.to_period('Q') + period_series_cat = period_series.astype('category') + + df = pd.DataFrame({ + 'int_series': int_series, + 'int_series_cat': int_series_cat, + 'float_series': float_series, + 'float_series_cat': float_series_cat, + 'dt_series': dt_series, + 'dt_series_cat': dt_series_cat, + 'period_series': period_series, + 'period_series_cat': period_series_cat + }) + from itertools import combinations + + target_key_choices = [ + df.iloc[i] + for i in range(df.shape[0]) + ] + target_indices_values = [ + np.array([i]) + for i in range(df.shape[0]) + ] + n_choices = len(df.columns) + + for i in range(1, n_choices + 1): + for combo in combinations(list(range(n_choices)), i): + print(combo) + combo = list(combo) + cols = list(df.columns[combo]) + if i == 1: + target_indices_keys = [ + key_choice.iloc[combo[0]] + for key_choice in target_key_choices + ] + else: + target_indices_keys = [ + tuple(key_choice.iloc[combo]) + for key_choice in target_key_choices + ] + + + indices = df.groupby(cols).indices + for target_key, key in zip(target_indices_keys, indices.keys()): + assert target_key == key + + for target_val, val in zip(target_indices_values, indices.values()): + assert target_val == val + + + + + + + + From 806f980748ba611b01ce324a4d84c2867f4695df Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou <47069922+alexifm@users.noreply.github.com> Date: Fri, 14 Jun 2019 19:09:01 -0700 Subject: [PATCH 03/18] pep8 formatting for test cleans up the test to adhere to pep8 formatting --- 
pandas/tests/groupby/test_groupby.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7bbf9e5ec856a..b2fe93dd66567 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1753,11 +1753,11 @@ def test_groupby_groups_in_BaseGrouper(): expected = df.groupby(['beta', 'alpha']) assert(result.groups == expected.groups) - + def test_groupby_indices(): # GH 26860 # Test if DataFrame Groupby builds gb.indices correctly. - + int_series = pd.Series([1, 2, 3]) int_series_cat = int_series.astype('category') float_series = pd.Series([1., 2., 3.]) @@ -1805,18 +1805,10 @@ def test_groupby_indices(): for key_choice in target_key_choices ] - indices = df.groupby(cols).indices - for target_key, key in zip(target_indices_keys, indices.keys()): + it = zip(target_indices_keys, indices.keys()) + for target_key, key in it: assert target_key == key - - for target_val, val in zip(target_indices_values, indices.values()): + it = zip(target_indices_values, indices.values()) + for target_val, val in it: assert target_val == val - - - - - - - - From 27086ba03f765a72bb0ce5b471a5336db8ce8466 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou <47069922+alexifm@users.noreply.github.com> Date: Fri, 14 Jun 2019 20:14:57 -0700 Subject: [PATCH 04/18] Fix tests to handly Py3.5 The test no longer depends on ordering of a dictionary. Also, the test matches the timestamp/datetime outputs that are the current standard in the code. --- pandas/tests/groupby/test_groupby.py | 46 ++++++++++++++++------------ 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index b2fe93dd66567..034d0f1260861 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1779,9 +1779,17 @@ def test_groupby_indices(): }) from itertools import combinations + dts = [ + np.datetime64('2018-01-01T00:00:00.000000000'), + np.datetime64('2018-04-01T00:00:00.000000000'), + np.datetime64('2018-07-01T00:00:00.000000000') + ] + pers = [pd.Period(dt, freq='Q') for dt in dts] + target_key_choices = [ - df.iloc[i] - for i in range(df.shape[0]) + [1, 1, 1.0, 1.0, dts[0], dts[0], pers[0], pers[0]], + [2, 2, 2.0, 2.0, dts[1], dts[1], pers[1], pers[1]], + [3, 3, 3.0, 3.0, dts[2], dts[2], pers[2], pers[2]] ] target_indices_values = [ np.array([i]) @@ -1789,26 +1797,24 @@ def test_groupby_indices(): ] n_choices = len(df.columns) - for i in range(1, n_choices + 1): - for combo in combinations(list(range(n_choices)), i): - print(combo) + for n in range(1, n_choices + 1): + for combo in combinations(list(range(n_choices)), n): combo = list(combo) cols = list(df.columns[combo]) - if i == 1: - target_indices_keys = [ - key_choice.iloc[combo[0]] - for key_choice in target_key_choices - ] + if n == 1: + target_indices = {} + for i, key_choice in enumerate(target_key_choices): + key = key_choice[combo[0]] + if pd.api.types.is_datetime64_any_dtype(key): + key = pd.Timestamp(key) + target_indices[key] = target_indices_values[i] else: - target_indices_keys = [ - tuple(key_choice.iloc[combo]) - for key_choice in target_key_choices - ] + target_indices = {} + for i, key_choice in enumerate(target_key_choices): + key = tuple(key_choice[j] for j in combo) + target_indices[key] = target_indices_values[i] indices = df.groupby(cols).indices - it = zip(target_indices_keys, indices.keys()) - for target_key, key in 
it: - assert target_key == key - it = zip(target_indices_values, indices.values()) - for target_val, val in it: - assert target_val == val + assert set(target_indices.keys()) == set(indices.keys()) + for key in target_indices.keys(): + np.testing.assert_array_equal(target_indices[key], indices[key]) From 0d5385f75e35846be964b8fb646e940369d5c60a Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou <47069922+alexifm@users.noreply.github.com> Date: Fri, 14 Jun 2019 20:59:19 -0700 Subject: [PATCH 05/18] Fixing Py3.5 and other checks. Handle Py3.5 dict ordering issues. Cleanup for Pep8. No longer using numpy testing utility. --- pandas/tests/groupby/test_groupby.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 034d0f1260861..6cd9ac3921803 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1777,6 +1777,17 @@ def test_groupby_indices(): 'period_series': period_series, 'period_series_cat': period_series_cat }) + col_order = [ + 'int_series', + 'int_series_cat', + 'float_series', + 'float_series_cat', + 'dt_series', + 'dt_series_cat', + 'period_series', + 'period_series_cat' + ] + df = df[col_order] from itertools import combinations dts = [ @@ -1817,4 +1828,5 @@ def test_groupby_indices(): indices = df.groupby(cols).indices assert set(target_indices.keys()) == set(indices.keys()) for key in target_indices.keys(): - np.testing.assert_array_equal(target_indices[key], indices[key]) + assert pd.core.dtypes.missing.array_equivalent( + target_indices[key], indices[key]) From 3820a9446727938920f854a1755c536222d4aae7 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 12:33:23 -0700 Subject: [PATCH 06/18] updated groupby indices test to address comments --- pandas/tests/groupby/test_groupby.py | 166 +++++++++++++++------------ 1 file changed, 91 insertions(+), 75 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6cd9ac3921803..801fb516332e0 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -192,7 +192,7 @@ def test_pass_args_kwargs(ts, tsframe): def f(x, q=None, axis=0): return np.percentile(x, q, axis=axis) - g = lambda x: np.percentile(x, 80, axis=0) + def g(x): return np.percentile(x, 80, axis=0) # Series ts_grouped = ts.groupby(lambda x: x.month) @@ -414,7 +414,7 @@ def test_frame_groupby_columns(tsframe): assert len(aggregated.columns) == 2 # transform - tf = lambda x: x - x.mean() + def tf(x): return x - x.mean() groupedT = tsframe.T.groupby(mapping, axis=0) assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) @@ -992,7 +992,7 @@ def test_seriesgroupby_name_attr(df): assert result.count().name == 'C' assert result.mean().name == 'C' - testFunc = lambda x: np.sum(x) * 2 + def testFunc(x): return np.sum(x) * 2 assert result.agg(testFunc).name == 'C' @@ -1077,7 +1077,7 @@ def test_series_grouper_noncontig_index(): grouped = values.groupby(labels) # accessing the index elements causes segfault - f = lambda x: len(set(map(id, x.index))) + def f(x): return len(set(map(id, x.index))) grouped.agg(f) @@ -1754,79 +1754,95 @@ def test_groupby_groups_in_BaseGrouper(): assert(result.groups == expected.groups) -def test_groupby_indices(): +def _all_combinations(elems): + from itertools import chain, combinations + + out = chain.from_iterable( + combinations(elems, n + 1) for n in range(len(elems)) + ) + return list(out) 
+ + +@pytest.mark.parametrize( + 'gb_cols', _all_combinations([ + 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', + 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat' + ]), + ids=lambda cols: ",".join(cols) +) +def test_groupby_indices(gb_cols): # GH 26860 # Test if DataFrame Groupby builds gb.indices correctly. + gb_cols = list(gb_cols) + int_series = pd.Series([1, 2, 3]) - int_series_cat = int_series.astype('category') - float_series = pd.Series([1., 2., 3.]) - float_series_cat = float_series.astype('category') dt_series = pd.to_datetime(['2018Q1', '2018Q2', '2018Q3']) - dt_series_cat = dt_series.astype('category') - period_series = dt_series.to_period('Q') - period_series_cat = period_series.astype('category') - - df = pd.DataFrame({ - 'int_series': int_series, - 'int_series_cat': int_series_cat, - 'float_series': float_series, - 'float_series_cat': float_series_cat, - 'dt_series': dt_series, - 'dt_series_cat': dt_series_cat, - 'period_series': period_series, - 'period_series_cat': period_series_cat - }) - col_order = [ - 'int_series', - 'int_series_cat', - 'float_series', - 'float_series_cat', - 'dt_series', - 'dt_series_cat', - 'period_series', - 'period_series_cat' - ] - df = df[col_order] - from itertools import combinations - - dts = [ - np.datetime64('2018-01-01T00:00:00.000000000'), - np.datetime64('2018-04-01T00:00:00.000000000'), - np.datetime64('2018-07-01T00:00:00.000000000') - ] - pers = [pd.Period(dt, freq='Q') for dt in dts] - - target_key_choices = [ - [1, 1, 1.0, 1.0, dts[0], dts[0], pers[0], pers[0]], - [2, 2, 2.0, 2.0, dts[1], dts[1], pers[1], pers[1]], - [3, 3, 3.0, 3.0, dts[2], dts[2], pers[2], pers[2]] - ] - target_indices_values = [ - np.array([i]) - for i in range(df.shape[0]) - ] - n_choices = len(df.columns) - - for n in range(1, n_choices + 1): - for combo in combinations(list(range(n_choices)), n): - combo = list(combo) - cols = list(df.columns[combo]) - if n == 1: - target_indices = {} - for i, key_choice in enumerate(target_key_choices): - key = key_choice[combo[0]] - if pd.api.types.is_datetime64_any_dtype(key): - key = pd.Timestamp(key) - target_indices[key] = target_indices_values[i] - else: - target_indices = {} - for i, key_choice in enumerate(target_key_choices): - key = tuple(key_choice[j] for j in combo) - target_indices[key] = target_indices_values[i] - - indices = df.groupby(cols).indices - assert set(target_indices.keys()) == set(indices.keys()) - for key in target_indices.keys(): - assert pd.core.dtypes.missing.array_equivalent( - target_indices[key], indices[key]) + df = pd.DataFrame( + data={ + 'int_series': int_series, + 'int_series_cat': int_series.astype('category'), + 'float_series': int_series.astype('float'), + 'float_series_cat': int_series.astype('float').astype('category'), + 'dt_series': dt_series, + 'dt_series_cat': dt_series.astype('category'), + 'period_series': dt_series.to_period('Q'), + 'period_series_cat': dt_series.to_period('Q').astype('category') + }, + columns=[ + 'int_series', + 'int_series_cat', + 'float_series', + 'float_series_cat', + 'dt_series', + 'dt_series_cat', + 'period_series', + 'period_series_cat' + ] + ) + + num_gb_cols = len(gb_cols) + + if num_gb_cols == 1: + s = df[gb_cols[0]] + col_vals = list(s.unique()) + + if pd.api.types.is_datetime64_any_dtype(s): + col_vals = list(map(pd.Timestamp, col_vals)) + + target = { + key: np.array([i]) + for i, key in enumerate(col_vals) + } + else: + col_vals = { + col: list(df[col].unique()) + for col in gb_cols + } + + def to_dt(elems): 
+ elems = map(pd.Timestamp, elems) + elems = map(lambda dt: dt.to_datetime64(), elems) + elems = list(elems) + return elems + + for col in gb_cols: + if pd.api.types.is_datetime64_any_dtype(df[col]): + col_vals[col] = to_dt(col_vals[col]) + + elif pd.api.types.is_categorical_dtype(df[col]): + if pd.api.types.is_datetime64_any_dtype(df[col].cat.categories): + col_vals[col] = to_dt(col_vals[col]) + + it = zip(*(col_vals[col] for col in col_vals.keys())) + target = { + key: np.array([i]) + for i, key in enumerate(it) + } + + indices = df.groupby(gb_cols).indices + + assert set(target.keys()) == set(indices.keys()) + for key in target.keys(): + assert pd.core.dtypes.missing.array_equivalent( + target[key], indices[key]) From 51bda3a92d994a18278e52952d21ff31e3f330d2 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 12:37:42 -0700 Subject: [PATCH 07/18] move test position --- pandas/tests/groupby/test_groupby.py | 188 +++++++++++++-------------- 1 file changed, 94 insertions(+), 94 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 801fb516332e0..49fc6be9c6881 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -340,6 +340,100 @@ def f3(x): df2.groupby('a').apply(f3) +def _all_combinations(elems): + from itertools import chain, combinations + + out = chain.from_iterable( + combinations(elems, n + 1) for n in range(len(elems)) + ) + return list(out) + + +@pytest.mark.parametrize( + 'gb_cols', _all_combinations([ + 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', + 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat' + ]), + ids=lambda cols: ",".join(cols) +) +def test_groupby_indices(gb_cols): + # GH 26860 + # Test if DataFrame Groupby builds gb.indices correctly. 
+ + gb_cols = list(gb_cols) + + int_series = pd.Series([1, 2, 3]) + dt_series = pd.to_datetime(['2018Q1', '2018Q2', '2018Q3']) + df = pd.DataFrame( + data={ + 'int_series': int_series, + 'int_series_cat': int_series.astype('category'), + 'float_series': int_series.astype('float'), + 'float_series_cat': int_series.astype('float').astype('category'), + 'dt_series': dt_series, + 'dt_series_cat': dt_series.astype('category'), + 'period_series': dt_series.to_period('Q'), + 'period_series_cat': dt_series.to_period('Q').astype('category') + }, + columns=[ + 'int_series', + 'int_series_cat', + 'float_series', + 'float_series_cat', + 'dt_series', + 'dt_series_cat', + 'period_series', + 'period_series_cat' + ] + ) + + num_gb_cols = len(gb_cols) + + if num_gb_cols == 1: + s = df[gb_cols[0]] + col_vals = list(s.unique()) + + if pd.api.types.is_datetime64_any_dtype(s): + col_vals = list(map(pd.Timestamp, col_vals)) + + target = { + key: np.array([i]) + for i, key in enumerate(col_vals) + } + else: + col_vals = { + col: list(df[col].unique()) + for col in gb_cols + } + + def to_dt(elems): + elems = map(pd.Timestamp, elems) + elems = map(lambda dt: dt.to_datetime64(), elems) + elems = list(elems) + return elems + + for col in gb_cols: + if pd.api.types.is_datetime64_any_dtype(df[col]): + col_vals[col] = to_dt(col_vals[col]) + + elif pd.api.types.is_categorical_dtype(df[col]): + if pd.api.types.is_datetime64_any_dtype(df[col].cat.categories): + col_vals[col] = to_dt(col_vals[col]) + + it = zip(*(col_vals[col] for col in col_vals.keys())) + target = { + key: np.array([i]) + for i, key in enumerate(it) + } + + indices = df.groupby(gb_cols).indices + + assert set(target.keys()) == set(indices.keys()) + for key in target.keys(): + assert pd.core.dtypes.missing.array_equivalent( + target[key], indices[key]) + + def test_attr_wrapper(ts): grouped = ts.groupby(lambda x: x.weekday()) @@ -1752,97 +1846,3 @@ def test_groupby_groups_in_BaseGrouper(): result = df.groupby(['beta', pd.Grouper(level='alpha')]) expected = df.groupby(['beta', 'alpha']) assert(result.groups == expected.groups) - - -def _all_combinations(elems): - from itertools import chain, combinations - - out = chain.from_iterable( - combinations(elems, n + 1) for n in range(len(elems)) - ) - return list(out) - - -@pytest.mark.parametrize( - 'gb_cols', _all_combinations([ - 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', - 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat' - ]), - ids=lambda cols: ",".join(cols) -) -def test_groupby_indices(gb_cols): - # GH 26860 - # Test if DataFrame Groupby builds gb.indices correctly. 
- - gb_cols = list(gb_cols) - - int_series = pd.Series([1, 2, 3]) - dt_series = pd.to_datetime(['2018Q1', '2018Q2', '2018Q3']) - df = pd.DataFrame( - data={ - 'int_series': int_series, - 'int_series_cat': int_series.astype('category'), - 'float_series': int_series.astype('float'), - 'float_series_cat': int_series.astype('float').astype('category'), - 'dt_series': dt_series, - 'dt_series_cat': dt_series.astype('category'), - 'period_series': dt_series.to_period('Q'), - 'period_series_cat': dt_series.to_period('Q').astype('category') - }, - columns=[ - 'int_series', - 'int_series_cat', - 'float_series', - 'float_series_cat', - 'dt_series', - 'dt_series_cat', - 'period_series', - 'period_series_cat' - ] - ) - - num_gb_cols = len(gb_cols) - - if num_gb_cols == 1: - s = df[gb_cols[0]] - col_vals = list(s.unique()) - - if pd.api.types.is_datetime64_any_dtype(s): - col_vals = list(map(pd.Timestamp, col_vals)) - - target = { - key: np.array([i]) - for i, key in enumerate(col_vals) - } - else: - col_vals = { - col: list(df[col].unique()) - for col in gb_cols - } - - def to_dt(elems): - elems = map(pd.Timestamp, elems) - elems = map(lambda dt: dt.to_datetime64(), elems) - elems = list(elems) - return elems - - for col in gb_cols: - if pd.api.types.is_datetime64_any_dtype(df[col]): - col_vals[col] = to_dt(col_vals[col]) - - elif pd.api.types.is_categorical_dtype(df[col]): - if pd.api.types.is_datetime64_any_dtype(df[col].cat.categories): - col_vals[col] = to_dt(col_vals[col]) - - it = zip(*(col_vals[col] for col in col_vals.keys())) - target = { - key: np.array([i]) - for i, key in enumerate(it) - } - - indices = df.groupby(gb_cols).indices - - assert set(target.keys()) == set(indices.keys()) - for key in target.keys(): - assert pd.core.dtypes.missing.array_equivalent( - target[key], indices[key]) From b6ba1614bd5bade009c59aca141902ee5945e58d Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 12:39:10 -0700 Subject: [PATCH 08/18] move bug fix to address comment --- pandas/core/groupby/ops.py | 1 - pandas/core/sorting.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 3acc7e6a3447a..ee9d57a537340 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -232,7 +232,6 @@ def indices(self): label_list = [ping.labels for ping in self.groupings] keys = [com.values_from_object(ping.group_index) for ping in self.groupings] - keys = [np.array(key) for key in keys] return get_indexer_dict(label_list, keys) @property diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 21c0c8f747b10..652c7c0bd2f12 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -318,6 +318,8 @@ def get_flattened_iterator(comp_ids, ngroups, levels, labels): def get_indexer_dict(label_list, keys): """ return a diction of {labels} -> {indexers} """ + # address GH 26860 + keys = [np.asarray(key) for key in keys] shape = list(map(len, keys)) group_index = get_group_index(label_list, shape, sort=True, xnull=True) From 650a3ece08df0cc0a73c8e991c0489f3755a7932 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 13:03:07 -0700 Subject: [PATCH 09/18] undo pep8 autoformatting --- pandas/tests/groupby/test_groupby.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 49fc6be9c6881..bb381cb1b5b1b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ 
b/pandas/tests/groupby/test_groupby.py @@ -192,7 +192,7 @@ def test_pass_args_kwargs(ts, tsframe): def f(x, q=None, axis=0): return np.percentile(x, q, axis=axis) - def g(x): return np.percentile(x, 80, axis=0) + g = lambda x: np.percentile(x, 80, axis=0) # Series ts_grouped = ts.groupby(lambda x: x.month) @@ -508,7 +508,7 @@ def test_frame_groupby_columns(tsframe): assert len(aggregated.columns) == 2 # transform - def tf(x): return x - x.mean() + tf = lambda x: x - x.mean() groupedT = tsframe.T.groupby(mapping, axis=0) assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) @@ -1086,7 +1086,7 @@ def test_seriesgroupby_name_attr(df): assert result.count().name == 'C' assert result.mean().name == 'C' - def testFunc(x): return np.sum(x) * 2 + testFunc = lambda x: np.sum(x) * 2 assert result.agg(testFunc).name == 'C' @@ -1171,7 +1171,7 @@ def test_series_grouper_noncontig_index(): grouped = values.groupby(labels) # accessing the index elements causes segfault - def f(x): return len(set(map(id, x.index))) + f = lambda x: len(set(map(id, x.index))) grouped.agg(f) From c926c06334df01b08f8bdaa1883eaabc3e9f0998 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 13:10:11 -0700 Subject: [PATCH 10/18] format fix --- pandas/tests/groupby/test_groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 23bd6e2caf910..399e208975302 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1153,7 +1153,6 @@ def test_seriesgroupby_name_attr(df): assert result.count().name == "C" assert result.mean().name == "C" - testFunc = lambda x: np.sum(x) * 2 assert result.agg(testFunc).name == "C" From 39f394e5fa9ae80470a6beef37fc34cab2172cbb Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 13:22:34 -0700 Subject: [PATCH 11/18] add bug to what's new --- doc/source/whatsnew/v0.25.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 6974c7521a237..7f04ac6262464 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -99,7 +99,7 @@ Other ^^^^^ - Compatibility with Python 3.8 in :meth:`DataFrame.query` (:issue:`27261`) -- +- Bug in :func:`get_indexer_dict` when passed keys are not numpy array. (:issue:`26860`) .. 
_whatsnew_0.252.contributors: From 182de89da69bd2be93a4b79ff1db647bffce4c86 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 14:25:54 -0700 Subject: [PATCH 12/18] provide simple test for original github issue; simplify full output test; address py3.5 issues --- pandas/tests/groupby/test_groupby.py | 122 +++++++++++++-------------- 1 file changed, 60 insertions(+), 62 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 399e208975302..5d5b09240a725 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -351,27 +351,27 @@ def f3(x): df2.groupby("a").apply(f3) -def _all_combinations(elems): - from itertools import chain, combinations +def test_groupby_indices_error(): + # GH 26860 + # Test if DataFrame Groupby builds gb.indices + dt = pd.to_datetime(['2018-01-01', '2018-02-01', '2018-03-01']) + df = pd.DataFrame({ + 'a': pd.Series(list('abc')), + 'b': pd.Series(dt, dtype='category'), + 'c': pd.Categorical.from_codes([-1, 0, 1], categories=[0, 1]) + }) - out = chain.from_iterable( - combinations(elems, n + 1) for n in range(len(elems)) - ) - return list(out) + df.groupby(['a', 'b']).indices -@pytest.mark.parametrize( - 'gb_cols', _all_combinations([ - 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', - 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat' - ]), - ids=lambda cols: ",".join(cols) -) -def test_groupby_indices(gb_cols): +def test_groupby_indices_output(): # GH 26860 # Test if DataFrame Groupby builds gb.indices correctly. - gb_cols = list(gb_cols) + cols = [ + 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', + 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat' + ] int_series = pd.Series([1, 2, 3]) dt_series = pd.to_datetime(['2018Q1', '2018Q2', '2018Q3']) @@ -386,63 +386,61 @@ def test_groupby_indices(gb_cols): 'period_series': dt_series.to_period('Q'), 'period_series_cat': dt_series.to_period('Q').astype('category') }, - columns=[ - 'int_series', - 'int_series_cat', - 'float_series', - 'float_series_cat', - 'dt_series', - 'dt_series_cat', - 'period_series', - 'period_series_cat' - ] + columns=cols ) - num_gb_cols = len(gb_cols) - - if num_gb_cols == 1: - s = df[gb_cols[0]] - col_vals = list(s.unique()) - - if pd.api.types.is_datetime64_any_dtype(s): - col_vals = list(map(pd.Timestamp, col_vals)) + from itertools import chain, combinations - target = { - key: np.array([i]) - for i, key in enumerate(col_vals) - } - else: - col_vals = { - col: list(df[col].unique()) - for col in gb_cols - } + gb_cols_it = chain.from_iterable( + combinations(cols, n + 1) for n in range(len(cols)) + ) + for gb_cols in gb_cols_it: + gb_cols = list(gb_cols) + num_gb_cols = len(gb_cols) - def to_dt(elems): - elems = map(pd.Timestamp, elems) - elems = map(lambda dt: dt.to_datetime64(), elems) - elems = list(elems) - return elems + if num_gb_cols == 1: + s = df[gb_cols[0]] + col_vals = list(s.unique()) - for col in gb_cols: - if pd.api.types.is_datetime64_any_dtype(df[col]): - col_vals[col] = to_dt(col_vals[col]) + if pd.api.types.is_datetime64_any_dtype(s): + col_vals = list(map(pd.Timestamp, col_vals)) - elif pd.api.types.is_categorical_dtype(df[col]): - if pd.api.types.is_datetime64_any_dtype(df[col].cat.categories): + target = { + key: np.array([i]) + for i, key in enumerate(col_vals) + } + else: + col_vals = { + col: list(df[col].unique()) + for col in gb_cols + } + + def to_dt(elems): + elems = map(pd.Timestamp, elems) 
+ elems = map(lambda dt: dt.to_datetime64(), elems) + elems = list(elems) + return elems + + for col in gb_cols: + if pd.api.types.is_datetime64_any_dtype(df[col]): col_vals[col] = to_dt(col_vals[col]) - it = zip(*(col_vals[col] for col in col_vals.keys())) - target = { - key: np.array([i]) - for i, key in enumerate(it) - } + elif pd.api.types.is_categorical_dtype(df[col]): + if pd.api.types.is_datetime64_any_dtype(df[col].cat.categories): + col_vals[col] = to_dt(col_vals[col]) + + it = zip(*(col_vals[col] for col in gb_cols)) + target = { + key: np.array([i]) + for i, key in enumerate(it) + } - indices = df.groupby(gb_cols).indices + indices = df.groupby(gb_cols).indices - assert set(target.keys()) == set(indices.keys()) - for key in target.keys(): - assert pd.core.dtypes.missing.array_equivalent( - target[key], indices[key]) + assert set(target.keys()) == set(indices.keys()) + for key in target.keys(): + assert pd.core.dtypes.missing.array_equivalent( + target[key], indices[key]) def test_attr_wrapper(ts): From 7b5b3705f6b9bdbe7c5cb01e347bce503c1f2ed2 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 14:44:34 -0700 Subject: [PATCH 13/18] update what's new for 1.0 --- doc/source/whatsnew/v0.25.2.rst | 2 +- doc/source/whatsnew/v1.0.0.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 7f04ac6262464..6974c7521a237 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -99,7 +99,7 @@ Other ^^^^^ - Compatibility with Python 3.8 in :meth:`DataFrame.query` (:issue:`27261`) -- Bug in :func:`get_indexer_dict` when passed keys are not numpy array. (:issue:`26860`) +- .. _whatsnew_0.252.contributors: diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7a10447e3ad40..ce85ad94af8b3 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -176,7 +176,7 @@ Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - -- +- Bug in :meth:`DataFrameGroupBy.indices` raises exception when grouping on multiple columns and one is a categorical with datetime values. 
(:issue:`26860`) - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) Reshaping From c700c1a17440c09d34f5bd10405bd87550f4ca3c Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 14:44:57 -0700 Subject: [PATCH 14/18] address comments on cleaning up test --- pandas/tests/groupby/test_groupby.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 5d5b09240a725..21b95ffb90eb1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -11,6 +11,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv import pandas.core.common as com +from pandas.api.types import is_datetime64_any_dtype, is_categorical_dtype import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, @@ -391,6 +392,12 @@ def test_groupby_indices_output(): from itertools import chain, combinations + def dt_to_ts(elems): + return [pd.Timestamp(el) for el in elems] + + def ts_to_dt(elems): + return [el.to_datetime64() for el in elems] + gb_cols_it = chain.from_iterable( combinations(cols, n + 1) for n in range(len(cols)) ) @@ -402,8 +409,8 @@ def test_groupby_indices_output(): s = df[gb_cols[0]] col_vals = list(s.unique()) - if pd.api.types.is_datetime64_any_dtype(s): - col_vals = list(map(pd.Timestamp, col_vals)) + if is_datetime64_any_dtype(s): + col_vals = dt_to_ts(col_vals) target = { key: np.array([i]) @@ -415,19 +422,12 @@ def test_groupby_indices_output(): for col in gb_cols } - def to_dt(elems): - elems = map(pd.Timestamp, elems) - elems = map(lambda dt: dt.to_datetime64(), elems) - elems = list(elems) - return elems - for col in gb_cols: - if pd.api.types.is_datetime64_any_dtype(df[col]): - col_vals[col] = to_dt(col_vals[col]) - - elif pd.api.types.is_categorical_dtype(df[col]): - if pd.api.types.is_datetime64_any_dtype(df[col].cat.categories): - col_vals[col] = to_dt(col_vals[col]) + is_dt = is_datetime64_any_dtype(df[col]) + is_cat_dt = is_categorical_dtype(df[col]) and \ + is_datetime64_any_dtype(df[col].cat.categories) + if is_dt or is_cat_dt: + col_vals[col] = ts_to_dt(dt_to_ts(col_vals[col])) it = zip(*(col_vals[col] for col in gb_cols)) target = { From 5543e0d690df27a94fcc8e3c45b5c74ea4835775 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 18:14:29 -0700 Subject: [PATCH 15/18] parametrize test --- pandas/tests/groupby/test_groupby.py | 91 +++++++++++++++------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 21b95ffb90eb1..ece1790b0b29e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -365,9 +365,21 @@ def test_groupby_indices_error(): df.groupby(['a', 'b']).indices -def test_groupby_indices_output(): +@pytest.mark.parametrize( + "gb_cols", [ + 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', + 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat', + [ + 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', + 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat' + ] + ] +) +def test_groupby_indices_output(gb_cols): # GH 26860 # Test if DataFrame Groupby builds gb.indices correctly. 
+ if isinstance(gb_cols, str): + gb_cols = [gb_cols] cols = [ 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', @@ -390,57 +402,50 @@ def test_groupby_indices_output(): columns=cols ) - from itertools import chain, combinations - def dt_to_ts(elems): return [pd.Timestamp(el) for el in elems] def ts_to_dt(elems): return [el.to_datetime64() for el in elems] - gb_cols_it = chain.from_iterable( - combinations(cols, n + 1) for n in range(len(cols)) - ) - for gb_cols in gb_cols_it: - gb_cols = list(gb_cols) - num_gb_cols = len(gb_cols) + num_gb_cols = len(gb_cols) - if num_gb_cols == 1: - s = df[gb_cols[0]] - col_vals = list(s.unique()) + if num_gb_cols == 1: + s = df[gb_cols[0]] + col_vals = list(s.unique()) - if is_datetime64_any_dtype(s): - col_vals = dt_to_ts(col_vals) + if is_datetime64_any_dtype(s): + col_vals = dt_to_ts(col_vals) - target = { - key: np.array([i]) - for i, key in enumerate(col_vals) - } - else: - col_vals = { - col: list(df[col].unique()) - for col in gb_cols - } - - for col in gb_cols: - is_dt = is_datetime64_any_dtype(df[col]) - is_cat_dt = is_categorical_dtype(df[col]) and \ - is_datetime64_any_dtype(df[col].cat.categories) - if is_dt or is_cat_dt: - col_vals[col] = ts_to_dt(dt_to_ts(col_vals[col])) - - it = zip(*(col_vals[col] for col in gb_cols)) - target = { - key: np.array([i]) - for i, key in enumerate(it) - } - - indices = df.groupby(gb_cols).indices - - assert set(target.keys()) == set(indices.keys()) - for key in target.keys(): - assert pd.core.dtypes.missing.array_equivalent( - target[key], indices[key]) + target = { + key: np.array([i]) + for i, key in enumerate(col_vals) + } + else: + col_vals = { + col: list(df[col].unique()) + for col in gb_cols + } + + for col in gb_cols: + is_dt = is_datetime64_any_dtype(df[col]) + is_cat_dt = is_categorical_dtype(df[col]) and \ + is_datetime64_any_dtype(df[col].cat.categories) + if is_dt or is_cat_dt: + col_vals[col] = ts_to_dt(dt_to_ts(col_vals[col])) + + it = zip(*(col_vals[col] for col in gb_cols)) + target = { + key: np.array([i]) + for i, key in enumerate(it) + } + + indices = df.groupby(gb_cols).indices + + assert set(target.keys()) == set(indices.keys()) + for key in target.keys(): + assert pd.core.dtypes.missing.array_equivalent( + target[key], indices[key]) def test_attr_wrapper(ts): From 4ae7db8a651ddafcad504d6d4662bef35a92bc2a Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 18:47:48 -0700 Subject: [PATCH 16/18] fix imports --- pandas/tests/groupby/test_groupby.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ece1790b0b29e..37e9ba91d123b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -10,8 +10,11 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv +from pandas.core.dtypes.common import ( + is_datetime64_any_dtype, + is_categorical_dtype +) import pandas.core.common as com -from pandas.api.types import is_datetime64_any_dtype, is_categorical_dtype import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, From abcfaff35256b1442edb57e4dd77274296c810ea Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 20:20:02 -0700 Subject: [PATCH 17/18] import fix --- pandas/tests/groupby/test_groupby.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py 
b/pandas/tests/groupby/test_groupby.py index 37e9ba91d123b..30f63a88080a3 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -8,12 +8,10 @@ from pandas.errors import PerformanceWarning +from pandas.core.dtypes.common import is_categorical_dtype, is_datetime64_any_dtype + import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv -from pandas.core.dtypes.common import ( - is_datetime64_any_dtype, - is_categorical_dtype -) import pandas.core.common as com import pandas.util.testing as tm from pandas.util.testing import ( From 85b3b1a84f622a3cb1a56810c4f6648e26122bc4 Mon Sep 17 00:00:00 2001 From: Alex Papanicolaou Date: Wed, 28 Aug 2019 20:58:17 -0700 Subject: [PATCH 18/18] black formatting --- pandas/tests/groupby/test_groupby.py | 96 ++++++++++++++++------------ 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 30f63a88080a3..eb2021031d5e5 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -356,25 +356,40 @@ def f3(x): def test_groupby_indices_error(): # GH 26860 # Test if DataFrame Groupby builds gb.indices - dt = pd.to_datetime(['2018-01-01', '2018-02-01', '2018-03-01']) - df = pd.DataFrame({ - 'a': pd.Series(list('abc')), - 'b': pd.Series(dt, dtype='category'), - 'c': pd.Categorical.from_codes([-1, 0, 1], categories=[0, 1]) - }) + dt = pd.to_datetime(["2018-01-01", "2018-02-01", "2018-03-01"]) + df = pd.DataFrame( + { + "a": pd.Series(list("abc")), + "b": pd.Series(dt, dtype="category"), + "c": pd.Categorical.from_codes([-1, 0, 1], categories=[0, 1]), + } + ) - df.groupby(['a', 'b']).indices + df.groupby(["a", "b"]).indices @pytest.mark.parametrize( - "gb_cols", [ - 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', - 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat', + "gb_cols", + [ + "int_series", + "int_series_cat", + "float_series", + "float_series_cat", + "dt_series", + "dt_series_cat", + "period_series", + "period_series_cat", [ - 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', - 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat' - ] - ] + "int_series", + "int_series_cat", + "float_series", + "float_series_cat", + "dt_series", + "dt_series_cat", + "period_series", + "period_series_cat", + ], + ], ) def test_groupby_indices_output(gb_cols): # GH 26860 @@ -383,24 +398,30 @@ def test_groupby_indices_output(gb_cols): gb_cols = [gb_cols] cols = [ - 'int_series', 'int_series_cat', 'float_series', 'float_series_cat', - 'dt_series', 'dt_series_cat', 'period_series', 'period_series_cat' + "int_series", + "int_series_cat", + "float_series", + "float_series_cat", + "dt_series", + "dt_series_cat", + "period_series", + "period_series_cat", ] int_series = pd.Series([1, 2, 3]) - dt_series = pd.to_datetime(['2018Q1', '2018Q2', '2018Q3']) + dt_series = pd.to_datetime(["2018Q1", "2018Q2", "2018Q3"]) df = pd.DataFrame( data={ - 'int_series': int_series, - 'int_series_cat': int_series.astype('category'), - 'float_series': int_series.astype('float'), - 'float_series_cat': int_series.astype('float').astype('category'), - 'dt_series': dt_series, - 'dt_series_cat': dt_series.astype('category'), - 'period_series': dt_series.to_period('Q'), - 'period_series_cat': dt_series.to_period('Q').astype('category') + "int_series": int_series, + "int_series_cat": int_series.astype("category"), + "float_series": 
int_series.astype("float"), + "float_series_cat": int_series.astype("float").astype("category"), + "dt_series": dt_series, + "dt_series_cat": dt_series.astype("category"), + "period_series": dt_series.to_period("Q"), + "period_series_cat": dt_series.to_period("Q").astype("category"), }, - columns=cols + columns=cols, ) def dt_to_ts(elems): @@ -418,35 +439,26 @@ def ts_to_dt(elems): if is_datetime64_any_dtype(s): col_vals = dt_to_ts(col_vals) - target = { - key: np.array([i]) - for i, key in enumerate(col_vals) - } + target = {key: np.array([i]) for i, key in enumerate(col_vals)} else: - col_vals = { - col: list(df[col].unique()) - for col in gb_cols - } + col_vals = {col: list(df[col].unique()) for col in gb_cols} for col in gb_cols: is_dt = is_datetime64_any_dtype(df[col]) - is_cat_dt = is_categorical_dtype(df[col]) and \ - is_datetime64_any_dtype(df[col].cat.categories) + is_cat_dt = is_categorical_dtype(df[col]) and is_datetime64_any_dtype( + df[col].cat.categories + ) if is_dt or is_cat_dt: col_vals[col] = ts_to_dt(dt_to_ts(col_vals[col])) it = zip(*(col_vals[col] for col in gb_cols)) - target = { - key: np.array([i]) - for i, key in enumerate(it) - } + target = {key: np.array([i]) for i, key in enumerate(it)} indices = df.groupby(gb_cols).indices assert set(target.keys()) == set(indices.keys()) for key in target.keys(): - assert pd.core.dtypes.missing.array_equivalent( - target[key], indices[key]) + assert pd.core.dtypes.missing.array_equivalent(target[key], indices[key]) def test_attr_wrapper(ts):
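
For reference, below is a minimal sketch of the failure this series addresses, adapted from the test_groupby_indices_error test added in PATCH 12/18. It is illustrative only and not part of the submitted patches; the exact exception raised before the fix is as reported in GH 26860.

    import pandas as pd

    # Frame mirroring the new test: an object column plus a categorical
    # column whose categories are datetimes (the combination reported in
    # GH 26860), and a categorical with an unobserved code.
    dt = pd.to_datetime(["2018-01-01", "2018-02-01", "2018-03-01"])
    df = pd.DataFrame(
        {
            "a": pd.Series(list("abc")),
            "b": pd.Series(dt, dtype="category"),
            "c": pd.Categorical.from_codes([-1, 0, 1], categories=[0, 1]),
        }
    )

    # Without the fix (coercing each key with np.asarray inside
    # get_indexer_dict in pandas/core/sorting.py), accessing .indices on
    # this multi-column grouping raised an exception; with the fix it
    # returns a dict mapping group keys to positional index arrays.
    df.groupby(["a", "b"]).indices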