From 56e1d4fe95feebb2645542a086cf5ecb6b7b41b5 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 26 Jun 2021 13:17:27 +0100 Subject: [PATCH 01/22] Update generic.py --- pandas/core/generic.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 82895ab9eb67a..c7def189fc606 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5455,6 +5455,16 @@ def __finalize__( object.__setattr__(self, name, getattr(other, name, None)) if method == "concat": + # Issue #41828, retain the attrs only if all NDFrame have the same + # attrs. + attrs = other.objs[0].attrs + for obj in other.objs[1:]: + if obj.attrs != attrs: + break + else: + for name in attrs: + self.attrs[name] = attrs[name] + allows_duplicate_labels = all( x.flags.allows_duplicate_labels for x in other.objs ) From 93da2b26560ae777b72ccd7f9c2e9b49d8a81977 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 26 Jun 2021 14:39:45 +0100 Subject: [PATCH 02/22] Update test_finalize.py --- pandas/tests/generic/test_finalize.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 50ecb74924e2a..941b0e43c2f04 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -347,7 +347,6 @@ (pd.Series, ([1, 2],), operator.methodcaller("convert_dtypes")), pytest.param( (pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")), - marks=not_implemented_mark, ), (pd.Series, ([1, None, 3],), operator.methodcaller("interpolate")), (pd.DataFrame, ({"A": [1, None, 3]},), operator.methodcaller("interpolate")), @@ -762,7 +761,7 @@ def test_groupby_finalize(obj, method): [ lambda x: x.agg(["sum", "count"]), lambda x: x.transform(lambda y: y), - lambda x: x.apply(lambda y: y), + # lambda x: x.apply(lambda y: y), Fixed with #42252 lambda x: x.agg("std"), lambda x: x.agg("var"), lambda x: 
x.agg("sem"), @@ -784,3 +783,26 @@ def test_finalize_frame_series_name(): df = pd.DataFrame({"name": [1, 2]}) result = pd.Series([1, 2]).__finalize__(df) assert result.name is None + +def test_concat_retain_attrs(): + '''Only retain the attrs when the attrs are the same across all + dataframes.''' + d = {'col1': [1, 2], 'col2': [3, 4]} + df1 = pd.DataFrame(data=d) + df1.attrs = {1: 1} + df2 = pd.DataFrame(data=d) + df2.attrs = {1: 1} + df = pd.concat([df1, df2]) + assert df.attrs == {1: 1} + +def test_concat_drop_attrs(): + '''Drop the attrs when the attrs when the attrs are different across + all + dataframes.''' + d = {'col1': [1, 2], 'col2': [3, 4]} + df1 = pd.DataFrame(data=d) + df1.attrs = {1: 1} + df2 = pd.DataFrame(data=d) + df2.attrs = {1: 2} + df = pd.concat([df1, df2]) + assert df.attrs == {} \ No newline at end of file From 20436348f0e6c5c58d1d19a047b19286b54498da Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 26 Jun 2021 14:40:46 +0100 Subject: [PATCH 03/22] Update test_finalize.py --- pandas/tests/generic/test_finalize.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 941b0e43c2f04..d389c86009a26 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -784,6 +784,7 @@ def test_finalize_frame_series_name(): result = pd.Series([1, 2]).__finalize__(df) assert result.name is None + def test_concat_retain_attrs(): '''Only retain the attrs when the attrs are the same across all dataframes.''' @@ -795,6 +796,7 @@ def test_concat_retain_attrs(): df = pd.concat([df1, df2]) assert df.attrs == {1: 1} + def test_concat_drop_attrs(): '''Drop the attrs when the attrs when the attrs are different across all @@ -805,4 +807,4 @@ def test_concat_drop_attrs(): df2 = pd.DataFrame(data=d) df2.attrs = {1: 2} df = pd.concat([df1, df2]) - assert df.attrs == {} \ No newline at end of file + assert df.attrs == {} From 
23541a8c947c10b7874efc79dd5bf5d5a6a2f1b9 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Wed, 30 Jun 2021 12:03:49 +0100 Subject: [PATCH 04/22] fix test --- pandas/tests/generic/test_finalize.py | 27 +--------------------- pandas/tests/reshape/concat/test_concat.py | 27 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index d389c86009a26..4119cc12af5d5 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -761,7 +761,7 @@ def test_groupby_finalize(obj, method): [ lambda x: x.agg(["sum", "count"]), lambda x: x.transform(lambda y: y), - # lambda x: x.apply(lambda y: y), Fixed with #42252 + lambda x: x.apply(lambda y: y), #Fixed with #42252 lambda x: x.agg("std"), lambda x: x.agg("var"), lambda x: x.agg("sem"), @@ -783,28 +783,3 @@ def test_finalize_frame_series_name(): df = pd.DataFrame({"name": [1, 2]}) result = pd.Series([1, 2]).__finalize__(df) assert result.name is None - - -def test_concat_retain_attrs(): - '''Only retain the attrs when the attrs are the same across all - dataframes.''' - d = {'col1': [1, 2], 'col2': [3, 4]} - df1 = pd.DataFrame(data=d) - df1.attrs = {1: 1} - df2 = pd.DataFrame(data=d) - df2.attrs = {1: 1} - df = pd.concat([df1, df2]) - assert df.attrs == {1: 1} - - -def test_concat_drop_attrs(): - '''Drop the attrs when the attrs when the attrs are different across - all - dataframes.''' - d = {'col1': [1, 2], 'col2': [3, 4]} - df1 = pd.DataFrame(data=d) - df1.attrs = {1: 1} - df2 = pd.DataFrame(data=d) - df2.attrs = {1: 2} - df = pd.concat([df1, df2]) - assert df.attrs == {} diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 17a7089f0ac85..974d1095d02c2 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -653,3 +653,30 @@ def test_concat_posargs_deprecation(): result = 
concat([df, df2], 0) expected = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"]) tm.assert_frame_equal(result, expected) + + +def test_concat_retain_attrs(): + ''' Retain the attrs during concat + + Only retain the attrs when the attrs are the same across all dataframes.''' + d = {'col1': [1, 2], 'col2': [3, 4]} + df1 = pd.DataFrame(data=d) + df1.attrs = {1: 1} + df2 = pd.DataFrame(data=d) + df2.attrs = {1: 1} + df = pd.concat([df1, df2]) + assert df.attrs == {1: 1} + + +def test_concat_drop_attrs(): + '''Discard attrs when they don't match. + + Drop the attrs when the attrs when the attrs are different across + all dataframes.''' + d = {'col1': [1, 2], 'col2': [3, 4]} + df1 = pd.DataFrame(data=d) + df1.attrs = {1: 1} + df2 = pd.DataFrame(data=d) + df2.attrs = {1: 2} + df = pd.concat([df1, df2]) + assert df.attrs == {} From b349e96ae134977c536ea998e44e206c2dcc8b60 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Wed, 30 Jun 2021 12:05:23 +0100 Subject: [PATCH 05/22] Update test_finalize.py --- pandas/tests/generic/test_finalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 4119cc12af5d5..a90336eb86c5a 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -761,7 +761,7 @@ def test_groupby_finalize(obj, method): [ lambda x: x.agg(["sum", "count"]), lambda x: x.transform(lambda y: y), - lambda x: x.apply(lambda y: y), #Fixed with #42252 + #lambda x: x.apply(lambda y: y), Fixed with #42252 lambda x: x.agg("std"), lambda x: x.agg("var"), lambda x: x.agg("sem"), From d2ba70ff115d9e964461ae7143a58607b9823bd7 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Wed, 30 Jun 2021 14:09:28 +0100 Subject: [PATCH 06/22] Update test_finalize.py --- pandas/tests/generic/test_finalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 
a90336eb86c5a..27aa4e38867db 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -761,7 +761,7 @@ def test_groupby_finalize(obj, method): [ lambda x: x.agg(["sum", "count"]), lambda x: x.transform(lambda y: y), - #lambda x: x.apply(lambda y: y), Fixed with #42252 + # lambda x: x.apply(lambda y: y), Fixed with #42252 lambda x: x.agg("std"), lambda x: x.agg("var"), lambda x: x.agg("sem"), From 60cce3b92aba0f51ee6115288dc9f035cbfc001c Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Wed, 30 Jun 2021 14:21:18 +0100 Subject: [PATCH 07/22] Update test_concat.py --- pandas/tests/reshape/concat/test_concat.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 974d1095d02c2..bfcabee606978 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -656,9 +656,7 @@ def test_concat_posargs_deprecation(): def test_concat_retain_attrs(): - ''' Retain the attrs during concat - - Only retain the attrs when the attrs are the same across all dataframes.''' + # GH#41828 d = {'col1': [1, 2], 'col2': [3, 4]} df1 = pd.DataFrame(data=d) df1.attrs = {1: 1} @@ -669,10 +667,7 @@ def test_concat_retain_attrs(): def test_concat_drop_attrs(): - '''Discard attrs when they don't match. 
- - Drop the attrs when the attrs when the attrs are different across - all dataframes.''' + # GH#41828 d = {'col1': [1, 2], 'col2': [3, 4]} df1 = pd.DataFrame(data=d) df1.attrs = {1: 1} From 28614cbe647a151b4f724cda8ac551d626cb1596 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Mon, 4 Oct 2021 14:30:01 +0100 Subject: [PATCH 08/22] update --- pandas/core/generic.py | 6 ++---- pandas/tests/generic/test_finalize.py | 1 - pandas/tests/reshape/concat/test_concat.py | 15 ++++++++++++--- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c73aeb1ccb935..eae9171475ae6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5539,10 +5539,8 @@ def __finalize__( # Issue #41828, retain the attrs only if all NDFrame have the same # attrs. attrs = other.objs[0].attrs - for obj in other.objs[1:]: - if obj.attrs != attrs: - break - else: + check_attrs = all([objs.attrs == attrs for objs in other.objs[1:]]) + if check_attrs: for name in attrs: self.attrs[name] = attrs[name] diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 27aa4e38867db..bb62a1086add9 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -761,7 +761,6 @@ def test_groupby_finalize(obj, method): [ lambda x: x.agg(["sum", "count"]), lambda x: x.transform(lambda y: y), - # lambda x: x.apply(lambda y: y), Fixed with #42252 lambda x: x.agg("std"), lambda x: x.agg("var"), lambda x: x.agg("sem"), diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index b4080c9d92033..c552583fc3dda 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -700,7 +700,7 @@ def test_concat_posargs_deprecation(): tm.assert_frame_equal(result, expected) -def test_concat_retain_attrs(): +def test_concat_retain_attrs_df(): # GH#41828 d = {'col1': [1, 2], 'col2': [3, 4]} df1 = 
pd.DataFrame(data=d) @@ -708,8 +708,17 @@ def test_concat_retain_attrs(): df2 = pd.DataFrame(data=d) df2.attrs = {1: 1} df = pd.concat([df1, df2]) - assert df.attrs == {1: 1} + assert df.attrs[1] == 1 +def test_concat_retain_attrs_series(): + # GH#41828 + d = [1, 2] + df1 = pd.Series(data=d) + df1.attrs = {1: 1} + df2 = pd.Series(data=d) + df2.attrs = {1: 1} + df = pd.concat([df1, df2]) + assert df.attrs[1] == 1 def test_concat_drop_attrs(): # GH#41828 @@ -719,4 +728,4 @@ def test_concat_drop_attrs(): df2 = pd.DataFrame(data=d) df2.attrs = {1: 2} df = pd.concat([df1, df2]) - assert df.attrs == {} + assert len(df.attrs) == 0 From bb59b1b59217c5c0e0bfae71e2ecf3c5550808aa Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Mon, 4 Oct 2021 14:31:17 +0100 Subject: [PATCH 09/22] Update test_concat.py --- pandas/tests/reshape/concat/test_concat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index c552583fc3dda..90da3249e6846 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -710,6 +710,7 @@ def test_concat_retain_attrs_df(): df = pd.concat([df1, df2]) assert df.attrs[1] == 1 + def test_concat_retain_attrs_series(): # GH#41828 d = [1, 2] @@ -720,6 +721,7 @@ def test_concat_retain_attrs_series(): df = pd.concat([df1, df2]) assert df.attrs[1] == 1 + def test_concat_drop_attrs(): # GH#41828 d = {'col1': [1, 2], 'col2': [3, 4]} From 298e572cbc68bfeec1811bec88e0829d04ccab91 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Tue, 12 Oct 2021 10:37:34 +0100 Subject: [PATCH 10/22] Update test_concat.py --- pandas/tests/reshape/concat/test_concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 90da3249e6846..7843a501ffad6 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ 
b/pandas/tests/reshape/concat/test_concat.py @@ -702,7 +702,7 @@ def test_concat_posargs_deprecation(): def test_concat_retain_attrs_df(): # GH#41828 - d = {'col1': [1, 2], 'col2': [3, 4]} + d = {"col1": [1, 2], "col2": [3, 4]} df1 = pd.DataFrame(data=d) df1.attrs = {1: 1} df2 = pd.DataFrame(data=d) @@ -724,7 +724,7 @@ def test_concat_retain_attrs_series(): def test_concat_drop_attrs(): # GH#41828 - d = {'col1': [1, 2], 'col2': [3, 4]} + d = {"col1": [1, 2], "col2": [3, 4]} df1 = pd.DataFrame(data=d) df1.attrs = {1: 1} df2 = pd.DataFrame(data=d) From e1728633120cfed4364618c976bfc5eb396c5ca4 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Tue, 12 Oct 2021 10:45:46 +0100 Subject: [PATCH 11/22] Update test_concat.py --- pandas/tests/reshape/concat/test_concat.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 7843a501ffad6..a0dc78894f476 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -703,31 +703,31 @@ def test_concat_posargs_deprecation(): def test_concat_retain_attrs_df(): # GH#41828 d = {"col1": [1, 2], "col2": [3, 4]} - df1 = pd.DataFrame(data=d) + df1 = DataFrame(data=d) df1.attrs = {1: 1} - df2 = pd.DataFrame(data=d) + df2 = DataFrame(data=d) df2.attrs = {1: 1} - df = pd.concat([df1, df2]) + df = concat([df1, df2]) assert df.attrs[1] == 1 def test_concat_retain_attrs_series(): # GH#41828 d = [1, 2] - df1 = pd.Series(data=d) + df1 = Series(data=d) df1.attrs = {1: 1} - df2 = pd.Series(data=d) + df2 = Series(data=d) df2.attrs = {1: 1} - df = pd.concat([df1, df2]) + df = concat([df1, df2]) assert df.attrs[1] == 1 def test_concat_drop_attrs(): # GH#41828 d = {"col1": [1, 2], "col2": [3, 4]} - df1 = pd.DataFrame(data=d) + df1 = DataFrame(data=d) df1.attrs = {1: 1} - df2 = pd.DataFrame(data=d) + df2 = DataFrame(data=d) df2.attrs = {1: 2} - df = pd.concat([df1, df2]) + df = 
concat([df1, df2]) assert len(df.attrs) == 0 From e41822f25cfb6e389c14c7fcfc265f30f2e73aec Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Tue, 12 Oct 2021 10:55:54 +0100 Subject: [PATCH 12/22] Update generic.py --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eae9171475ae6..d33e8418c3059 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5539,7 +5539,8 @@ def __finalize__( # Issue #41828, retain the attrs only if all NDFrame have the same # attrs. attrs = other.objs[0].attrs - check_attrs = all([objs.attrs == attrs for objs in other.objs[1:]]) + check_attrs = all((objs.attrs == attrs for objs in other.objs[1:])) + if check_attrs: for name in attrs: self.attrs[name] = attrs[name] From ab34f2fe1856b8964d7893301c4777c80f786a31 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Tue, 12 Oct 2021 11:01:33 +0100 Subject: [PATCH 13/22] Update generic.py --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d33e8418c3059..f546f592114fd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5539,7 +5539,7 @@ def __finalize__( # Issue #41828, retain the attrs only if all NDFrame have the same # attrs. attrs = other.objs[0].attrs - check_attrs = all((objs.attrs == attrs for objs in other.objs[1:])) + check_attrs = all(objs.attrs == attrs for objs in other.objs[1:]) if check_attrs: for name in attrs: From 49bd5b1ee53c0f7555095de92f5f31a475bfa401 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Tue, 12 Oct 2021 11:09:17 +0100 Subject: [PATCH 14/22] Update generic.py --- pandas/core/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f546f592114fd..74f4237e6ff6b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5540,7 +5540,6 @@ def __finalize__( # attrs. 
attrs = other.objs[0].attrs check_attrs = all(objs.attrs == attrs for objs in other.objs[1:]) - if check_attrs: for name in attrs: self.attrs[name] = attrs[name] From cfcc6d1ff43a43fa16e805a5d36dde5a62b0562a Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Mon, 29 Nov 2021 17:46:13 +0000 Subject: [PATCH 15/22] update --- pandas/tests/generic/test_finalize.py | 1 - pandas/tests/reshape/concat/test_concat.py | 39 +++++++++------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 95c1ff2704095..91b65309d75db 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -760,7 +760,6 @@ def test_groupby_finalize(obj, method): "method", [ lambda x: x.agg(["sum", "count"]), - lambda x: x.transform(lambda y: y), lambda x: x.agg("std"), lambda x: x.agg("var"), lambda x: x.agg("sem"), diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index a0dc78894f476..036c72afb4355 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -699,35 +699,28 @@ def test_concat_posargs_deprecation(): expected = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"]) tm.assert_frame_equal(result, expected) - -def test_concat_retain_attrs_df(): +@pytest.mark.parametrize("data", [Series(data=[1, 2]), + DataFrame(data={"col1": [1, 2],}), + DataFrame(), + Series()]) +def test_concat_drop_attrs(data): # GH#41828 - d = {"col1": [1, 2], "col2": [3, 4]} - df1 = DataFrame(data=d) + df1 = data.copy() df1.attrs = {1: 1} - df2 = DataFrame(data=d) - df2.attrs = {1: 1} + df2 = data.copy() + df2.attrs = {1: 2} df = concat([df1, df2]) - assert df.attrs[1] == 1 - + assert len(df.attrs) == 0 -def test_concat_retain_attrs_series(): +@pytest.mark.parametrize("data", [Series(data=[1, 2]), + DataFrame(data={"col1": [1, 2],}), + DataFrame(), + Series()]) +def 
test_concat_retain_attrs(data): # GH#41828 - d = [1, 2] - df1 = Series(data=d) + df1 = data.copy() df1.attrs = {1: 1} - df2 = Series(data=d) + df2 = data.copy() df2.attrs = {1: 1} df = concat([df1, df2]) assert df.attrs[1] == 1 - - -def test_concat_drop_attrs(): - # GH#41828 - d = {"col1": [1, 2], "col2": [3, 4]} - df1 = DataFrame(data=d) - df1.attrs = {1: 1} - df2 = DataFrame(data=d) - df2.attrs = {1: 2} - df = concat([df1, df2]) - assert len(df.attrs) == 0 From cadfb63b2c012035872a407d30caf2ed8c4ab396 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Mon, 29 Nov 2021 17:51:56 +0000 Subject: [PATCH 16/22] Update test_concat.py --- pandas/tests/reshape/concat/test_concat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 036c72afb4355..7a449f8364b0d 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -699,6 +699,7 @@ def test_concat_posargs_deprecation(): expected = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"]) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("data", [Series(data=[1, 2]), DataFrame(data={"col1": [1, 2],}), DataFrame(), @@ -712,6 +713,7 @@ def test_concat_drop_attrs(data): df = concat([df1, df2]) assert len(df.attrs) == 0 + @pytest.mark.parametrize("data", [Series(data=[1, 2]), DataFrame(data={"col1": [1, 2],}), DataFrame(), From dd19672ae2ccd94c66322fdf1b8fa0e905a68727 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Mon, 29 Nov 2021 17:52:36 +0000 Subject: [PATCH 17/22] Update test_concat.py --- pandas/tests/reshape/concat/test_concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 7a449f8364b0d..4f2e66485988e 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -701,7 +701,7 @@ def 
test_concat_posargs_deprecation(): @pytest.mark.parametrize("data", [Series(data=[1, 2]), - DataFrame(data={"col1": [1, 2],}), + DataFrame(data={"col1": [1, 2], }), DataFrame(), Series()]) def test_concat_drop_attrs(data): @@ -715,7 +715,7 @@ def test_concat_drop_attrs(data): @pytest.mark.parametrize("data", [Series(data=[1, 2]), - DataFrame(data={"col1": [1, 2],}), + DataFrame(data={"col1": [1, 2], }), DataFrame(), Series()]) def test_concat_retain_attrs(data): From 93cc2578a89a51f5a7c3dcf434157c00b3939cc2 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Mon, 29 Nov 2021 20:18:48 +0000 Subject: [PATCH 18/22] Update test_concat.py --- pandas/tests/reshape/concat/test_concat.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 4f2e66485988e..6deed83feb9fd 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -702,8 +702,8 @@ def test_concat_posargs_deprecation(): @pytest.mark.parametrize("data", [Series(data=[1, 2]), DataFrame(data={"col1": [1, 2], }), - DataFrame(), - Series()]) + DataFrame(dtype=float), + Series(dtype=float)]) def test_concat_drop_attrs(data): # GH#41828 df1 = data.copy() @@ -716,8 +716,8 @@ def test_concat_drop_attrs(data): @pytest.mark.parametrize("data", [Series(data=[1, 2]), DataFrame(data={"col1": [1, 2], }), - DataFrame(), - Series()]) + DataFrame(dtype=float), + Series(dtype=float)]) def test_concat_retain_attrs(data): # GH#41828 df1 = data.copy() From e67991814dcefe155e3b8bf89ae93007940ace7a Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Mon, 29 Nov 2021 21:23:46 +0000 Subject: [PATCH 19/22] Update test_concat.py --- pandas/tests/reshape/concat/test_concat.py | 34 +++++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 
6deed83feb9fd..c4b32371042b3 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -700,10 +700,19 @@ def test_concat_posargs_deprecation(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("data", [Series(data=[1, 2]), - DataFrame(data={"col1": [1, 2], }), - DataFrame(dtype=float), - Series(dtype=float)]) +@pytest.mark.parametrize( + "data", + [ + Series(data=[1, 2]), + DataFrame( + data={ + "col1": [1, 2], + } + ), + DataFrame(dtype=float), + Series(dtype=float), + ], +) def test_concat_drop_attrs(data): # GH#41828 df1 = data.copy() @@ -714,10 +723,19 @@ def test_concat_drop_attrs(data): assert len(df.attrs) == 0 -@pytest.mark.parametrize("data", [Series(data=[1, 2]), - DataFrame(data={"col1": [1, 2], }), - DataFrame(dtype=float), - Series(dtype=float)]) +@pytest.mark.parametrize( + "data", + [ + Series(data=[1, 2]), + DataFrame( + data={ + "col1": [1, 2], + } + ), + DataFrame(dtype=float), + Series(dtype=float), + ], +) def test_concat_retain_attrs(data): # GH#41828 df1 = data.copy() From 08e6fab6f1a3d9461ef37b522d1e89e0e1ba8443 Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Tue, 30 Nov 2021 11:29:20 +0000 Subject: [PATCH 20/22] Update generic.py --- pandas/core/generic.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bdb391fce0e87..7f18399160cad 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5522,8 +5522,6 @@ def __finalize__( object.__setattr__(self, name, getattr(other, name, None)) if method == "concat": - # Issue #41828, retain the attrs only if all NDFrame have the same - # attrs. 
attrs = other.objs[0].attrs check_attrs = all(objs.attrs == attrs for objs in other.objs[1:]) if check_attrs: From f49f0276ce23f0e80f693c9d1d9062b9a638937c Mon Sep 17 00:00:00 2001 From: xiki-tempula Date: Tue, 30 Nov 2021 11:32:26 +0000 Subject: [PATCH 21/22] Update test_finalize.py --- pandas/tests/generic/test_finalize.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 91b65309d75db..f27e92a55268f 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -347,9 +347,7 @@ operator.methodcaller("infer_objects"), ), (pd.Series, ([1, 2],), operator.methodcaller("convert_dtypes")), - pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")), - ), + (pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")), (pd.Series, ([1, None, 3],), operator.methodcaller("interpolate")), (pd.DataFrame, ({"A": [1, None, 3]},), operator.methodcaller("interpolate")), (pd.Series, ([1, 2],), operator.methodcaller("clip", lower=1)), @@ -760,6 +758,7 @@ def test_groupby_finalize(obj, method): "method", [ lambda x: x.agg(["sum", "count"]), + lambda x: x.apply(lambda y: y), lambda x: x.agg("std"), lambda x: x.agg("var"), lambda x: x.agg("sem"), From cefc44433a75d5515ae5d111b82c59b905d8ae46 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Wed, 1 Dec 2021 08:51:31 +0000 Subject: [PATCH 22/22] Update v1.4.0.rst --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 193356a46a6ea..9e48e8bb35838 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -190,6 +190,7 @@ representation of :class:`DataFrame` objects (:issue:`4889`). 
Other enhancements ^^^^^^^^^^^^^^^^^^ +- :meth:`concat` will preserve the ``attrs`` when they are the same for all objects and discard the ``attrs`` when they are different (:issue:`41828`) - :class:`DataFrameGroupBy` operations with ``as_index=False`` now correctly retain ``ExtensionDtype`` dtypes for columns being grouped on (:issue:`41373`) - Add support for assigning values to ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`) - :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)