Skip to content

Commit 1cbe011

Browse files
authored
ENH: allow attrs to be propagated via pd.concat (#42252)
1 parent ed5a004 commit 1cbe011

File tree

4 files changed

+53
-1
lines changed

4 files changed

+53
-1
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ representation of :class:`DataFrame` objects (:issue:`4889`).
198198

199199
Other enhancements
200200
^^^^^^^^^^^^^^^^^^
201+
- :meth:`concat` will preserve the ``attrs`` when they are the same for all objects and discard the ``attrs`` when they are different. (:issue:`41828`)
201202
- :class:`DataFrameGroupBy` operations with ``as_index=False`` now correctly retain ``ExtensionDtype`` dtypes for columns being grouped on (:issue:`41373`)
202203
- Add support for assigning values to the ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`)
203204
- :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)

pandas/core/generic.py

+6
Original file line numberDiff line numberDiff line change
@@ -5514,6 +5514,12 @@ def __finalize__(
55145514
object.__setattr__(self, name, getattr(other, name, None))
55155515

55165516
if method == "concat":
5517+
attrs = other.objs[0].attrs
5518+
check_attrs = all(objs.attrs == attrs for objs in other.objs[1:])
5519+
if check_attrs:
5520+
for name in attrs:
5521+
self.attrs[name] = attrs[name]
5522+
55175523
allows_duplicate_labels = all(
55185524
x.flags.allows_duplicate_labels for x in other.objs
55195525
)

pandas/tests/generic/test_finalize.py

-1
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,6 @@ def test_groupby_finalize(obj, method):
758758
"method",
759759
[
760760
lambda x: x.agg(["sum", "count"]),
761-
lambda x: x.transform(lambda y: y),
762761
lambda x: x.apply(lambda y: y),
763762
lambda x: x.agg("std"),
764763
lambda x: x.agg("var"),

pandas/tests/reshape/concat/test_concat.py

+46
Original file line numberDiff line numberDiff line change
@@ -698,3 +698,49 @@ def test_concat_posargs_deprecation():
698698
result = concat([df, df2], 0)
699699
expected = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"])
700700
tm.assert_frame_equal(result, expected)
701+
702+
703+
@pytest.mark.parametrize(
704+
"data",
705+
[
706+
Series(data=[1, 2]),
707+
DataFrame(
708+
data={
709+
"col1": [1, 2],
710+
}
711+
),
712+
DataFrame(dtype=float),
713+
Series(dtype=float),
714+
],
715+
)
716+
def test_concat_drop_attrs(data):
717+
# GH#41828
718+
df1 = data.copy()
719+
df1.attrs = {1: 1}
720+
df2 = data.copy()
721+
df2.attrs = {1: 2}
722+
df = concat([df1, df2])
723+
assert len(df.attrs) == 0
724+
725+
726+
@pytest.mark.parametrize(
727+
"data",
728+
[
729+
Series(data=[1, 2]),
730+
DataFrame(
731+
data={
732+
"col1": [1, 2],
733+
}
734+
),
735+
DataFrame(dtype=float),
736+
Series(dtype=float),
737+
],
738+
)
739+
def test_concat_retain_attrs(data):
740+
# GH#41828
741+
df1 = data.copy()
742+
df1.attrs = {1: 1}
743+
df2 = data.copy()
744+
df2.attrs = {1: 1}
745+
df = concat([df1, df2])
746+
assert df.attrs[1] == 1

0 commit comments

Comments
 (0)