Skip to content

BUG: dtype lost in DataFrame.append #43392

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 4, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 23 additions & 18 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8966,6 +8966,7 @@ def append(
3 3
4 4
"""
combined_columns = None
if isinstance(other, (Series, dict)):
if isinstance(other, dict):
if not ignore_index:
Expand All @@ -8980,36 +8981,40 @@ def append(
index = Index([other.name], name=self.index.name)
idx_diff = other.index.difference(self.columns)
combined_columns = self.columns.append(idx_diff)
other = (
other.reindex(combined_columns, copy=False)
.to_frame()
.T.infer_objects()
.rename_axis(index.names, copy=False)
)
if not self.columns.equals(combined_columns):
self = self.reindex(columns=combined_columns)
row_df = other.to_frame().T
# infer_objects is needed for
# test_append_empty_frame_to_series_with_dateutil_tz
other = row_df.infer_objects().rename_axis(index.names, copy=False)
elif isinstance(other, list):
if not other:
pass
elif not isinstance(other[0], DataFrame):
other = DataFrame(other)
if (self.columns.get_indexer(other.columns) >= 0).all():
other = other.reindex(columns=self.columns)

from pandas.core.reshape.concat import concat

if isinstance(other, (list, tuple)):
to_concat = [self, *other]
else:
to_concat = [self, other]
return (
concat(
to_concat,
ignore_index=ignore_index,
verify_integrity=verify_integrity,
sort=sort,
)
).__finalize__(self, method="append")

result = concat(
to_concat,
ignore_index=ignore_index,
verify_integrity=verify_integrity,
sort=sort,
)
if (
combined_columns is not None
and not sort
and not combined_columns.equals(result.columns)
):
# TODO: reindexing here is a kludge bc union_indexes does not
# pass sort to index.union, xref #43375
# combined_columns.equals check is necessary for preserving dtype
# in test_crosstab_normalize
result = result.reindex(combined_columns, axis=1)
return result.__finalize__(self, method="append")

def join(
self,
Expand Down
16 changes: 6 additions & 10 deletions pandas/tests/reshape/concat/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -198,8 +196,12 @@ def test_append_same_columns_type(self, index):
ser = Series([7, 8], index=ser_index, name=2)
result = df.append(ser)
expected = DataFrame(
[[1.0, 2.0, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index
[[1, 2, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index
)
# integer dtype is preserved for columns present in ser.index
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't assert_frame_equal already compare these dtypes? (I guess it's OK that you're being explicit as well.)

assert expected.dtypes.iloc[0].kind == "i"
assert expected.dtypes.iloc[1].kind == "i"

tm.assert_frame_equal(result, expected)

# ser wider than df
Expand Down Expand Up @@ -301,14 +303,10 @@ def test_append_missing_column_proper_upcast(self, sort):
assert appended["A"].dtype == "f8"
assert appended["B"].dtype == "O"

# TODO(ArrayManager) DataFrame.append reindexes a Series itself (giving
# float dtype) -> delay reindexing until concat_array_managers which properly
# takes care of all-null dtype inference
@td.skip_array_manager_not_yet_implemented
def test_append_empty_frame_to_series_with_dateutil_tz(self):
# GH 23682
date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
ser = Series({"date": date, "a": 1.0, "b": 2.0})
ser = Series({"a": 1.0, "b": 2.0, "date": date})
df = DataFrame(columns=["c", "d"])
result_a = df.append(ser, ignore_index=True)
expected = DataFrame(
Expand All @@ -327,8 +325,6 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
result_b = result_a.append(ser, ignore_index=True)
tm.assert_frame_equal(result_b, expected)

# column order is different
expected = expected[["c", "d", "date", "a", "b"]]
result = df.append([ser, ser], ignore_index=True)
tm.assert_frame_equal(result, expected)

Expand Down