Skip to content

Commit 236a0ad

Browse files
authored
Fix compatibility issues with pandas 1.4.3 (#11152)
This PR fixes the two main issues that crop up in pandas 1.4.3 relative to 1.4.2, both around `pd.concat`:

- Columns are now sorted such that integer values come before string values. That is a behavior change that we mimic.
- When multiple objects with identical RangeIndexes are concatenated along axis 1 and sorting is requested, pandas now creates an integer index instead of a RangeIndex. This is not what we want since it increases memory pressure, so those tests have been modified to stop checking the index type and a [pandas issue has been raised](https://github.com/pandas-dev/pandas/issues/47501).

Authors:
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)
- Nghia Truong (https://github.com/ttnghia)

URL: #11152
1 parent e45741f commit 236a0ad

File tree

2 files changed

+28
-7
lines changed

2 files changed

+28
-7
lines changed

python/cudf/cudf/core/dataframe.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1590,7 +1590,18 @@ def _concat(
15901590
# include different types that are not comparable.
15911591
names = sorted(names)
15921592
except TypeError:
1593-
names = list(names)
1593+
# For pandas compatibility, we also try to handle the case
1594+
# where some column names are strings and others are ints. Just
1595+
# assume that everything that isn't a str is numerical, since we
1596+
# can't sort anything else.
1597+
try:
1598+
str_names = sorted(n for n in names if isinstance(n, str))
1599+
non_str_names = sorted(
1600+
n for n in names if not isinstance(n, str)
1601+
)
1602+
names = non_str_names + str_names
1603+
except TypeError:
1604+
names = list(names)
15941605
else:
15951606
names = list(names)
15961607

python/cudf/cudf/tests/test_concat.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -803,7 +803,10 @@ def test_concat_join_axis_1(objs, ignore_index, sort, join, axis):
803803
axis=axis,
804804
)
805805

806-
assert_eq(expected, actual, check_index_type=True)
806+
# TODO: Remove special handling of check_index_type below
807+
# after the following bug from pandas is fixed:
808+
# https://github.com/pandas-dev/pandas/issues/47501
809+
assert_eq(expected, actual, check_index_type=not (axis == 1 and sort))
807810

808811

809812
@pytest.mark.parametrize("ignore_index", [True, False])
@@ -870,7 +873,10 @@ def test_concat_join_one_df(ignore_index, sort, join, axis):
870873
[gdf1], sort=sort, join=join, ignore_index=ignore_index, axis=axis
871874
)
872875

873-
assert_eq(expected, actual, check_index_type=True)
876+
# TODO: Remove special handling of check_index_type below
877+
# after the following bug from pandas is fixed:
878+
# https://github.com/pandas-dev/pandas/issues/47501
879+
assert_eq(expected, actual, check_index_type=not (axis == 1 and sort))
874880

875881

876882
@pytest.mark.parametrize(
@@ -919,7 +925,10 @@ def test_concat_join_no_overlapping_columns(
919925
axis=axis,
920926
)
921927

922-
assert_eq(expected, actual, check_index_type=True)
928+
# TODO: Remove special handling of check_index_type below
929+
# after the following bug from pandas is fixed:
930+
# https://github.com/pandas-dev/pandas/issues/47501
931+
assert_eq(expected, actual, check_index_type=not (axis == 1 and sort))
923932

924933

925934
@pytest.mark.parametrize("ignore_index", [False, True])
@@ -1107,13 +1116,14 @@ def test_concat_join_series(ignore_index, sort, join, axis):
11071116
axis=axis,
11081117
)
11091118

1110-
# TODO: Remove special handling below
1111-
# after following bug from pandas is fixed:
1119+
# TODO: Remove special handling of check_index_type below
1120+
# after the following bugs from pandas are fixed:
11121121
# https://github.com/pandas-dev/pandas/issues/46675
1122+
# https://github.com/pandas-dev/pandas/issues/47501
11131123
assert_eq(
11141124
expected,
11151125
actual,
1116-
check_index_type=False if axis == 1 and join == "outer" else True,
1126+
check_index_type=(axis == 0),
11171127
)
11181128

11191129

0 commit comments

Comments
 (0)