Skip to content

Commit f577be2

Browse files
Backport PR pandas-dev#57089 on branch 2.2.x (BUG: wide_to_long with string columns) (pandas-dev#57120)
Backport PR pandas-dev#57089: BUG: wide_to_long with string columns. Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 1550858 commit f577be2

File tree

4 files changed

+34
-4
lines changed

4 files changed

+34
-4
lines changed

doc/source/whatsnew/v2.2.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ Fixed regressions
1616
- Fixed memory leak in :func:`read_csv` (:issue:`57039`)
1717
- Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
1818
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
19+
- Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
1920
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
2021
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
2122
- Fixed regression in :meth:`Series.pct_change` raising a ``ValueError`` for an empty :class:`Series` (:issue:`57056`)

pandas/core/reshape/melt.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -458,8 +458,7 @@ def wide_to_long(
458458

459459
def get_var_names(df, stub: str, sep: str, suffix: str):
460460
regex = rf"^{re.escape(stub)}{re.escape(sep)}{suffix}$"
461-
pattern = re.compile(regex)
462-
return df.columns[df.columns.str.match(pattern)]
461+
return df.columns[df.columns.str.match(regex)]
463462

464463
def melt_stub(df, stub: str, i, j, value_vars, sep: str):
465464
newdf = melt(

pandas/core/strings/accessor.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1336,14 +1336,14 @@ def contains(
13361336
return self._wrap_result(result, fill_value=na, returns_string=False)
13371337

13381338
@forbid_nonstring_types(["bytes"])
1339-
def match(self, pat, case: bool = True, flags: int = 0, na=None):
1339+
def match(self, pat: str, case: bool = True, flags: int = 0, na=None):
13401340
"""
13411341
Determine if each string starts with a match of a regular expression.
13421342
13431343
Parameters
13441344
----------
13451345
pat : str
1346-
Character sequence or regular expression.
1346+
Character sequence.
13471347
case : bool, default True
13481348
If True, case sensitive.
13491349
flags : int, default 0 (no flags)

pandas/tests/reshape/test_melt.py

+30
Original file line number | Diff line number | Diff line change
@@ -1220,3 +1220,33 @@ def test_missing_stubname(self, dtype):
12201220
new_level = expected.index.levels[0].astype(dtype)
12211221
expected.index = expected.index.set_levels(new_level, level=0)
12221222
tm.assert_frame_equal(result, expected)
1223+
1224+
1225+
def test_wide_to_long_pyarrow_string_columns():
1226+
# GH 57066
1227+
pytest.importorskip("pyarrow")
1228+
df = DataFrame(
1229+
{
1230+
"ID": {0: 1},
1231+
"R_test1": {0: 1},
1232+
"R_test2": {0: 1},
1233+
"R_test3": {0: 2},
1234+
"D": {0: 1},
1235+
}
1236+
)
1237+
df.columns = df.columns.astype("string[pyarrow_numpy]")
1238+
result = wide_to_long(
1239+
df, stubnames="R", i="ID", j="UNPIVOTED", sep="_", suffix=".*"
1240+
)
1241+
expected = DataFrame(
1242+
[[1, 1], [1, 1], [1, 2]],
1243+
columns=Index(["D", "R"], dtype=object),
1244+
index=pd.MultiIndex.from_arrays(
1245+
[
1246+
[1, 1, 1],
1247+
Index(["test1", "test2", "test3"], dtype="string[pyarrow_numpy]"),
1248+
],
1249+
names=["ID", "UNPIVOTED"],
1250+
),
1251+
)
1252+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)