Skip to content

Commit 9a9bd3f

Browse files
authored
BUG: stack changes NA values in the index (#56582)
1 parent a3af152 commit 9a9bd3f

File tree

3 files changed

+41
-2
lines changed

3 files changed

+41
-2
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,7 @@ Reshaping
689689
- Bug in :meth:`DataFrame.melt` where it would not preserve the datetime (:issue:`55254`)
690690
- Bug in :meth:`DataFrame.pivot_table` where the row margin is incorrect when the columns have numeric names (:issue:`26568`)
691691
- Bug in :meth:`DataFrame.pivot` with numeric columns and extension dtype for data (:issue:`56528`)
692+
- Bug in :meth:`DataFrame.stack` and :meth:`Series.stack` with ``future_stack=True`` where NA values in the index would not be preserved (:issue:`56573`)
692693

693694
Sparse
694695
^^^^^^

pandas/core/reshape/reshape.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -953,8 +953,8 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
953953
index_levels = frame.index.levels
954954
index_codes = list(np.tile(frame.index.codes, (1, ratio)))
955955
else:
956-
index_levels = [frame.index.unique()]
957-
codes = factorize(frame.index)[0]
956+
codes, uniques = factorize(frame.index, use_na_sentinel=False)
957+
index_levels = [uniques]
958958
index_codes = list(np.tile(codes, (1, ratio)))
959959
if isinstance(stack_cols, MultiIndex):
960960
column_levels = ordered_stack_cols.levels

pandas/tests/frame/test_stack_unstack.py

+38
Original file line numberDiff line numberDiff line change
@@ -2638,3 +2638,41 @@ def test_stack_tuple_columns(future_stack):
26382638
),
26392639
)
26402640
tm.assert_series_equal(result, expected)
2641+
2642+
2643+
@pytest.mark.parametrize(
2644+
"dtype, na_value",
2645+
[
2646+
("float64", np.nan),
2647+
("Float64", np.nan),
2648+
("Float64", pd.NA),
2649+
("Int64", pd.NA),
2650+
],
2651+
)
2652+
@pytest.mark.parametrize("test_multiindex", [True, False])
2653+
def test_stack_preserves_na(dtype, na_value, test_multiindex):
2654+
# GH#56573
2655+
if test_multiindex:
2656+
index = MultiIndex.from_arrays(2 * [Index([na_value], dtype=dtype)])
2657+
else:
2658+
index = Index([na_value], dtype=dtype)
2659+
df = DataFrame({"a": [1]}, index=index)
2660+
result = df.stack(future_stack=True)
2661+
2662+
if test_multiindex:
2663+
expected_index = MultiIndex.from_arrays(
2664+
[
2665+
Index([na_value], dtype=dtype),
2666+
Index([na_value], dtype=dtype),
2667+
Index(["a"]),
2668+
]
2669+
)
2670+
else:
2671+
expected_index = MultiIndex.from_arrays(
2672+
[
2673+
Index([na_value], dtype=dtype),
2674+
Index(["a"]),
2675+
]
2676+
)
2677+
expected = Series(1, index=expected_index)
2678+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)