Skip to content

Commit 3cfd868

Browse files
authored
BUG: crosstab(dropna=False) did not keep np.nan in result (#53205)
1 parent 1771b0f commit 3cfd868

File tree

4 files changed

+39
-13
lines changed

4 files changed

+39
-13
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,7 @@ Groupby/resample/rolling
421421

422422
Reshaping
423423
^^^^^^^^^
424+
- Bug in :func:`crosstab` when ``dropna=False`` would not keep ``np.nan`` in the result (:issue:`10772`)
424425
- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when a dict-like argument is passed in (:issue:`51099`)
425426
- Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)
426427
- Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`)

pandas/core/reshape/pivot.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def __internal_pivot_table(
164164
pass
165165
values = list(values)
166166

167-
grouped = data.groupby(keys, observed=observed, sort=sort)
167+
grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna)
168168
agged = grouped.agg(aggfunc)
169169

170170
if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):

pandas/tests/reshape/test_crosstab.py

+28-10
Original file line numberDiff line numberDiff line change
@@ -286,24 +286,29 @@ def test_margin_dropna4(self):
286286
# GH 12642
287287
# _add_margins raises KeyError: Level None not found
288288
# when margins=True and dropna=False
289+
# GH: 10772: Keep np.nan in result with dropna=False
289290
df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
290291
actual = crosstab(df.a, df.b, margins=True, dropna=False)
291-
expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
292-
expected.index = Index([1.0, 2.0, "All"], name="a")
292+
expected = DataFrame([[1, 0, 1.0], [1, 3, 4.0], [0, 1, np.nan], [2, 4, 6.0]])
293+
expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
293294
expected.columns = Index([3, 4, "All"], name="b")
294295
tm.assert_frame_equal(actual, expected)
295296

296297
def test_margin_dropna5(self):
298+
# GH: 10772: Keep np.nan in result with dropna=False
297299
df = DataFrame(
298300
{"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
299301
)
300302
actual = crosstab(df.a, df.b, margins=True, dropna=False)
301-
expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
302-
expected.index = Index([1.0, 2.0, "All"], name="a")
303-
expected.columns = Index([3.0, 4.0, "All"], name="b")
303+
expected = DataFrame(
304+
[[1, 0, 0, 1.0], [0, 1, 0, 1.0], [0, 3, 1, np.nan], [1, 4, 0, 6.0]]
305+
)
306+
expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
307+
expected.columns = Index([3.0, 4.0, np.nan, "All"], name="b")
304308
tm.assert_frame_equal(actual, expected)
305309

306310
def test_margin_dropna6(self):
311+
# GH: 10772: Keep np.nan in result with dropna=False
307312
a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
308313
b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object)
309314
c = np.array(
@@ -315,13 +320,14 @@ def test_margin_dropna6(self):
315320
)
316321
m = MultiIndex.from_arrays(
317322
[
318-
["one", "one", "two", "two", "All"],
319-
["dull", "shiny", "dull", "shiny", ""],
323+
["one", "one", "two", "two", np.nan, np.nan, "All"],
324+
["dull", "shiny", "dull", "shiny", "dull", "shiny", ""],
320325
],
321326
names=["b", "c"],
322327
)
323328
expected = DataFrame(
324-
[[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m
329+
[[1, 0, 1, 0, 0, 0, 2], [2, 0, 1, 1, 0, 1, 5], [3, 0, 2, 1, 0, 0, 7]],
330+
columns=m,
325331
)
326332
expected.index = Index(["bar", "foo", "All"], name="a")
327333
tm.assert_frame_equal(actual, expected)
@@ -330,11 +336,23 @@ def test_margin_dropna6(self):
330336
[a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False
331337
)
332338
m = MultiIndex.from_arrays(
333-
[["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
339+
[
340+
["bar", "bar", "bar", "foo", "foo", "foo", "All"],
341+
["one", "two", np.nan, "one", "two", np.nan, ""],
342+
],
334343
names=["a", "b"],
335344
)
336345
expected = DataFrame(
337-
[[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m
346+
[
347+
[1, 0, 1.0],
348+
[1, 0, 1.0],
349+
[0, 0, np.nan],
350+
[2, 0, 2.0],
351+
[1, 1, 2.0],
352+
[0, 1, np.nan],
353+
[5, 2, 7.0],
354+
],
355+
index=m,
338356
)
339357
expected.columns = Index(["dull", "shiny", "All"], name="c")
340358
tm.assert_frame_equal(actual, expected)

pandas/tests/reshape/test_pivot.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -248,11 +248,18 @@ def test_pivot_with_non_observable_dropna(self, dropna):
248248
)
249249

250250
result = df.pivot_table(index="A", values="B", dropna=dropna)
251+
if dropna:
252+
values = [2.0, 3.0]
253+
codes = [0, 1]
254+
else:
255+
# GH: 10772
256+
values = [2.0, 3.0, 0.0]
257+
codes = [0, 1, -1]
251258
expected = DataFrame(
252-
{"B": [2.0, 3.0]},
259+
{"B": values},
253260
index=Index(
254261
Categorical.from_codes(
255-
[0, 1], categories=["low", "high"], ordered=True
262+
codes, categories=["low", "high"], ordered=dropna
256263
),
257264
name="A",
258265
),

0 commit comments

Comments
 (0)