Skip to content

Commit 58de58f

Browse files
authored
ENH: check for string and convert to list in DataFrame.dropna subset argument (#41022)
1 parent 402c536 commit 58de58f

File tree

3 files changed

+31
-3
lines changed

3 files changed

+31
-3
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ Other enhancements
177177
- :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
178178
- The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`)
179179
- Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
180+
- :meth:`DataFrame.dropna` now accepts a single label for ``subset``, in addition to an array-like of labels (:issue:`41021`)
180181
-
181182

182183
.. ---------------------------------------------------------------------------

pandas/core/frame.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -5931,7 +5931,7 @@ def dropna(
59315931
axis: Axis = 0,
59325932
how: str = "any",
59335933
thresh=None,
5934-
subset=None,
5934+
subset: IndexLabel = None,
59355935
inplace: bool = False,
59365936
):
59375937
"""
@@ -5963,7 +5963,7 @@ def dropna(
59635963
59645964
thresh : int, optional
59655965
Require that many non-NA values.
5966-
subset : array-like, optional
5966+
subset : column label or sequence of labels, optional
59675967
Labels along other axis to consider, e.g. if you are dropping rows
59685968
these would be a list of columns to include.
59695969
inplace : bool, default False
@@ -6047,11 +6047,14 @@ def dropna(
60476047

60486048
agg_obj = self
60496049
if subset is not None:
6050+
# subset needs to be list
6051+
if not is_list_like(subset):
6052+
subset = [subset]
60506053
ax = self._get_axis(agg_axis)
60516054
indices = ax.get_indexer_for(subset)
60526055
check = indices == -1
60536056
if check.any():
6054-
raise KeyError(list(np.compress(check, subset)))
6057+
raise KeyError(np.array(subset)[check].tolist())
60556058
agg_obj = self.take(indices, axis=agg_axis)
60566059

60576060
count = agg_obj.count(axis=agg_axis)

pandas/tests/frame/methods/test_dropna.py

+24
Original file line numberDiff line numberDiff line change
@@ -243,3 +243,27 @@ def test_dropna_pos_args_deprecation(self):
243243
result = df.dropna(1)
244244
expected = DataFrame({"a": [1, 2, 3]})
245245
tm.assert_frame_equal(result, expected)
246+
247+
def test_set_single_column_subset(self):
248+
# GH 41021
249+
df = DataFrame({"A": [1, 2, 3], "B": list("abc"), "C": [4, np.NaN, 5]})
250+
expected = DataFrame(
251+
{"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=[0, 2]
252+
)
253+
result = df.dropna(subset="C")
254+
tm.assert_frame_equal(result, expected)
255+
256+
def test_single_column_not_present_in_axis(self):
257+
# GH 41021
258+
df = DataFrame({"A": [1, 2, 3]})
259+
260+
# Column not present
261+
with pytest.raises(KeyError, match="['D']"):
262+
df.dropna(subset="D", axis=0)
263+
264+
def test_subset_is_nparray(self):
265+
# GH 41021
266+
df = DataFrame({"A": [1, 2, np.NaN], "B": list("abc"), "C": [4, np.NaN, 5]})
267+
expected = DataFrame({"A": [1.0], "B": ["a"], "C": [4.0]})
268+
result = df.dropna(subset=np.array(["A", "C"]))
269+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)