diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 92eb1d54ae676..7115364fbb0e5 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -971,4 +971,4 @@ Bug Fixes - Bug in DataFrame terminal display: Setting max_column/max_rows to zero did not trigger auto-resizing of dfs to fit terminal width/height (:issue:`7180`). - Bug in OLS where running with "cluster" and "nw_lags" parameters did not work correctly, but also did not throw an error (:issue:`5884'). - +- Bug in ``DataFrame.dropna`` that interpreted non-existent columns in the subset argument as the 'last column' (:issue:`8303`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4654ceee9896b..9402d9a6d0685 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2552,7 +2552,11 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, agg_obj = self if subset is not None: ax = self._get_axis(agg_axis) - agg_obj = self.take(ax.get_indexer_for(subset),axis=agg_axis) + indices = ax.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(list(np.compress(check,subset))) + agg_obj = self.take(indices,axis=agg_axis) count = agg_obj.count(axis=agg_axis) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 99b1fa09a8d51..4cae0f1d266d7 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7200,6 +7200,8 @@ def test_dropna_corner(self): # bad input self.assertRaises(ValueError, self.frame.dropna, how='foo') self.assertRaises(TypeError, self.frame.dropna, how=None) + # non-existent column - 8303 + self.assertRaises(KeyError, self.frame.dropna, subset=['A','X']) def test_dropna_multiple_axes(self): df = DataFrame([[1, np.nan, 2, 3],