Skip to content

Commit 1672f26

Browse files
committed
ENH: add Index.dropna
1 parent 31f8e4d commit 1672f26

File tree

6 files changed

+134
-9
lines changed

6 files changed

+134
-9
lines changed

doc/source/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1349,6 +1349,8 @@ Modifying and Computations
13491349
Index.unique
13501350
Index.nunique
13511351
Index.value_counts
1352+
Index.fillna
1353+
Index.dropna
13521354

13531355
Conversion
13541356
~~~~~~~~~~

doc/source/whatsnew/v0.19.0.txt

+34-9
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,40 @@ Using the anchoring suffix, you can also specify the day of month to use instead
259259

260260
pd.date_range('2015-01-01', freq='SM-14', periods=4)
261261

262+
.. _whatsnew_0190.enhancements.index:
263+
264+
New Index methods
265+
^^^^^^^^^^^^^^^^^
266+
267+
The following methods and options have been added to ``Index``, to be more consistent with the ``Series`` and ``DataFrame`` API.
268+
269+
- ``Index`` now supports the ``.where()`` function for same shape indexing (:issue:`13170`)
270+
271+
.. ipython:: python
272+
273+
idx = pd.Index(['a', 'b', 'c'])
274+
idx.where([True, False, True])
275+
276+
277+
- ``Index`` now supports ``.dropna`` to exclude missing values (:issue:`6194`)
278+
279+
.. ipython:: python
280+
281+
idx = pd.Index([1, 2, np.nan, 4])
282+
idx.dropna()
283+
284+
For ``MultiIndex``, values are dropped if any level is missing by default. Specifying
285+
``how='all'`` only drops values where all levels are missing.

.. ipython:: python
286+
287+
midx = pd.MultiIndex.from_arrays([[1, 2, np.nan, 4],
288+
[1, 2, np.nan, np.nan]])
289+
midx
290+
midx.dropna()
291+
midx.dropna(how='all')
292+
293+
- ``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`)
294+
- ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see the :ref:`docs here <text.extractall>` (:issue:`10008`, :issue:`13156`)
295+
262296
.. _whatsnew_0190.enhancements.other:
263297

264298
Other enhancements
@@ -273,7 +307,6 @@ Other enhancements
273307
pd.to_numeric(s, downcast='unsigned')
274308
pd.to_numeric(s, downcast='integer')
275309

276-
- ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, the see :ref:`docs here <text.extractall>` (:issue:`10008`, :issue:`13156`)
277310
- ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`)
278311

279312
.. ipython:: python
@@ -295,14 +328,6 @@ Other enhancements
295328

296329
- The ``pd.read_html()`` has gained support for the ``na_values``, ``converters``, ``keep_default_na`` options (:issue:`13461`)
297330

298-
- ``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`)
299-
- ``Index`` now supports the ``.where()`` function for same shape indexing (:issue:`13170`)
300-
301-
.. ipython:: python
302-
303-
idx = pd.Index(['a', 'b', 'c'])
304-
idx.where([True, False, True])
305-
306331
- ``Categorical.astype()`` now accepts an optional boolean argument ``copy``, effective when dtype is categorical (:issue:`13209`)
307332
- ``DataFrame`` has gained the ``.asof()`` method to return the last non-NaN values according to the selected subset (:issue:`13358`)
308333
- Consistent with the Python API, ``pd.read_csv()`` will now interpret ``+inf`` as positive infinity (:issue:`13274`)

pandas/indexes/base.py

+23
Original file line numberDiff line numberDiff line change
@@ -3243,6 +3243,29 @@ def fillna(self, value=None, downcast=None):
32433243
return Index(result, name=self.name)
32443244
return self._shallow_copy()
32453245

3246+
# Shared docstring text for ``dropna``; both the ``Index.dropna`` and
# ``MultiIndex.dropna`` definitions in this commit attach it through
# ``@Appender(_index_shared_docs['dropna'])`` instead of repeating it.
_index_shared_docs['dropna'] = """
3247+
Return Index without NA/NaN values
3248+
3249+
Parameters
3250+
----------
3251+
how : {'any', 'all'}, default 'any'
3252+
If the Index is a MultiIndex, drop the value when any or all levels
3253+
are NaN.
3254+
3255+
Returns
3256+
-------
3257+
valid : Index
3258+
"""
3259+
3260+
@Appender(_index_shared_docs['dropna'])
def dropna(self, how='any'):
    # The documentation for this method is the shared
    # ``_index_shared_docs['dropna']`` text passed to @Appender above.
    #
    # ``how`` is only validated in this implementation; the entries that
    # get removed below do not depend on which valid option was given.
    accepted = ('any', 'all')
    if how not in accepted:
        raise ValueError("invalid how option: {0}".format(how))

    # Nothing is missing: hand back a plain shallow copy.
    if not self.hasnans:
        return self._shallow_copy()

    # Keep only the positions that are not flagged as missing.
    data = self.values
    not_missing = ~self._isnan
    return self._shallow_copy(data[not_missing])
3268+
32463269
def _evaluate_with_timedelta_like(self, other, op, opstr):
32473270
raise TypeError("can only perform ops with timedelta like values")
32483271

pandas/indexes/multi.py

+13
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,19 @@ def fillna(self, value=None, downcast=None):
597597
# isnull is not implemented for MultiIndex
598598
raise NotImplementedError('isnull is not defined for MultiIndex')
599599

600+
@Appender(_index_shared_docs['dropna'])
def dropna(self, how='any'):
    # The documentation for this method is the shared
    # ``_index_shared_docs['dropna']`` text passed to @Appender above.
    #
    # One boolean mask per level: True where that level's label is -1,
    # which is what this method treats as a missing entry.
    missing_by_level = [codes == -1 for codes in self.labels]

    # 'any' drops a row when at least one level is missing, 'all' only when
    # every level is missing; anything else is rejected.
    if how == 'any':
        combine = np.any
    elif how == 'all':
        combine = np.all
    else:
        raise ValueError("invalid how option: {0}".format(how))

    drop_mask = combine(missing_by_level, axis=0)
    keep_mask = ~drop_mask

    # Filter every level's labels with the same mask so rows stay aligned.
    kept_labels = [codes[keep_mask] for codes in self.labels]
    return self.copy(labels=kept_labels, deep=True)
612+
600613
def get_value(self, series, key):
601614
# somewhat broken encapsulation
602615
from pandas.core.indexing import maybe_droplevels

pandas/tests/indexes/test_base.py

+41
Original file line numberDiff line numberDiff line change
@@ -1837,6 +1837,47 @@ def test_logical_compat(self):
18371837
self.assertEqual(idx.all(), idx.values.all())
18381838
self.assertEqual(idx.any(), idx.values.any())
18391839

1840+
# Checks ``dropna`` on flat indexes: plain/object/categorical ``Index``,
# ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex``, plus the error
# raised for an unknown ``how`` value.
# NOTE(review): indentation is not visible in this view, so where the
# ``for dtype`` loop body ends cannot be confirmed from here.
def test_dropna(self):
1841+
# GH 6194
1842+
for dtype in [None, object, 'category']:
1843+
# Integer data has nothing to drop, so the result equals the input.
idx = pd.Index([1, 2, 3], dtype=dtype)
1844+
tm.assert_index_equal(idx.dropna(), idx)
1845+
1846+
# Float data: unchanged without NaN, NaN entry removed otherwise.
idx = pd.Index([1., 2., 3.], dtype=dtype)
1847+
tm.assert_index_equal(idx.dropna(), idx)
1848+
nanidx = pd.Index([1., 2., np.nan, 3.], dtype=dtype)
1849+
tm.assert_index_equal(nanidx.dropna(), idx)
1850+
1851+
# String data: same two checks as for floats.
idx = pd.Index(['A', 'B', 'C'], dtype=dtype)
1852+
tm.assert_index_equal(idx.dropna(), idx)
1853+
nanidx = pd.Index(['A', np.nan, 'B', 'C'], dtype=dtype)
1854+
tm.assert_index_equal(nanidx.dropna(), idx)
1855+
1856+
# Both valid ``how`` values are expected to give the same result here.
tm.assert_index_equal(nanidx.dropna(how='any'), idx)
1857+
tm.assert_index_equal(nanidx.dropna(how='all'), idx)
1858+
1859+
# DatetimeIndex: a trailing NaT is dropped.
idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'])
1860+
tm.assert_index_equal(idx.dropna(), idx)
1861+
nanidx = pd.DatetimeIndex(['2011-01-01', '2011-01-02',
1862+
'2011-01-03', pd.NaT])
1863+
tm.assert_index_equal(nanidx.dropna(), idx)
1864+
1865+
# TimedeltaIndex: NaT at both ends is dropped.
idx = pd.TimedeltaIndex(['1 days', '2 days', '3 days'])
1866+
tm.assert_index_equal(idx.dropna(), idx)
1867+
nanidx = pd.TimedeltaIndex([pd.NaT, '1 days', '2 days',
1868+
'3 days', pd.NaT])
1869+
tm.assert_index_equal(nanidx.dropna(), idx)
1870+
1871+
# PeriodIndex: the 'NaT' string entry in the middle is dropped.
idx = pd.PeriodIndex(['2012-02', '2012-04', '2012-05'], freq='M')
1872+
tm.assert_index_equal(idx.dropna(), idx)
1873+
nanidx = pd.PeriodIndex(['2012-02', '2012-04', 'NaT', '2012-05'],
1874+
freq='M')
1875+
tm.assert_index_equal(nanidx.dropna(), idx)
1876+
1877+
# An unrecognised ``how`` must raise ValueError with this message.
msg = "invalid how option: xxx"
1878+
with tm.assertRaisesRegexp(ValueError, msg):
1879+
pd.Index([1, 2, 3]).dropna(how='xxx')
1880+
18401881

18411882
def test_get_combined_index():
18421883
from pandas.core.index import _get_combined_index

pandas/tests/indexes/test_multi.py

+21
Original file line numberDiff line numberDiff line change
@@ -2258,3 +2258,24 @@ def test_rangeindex_fallback_coercion_bug(self):
22582258
result = df.index.get_level_values('buzz')
22592259
expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz')
22602260
tm.assert_index_equal(result, expected)
2261+
2262+
def test_dropna(self):
    # GH 6194
    # Three levels; position 3 is missing in every level, positions 1 and 2
    # are each missing in exactly one level.
    level_values = [[1, np.nan, 3, np.nan, 5],
                    [1, 2, np.nan, np.nan, 5],
                    ['a', 'b', 'c', np.nan, 'e']]
    midx = pd.MultiIndex.from_arrays(level_values)

    # Default and explicit how='any': only fully populated rows survive.
    fully_populated = pd.MultiIndex.from_arrays([[1, 5],
                                                 [1, 5],
                                                 ['a', 'e']])
    tm.assert_index_equal(midx.dropna(), fully_populated)
    tm.assert_index_equal(midx.dropna(how='any'), fully_populated)

    # how='all': only the row that is missing in every level is removed.
    partly_populated = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5],
                                                  [1, 2, np.nan, 5],
                                                  ['a', 'b', 'c', 'e']])
    tm.assert_index_equal(midx.dropna(how='all'), partly_populated)

    # An unrecognised ``how`` must raise ValueError with this message.
    with tm.assertRaisesRegexp(ValueError, "invalid how option: xxx"):
        midx.dropna(how='xxx')

0 commit comments

Comments
 (0)