Skip to content

Commit 134b20b

Browse files
committed
PERF: Series.dropna with non-nan dtypes
1 parent a89b96d commit 134b20b

File tree

4 files changed

+45
-5
lines changed

4 files changed

+45
-5
lines changed

asv_bench/benchmarks/series_methods.py

+20
Original file line number | Diff line number | Diff line change
@@ -71,3 +71,23 @@ def setup(self):
7171
def time_series_nsmallest2(self):
7272
self.s2.nsmallest(3, take_last=True)
7373
self.s2.nsmallest(3, take_last=False)
74+
75+
76+
class series_dropna_int64(object):
77+
goal_time = 0.2
78+
79+
def setup(self):
80+
self.s = Series(np.random.randint(1, 10, 1000000))
81+
82+
def time_series_dropna_int64(self):
83+
self.s.dropna()
84+
85+
class series_dropna_datetime(object):
86+
goal_time = 0.2
87+
88+
def setup(self):
89+
self.s = Series(pd.date_range('2000-01-01', freq='S', periods=1000000))
90+
self.s[np.random.randint(1, 1000000, 100)] = pd.NaT
91+
92+
def time_series_dropna_datetime(self):
93+
self.s.dropna()

doc/source/whatsnew/v0.17.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ Performance Improvements
5353
~~~~~~~~~~~~~~~~~~~~~~~~
5454

5555
- Checking monotonic-ness before sorting on an index (:issue:`11080`)
56+
- ``Series.dropna`` performance improvement when its dtype can't contain ``NaN`` (:issue:`11159`)
5657

5758
.. _whatsnew_0171.bug_fixes:
5859

pandas/core/series.py

+12 -4
Original file line number | Diff line number | Diff line change
@@ -2501,11 +2501,19 @@ def dropna(self, axis=0, inplace=False, **kwargs):
25012501
'argument "{0}"'.format(list(kwargs.keys())[0]))
25022502

25032503
axis = self._get_axis_number(axis or 0)
2504-
result = remove_na(self)
2505-
if inplace:
2506-
self._update_inplace(result)
2504+
2505+
if self._can_hold_na:
2506+
result = remove_na(self)
2507+
if inplace:
2508+
self._update_inplace(result)
2509+
else:
2510+
return result
25072511
else:
2508-
return result
2512+
if inplace:
2513+
# do nothing
2514+
pass
2515+
else:
2516+
return self.copy()
25092517

25102518
valid = lambda self, inplace=False, **kwargs: self.dropna(inplace=inplace,
25112519
**kwargs)

pandas/tests/test_series.py

+12 -1
Original file line number | Diff line number | Diff line change
@@ -5117,7 +5117,6 @@ def test_dropna_empty(self):
51175117
# invalid axis
51185118
self.assertRaises(ValueError, s.dropna, axis=1)
51195119

5120-
51215120
def test_datetime64_tz_dropna(self):
51225121
# DatetimeBlock
51235122
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
@@ -5140,6 +5139,18 @@ def test_datetime64_tz_dropna(self):
51405139
self.assertEqual(result.dtype, 'datetime64[ns, Asia/Tokyo]')
51415140
self.assert_series_equal(result, expected)
51425141

5142+
def test_dropna_no_nan(self):
5143+
for s in [Series([1, 2, 3], name='x'),
5144+
Series([False, True, False], name='x')]:
5145+
5146+
result = s.dropna()
5147+
self.assert_series_equal(result, s)
5148+
self.assertFalse(result is s)
5149+
5150+
s2 = s.copy()
5151+
s2.dropna(inplace=True)
5152+
self.assert_series_equal(s2, s)
5153+
51435154
def test_axis_alias(self):
51445155
s = Series([1, 2, np.nan])
51455156
assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))

0 commit comments

Comments
 (0)