Skip to content

Commit 53a0db1

Browse files
committed
Merge pull request pandas-dev#11159 from sinhrks/dropna_perf
PERF: Series.dropna with non-nan dtype blocks
2 parents 2431425 + 134b20b commit 53a0db1

File tree

4 files changed: +45 -5 lines changed

asv_bench/benchmarks/series_methods.py

+20
Original file line number | Diff line number | Diff line change
@@ -71,3 +71,23 @@ def setup(self):
7171
def time_series_nsmallest2(self):
7272
self.s2.nsmallest(3, take_last=True)
7373
self.s2.nsmallest(3, take_last=False)
74+
75+
76+
class series_dropna_int64(object):
77+
goal_time = 0.2
78+
79+
def setup(self):
80+
self.s = Series(np.random.randint(1, 10, 1000000))
81+
82+
def time_series_dropna_int64(self):
83+
self.s.dropna()
84+
85+
class series_dropna_datetime(object):
86+
goal_time = 0.2
87+
88+
def setup(self):
89+
self.s = Series(pd.date_range('2000-01-01', freq='S', periods=1000000))
90+
self.s[np.random.randint(1, 1000000, 100)] = pd.NaT
91+
92+
def time_series_dropna_datetime(self):
93+
self.s.dropna()

doc/source/whatsnew/v0.17.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@ Performance Improvements
5757
~~~~~~~~~~~~~~~~~~~~~~~~
5858

5959
- Checking monotonic-ness before sorting on an index (:issue:`11080`)
60+
- ``Series.dropna`` performance improvement when its dtype can't contain ``NaN`` (:issue:`11159`)
6061

6162

6263
- Release the GIL on most datetime field operations (e.g. ``DatetimeIndex.year``, ``Series.dt.year``), normalization, and conversion to and from ``Period``, ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` (:issue:`11263`)

pandas/core/series.py

+12 -4
Original file line number | Diff line number | Diff line change
@@ -2501,11 +2501,19 @@ def dropna(self, axis=0, inplace=False, **kwargs):
25012501
'argument "{0}"'.format(list(kwargs.keys())[0]))
25022502

25032503
axis = self._get_axis_number(axis or 0)
2504-
result = remove_na(self)
2505-
if inplace:
2506-
self._update_inplace(result)
2504+
2505+
if self._can_hold_na:
2506+
result = remove_na(self)
2507+
if inplace:
2508+
self._update_inplace(result)
2509+
else:
2510+
return result
25072511
else:
2508-
return result
2512+
if inplace:
2513+
# do nothing
2514+
pass
2515+
else:
2516+
return self.copy()
25092517

25102518
valid = lambda self, inplace=False, **kwargs: self.dropna(inplace=inplace,
25112519
**kwargs)

pandas/tests/test_series.py

+12 -1
Original file line number | Diff line number | Diff line change
@@ -5169,7 +5169,6 @@ def test_dropna_empty(self):
51695169
# invalid axis
51705170
self.assertRaises(ValueError, s.dropna, axis=1)
51715171

5172-
51735172
def test_datetime64_tz_dropna(self):
51745173
# DatetimeBlock
51755174
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
@@ -5192,6 +5191,18 @@ def test_datetime64_tz_dropna(self):
51925191
self.assertEqual(result.dtype, 'datetime64[ns, Asia/Tokyo]')
51935192
self.assert_series_equal(result, expected)
51945193

5194+
def test_dropna_no_nan(self):
5195+
for s in [Series([1, 2, 3], name='x'),
5196+
Series([False, True, False], name='x')]:
5197+
5198+
result = s.dropna()
5199+
self.assert_series_equal(result, s)
5200+
self.assertFalse(result is s)
5201+
5202+
s2 = s.copy()
5203+
s2.dropna(inplace=True)
5204+
self.assert_series_equal(s2, s)
5205+
51955206
def test_axis_alias(self):
51965207
s = Series([1, 2, np.nan])
51975208
assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))

0 commit comments

Comments
 (0)