Skip to content

Commit 174196d

Browse files
committed
Merge pull request #3333 from jreback/series_perf
PERF: series construction perf enhancements, use a fast path based on dtype
2 parents 9764ea6 + c54848f commit 174196d

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

pandas/core/common.py

+5
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ class AmbiguousIndexError(PandasError, KeyError):
4242
pass
4343

4444

45+
# numpy dtypes consulted by _possibly_castable: ns-resolution datetime64 and
# timedelta64, object, and the fixed-width signed/unsigned integer types
_POSSIBLY_CAST_DTYPES = set(
    np.dtype(name)
    for name in ('M8[ns]', 'm8[ns]', 'O',
                 'int8', 'uint8', 'int16', 'uint16',
                 'int32', 'uint32', 'int64', 'uint64')
)
46+
4547
def isnull(obj):
4648
'''
4749
Detect missing values (NaN in numeric arrays, None/NaN in object arrays)
@@ -1038,6 +1040,9 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
10381040

10391041
return values
10401042

1043+
def _possibly_castable(arr):
    """
    Report whether the dtype of ``arr`` lies outside _POSSIBLY_CAST_DTYPES.

    Returns True when ``arr.dtype`` is not a member of that set and False
    when it is.
    """
    is_listed = arr.dtype in _POSSIBLY_CAST_DTYPES
    return not is_listed
1045+
10411046
def _possibly_convert_platform(values):
10421047
""" try to do platform conversion, allow ndarray or list here """
10431048

pandas/core/series.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -3196,7 +3196,6 @@ def remove_na(arr):
31963196
"""
31973197
return arr[notnull(arr)]
31983198

3199-
32003199
def _sanitize_array(data, index, dtype=None, copy=False,
32013200
raise_cast_failure=False):
32023201

@@ -3208,7 +3207,13 @@ def _sanitize_array(data, index, dtype=None, copy=False,
32083207
else:
32093208
data = data.copy()
32103209

3211-
def _try_cast(arr):
3210+
def _try_cast(arr, take_fast_path):
3211+
3212+
# perf shortcut as this is the most common case
3213+
if take_fast_path:
3214+
if com._possibly_castable(arr) and not copy and dtype is None:
3215+
return arr
3216+
32123217
try:
32133218
arr = com._possibly_cast_to_datetime(arr, dtype)
32143219
subarr = pa.array(arr, dtype=dtype, copy=copy)
@@ -3227,7 +3232,7 @@ def _try_cast(arr):
32273232
# possibility of nan -> garbage
32283233
if com.is_float_dtype(data.dtype) and com.is_integer_dtype(dtype):
32293234
if not isnull(data).any():
3230-
subarr = _try_cast(data)
3235+
subarr = _try_cast(data, True)
32313236
elif copy:
32323237
subarr = data.copy()
32333238
else:
@@ -3239,17 +3244,17 @@ def _try_cast(arr):
32393244
elif raise_cast_failure:
32403245
raise TypeError('Cannot cast datetime64 to %s' % dtype)
32413246
else:
3242-
subarr = _try_cast(data)
3247+
subarr = _try_cast(data, True)
32433248
else:
3244-
subarr = _try_cast(data)
3249+
subarr = _try_cast(data, True)
32453250

32463251
if copy:
32473252
subarr = data.copy()
32483253

32493254
elif isinstance(data, list) and len(data) > 0:
32503255
if dtype is not None:
32513256
try:
3252-
subarr = _try_cast(data)
3257+
subarr = _try_cast(data, False)
32533258
except Exception:
32543259
if raise_cast_failure: # pragma: no cover
32553260
raise
@@ -3262,7 +3267,7 @@ def _try_cast(arr):
32623267
subarr = com._possibly_cast_to_datetime(subarr, dtype)
32633268

32643269
else:
3265-
subarr = _try_cast(data)
3270+
subarr = _try_cast(data, False)
32663271

32673272
# scalar like
32683273
if subarr.ndim == 0:

0 commit comments

Comments
 (0)