From c54848f9e2e3d282f1f62dce1c14feb2f8a41152 Mon Sep 17 00:00:00 2001
From: jreback
Date: Fri, 12 Apr 2013 11:53:26 -0400
Subject: [PATCH] PERF: series construction perf enhancements, use a fast path based on dtype

---
 pandas/core/common.py |  5 +++++
 pandas/core/series.py | 19 ++++++++++++-------
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index 4acaa3f421e3a..c41784d015e7c 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -42,6 +42,8 @@ class AmbiguousIndexError(PandasError, KeyError):
     pass
 
 
+_POSSIBLY_CAST_DTYPES = set([ np.dtype(t) for t in ['M8[ns]','m8[ns]','O','int8','uint8','int16','uint16','int32','uint32','int64','uint64'] ])
+
 def isnull(obj):
     '''
     Detect missing values (NaN in numeric arrays, None/NaN in object arrays)
@@ -1038,6 +1040,9 @@ def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
     return values
 
 
+def _possibly_castable(arr):
+    return arr.dtype not in _POSSIBLY_CAST_DTYPES
+
 def _possibly_convert_platform(values):
     """ try to do platform conversion, allow ndarray or list here """
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 8c60bfdd582d6..919dd57ee70ab 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3196,7 +3196,6 @@ def remove_na(arr):
     """
     return arr[notnull(arr)]
 
-
 def _sanitize_array(data, index, dtype=None, copy=False,
                     raise_cast_failure=False):
 
@@ -3208,7 +3207,13 @@ def _sanitize_array(data, index, dtype=None, copy=False,
         else:
             data = data.copy()
 
-    def _try_cast(arr):
+    def _try_cast(arr, take_fast_path):
+
+        # perf shortcut as this is the most common case
+        if take_fast_path:
+            if com._possibly_castable(arr) and not copy and dtype is None:
+                return arr
+
         try:
             arr = com._possibly_cast_to_datetime(arr, dtype)
             subarr = pa.array(arr, dtype=dtype, copy=copy)
@@ -3227,7 +3232,7 @@ def _try_cast(arr):
             # possibility of nan -> garbage
             if com.is_float_dtype(data.dtype) and com.is_integer_dtype(dtype):
                 if not isnull(data).any():
-                    subarr = _try_cast(data)
+                    subarr = _try_cast(data, True)
                 elif copy:
                     subarr = data.copy()
             else:
@@ -3239,9 +3244,9 @@ def _try_cast(arr):
                     elif raise_cast_failure:
                         raise TypeError('Cannot cast datetime64 to %s' % dtype)
                 else:
-                    subarr = _try_cast(data)
+                    subarr = _try_cast(data, True)
         else:
-            subarr = _try_cast(data)
+            subarr = _try_cast(data, True)
 
         if copy:
             subarr = data.copy()
@@ -3249,7 +3254,7 @@ def _try_cast(arr):
     elif isinstance(data, list) and len(data) > 0:
         if dtype is not None:
             try:
-                subarr = _try_cast(data)
+                subarr = _try_cast(data, False)
             except Exception:
                 if raise_cast_failure:  # pragma: no cover
                     raise
@@ -3262,7 +3267,7 @@ def _try_cast(arr):
         subarr = com._possibly_cast_to_datetime(subarr, dtype)
 
     else:
-        subarr = _try_cast(data)
+        subarr = _try_cast(data, False)
 
     # scalar like
     if subarr.ndim == 0: