diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 63119e90a1337..ead3c79430bf9 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -246,6 +246,7 @@ Bug Fixes - Fixed bug in ``to_sql`` ``dtype`` argument not accepting an instantiated SQLAlchemy type (:issue:`9083`). - Bug in ``.loc`` partial setting with a ``np.datetime64`` (:issue:`9516`) +- Incorrect dtypes inferred on datetimelike looking series & on xs slices (:issue:`9477`) - Items in ``Categorical.unique()`` (and ``s.unique()`` if ``s`` is of dtype ``category``) now appear in the order in which they are originally found, not in sorted order (:issue:`9331`). This is now consistent with the behavior for other dtypes in pandas. diff --git a/pandas/core/common.py b/pandas/core/common.py index 78c0c6c5dbd0f..4de63cf59bd1c 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2030,6 +2030,7 @@ def _possibly_infer_to_datetimelike(value, convert_dates=False): Parameters ---------- + value : np.array convert_dates : boolean, default False if True try really hard to convert dates (such as datetime.date), other leave inferred dtype 'date' alone @@ -2068,9 +2069,9 @@ def _try_timedelta(v): inferred_type = lib.infer_dtype(sample) if inferred_type in ['datetime', 'datetime64'] or (convert_dates and inferred_type in ['date']): - value = _try_datetime(v) + value = _try_datetime(v).reshape(shape) elif inferred_type in ['timedelta', 'timedelta64']: - value = _try_timedelta(v) + value = _try_timedelta(v).reshape(shape) # its possible to have nulls intermixed within the datetime or timedelta # these will in general have an inferred_type of 'mixed', so have to try @@ -2081,9 +2082,9 @@ def _try_timedelta(v): elif inferred_type in ['mixed']: if lib.is_possible_datetimelike_array(_ensure_object(v)): - value = _try_timedelta(v) + value = _try_timedelta(v).reshape(shape) if lib.infer_dtype(value) in ['mixed']: - value = _try_datetime(v) + value = 
_try_datetime(v).reshape(shape) return value diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 733de1fc202e5..d64353db8cda6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -27,7 +27,7 @@ _default_index, _maybe_upcast, is_sequence, _infer_dtype_from_scalar, _values_from_object, is_list_like, _get_dtype, _maybe_box_datetimelike, - is_categorical_dtype) + is_categorical_dtype, is_object_dtype, _possibly_infer_to_datetimelike) from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import (maybe_droplevels, @@ -396,7 +396,15 @@ def _get_axes(N, K, index=index, columns=columns): raise_with_traceback(e) index, columns = _get_axes(*values.shape) - return create_block_manager_from_blocks([values.T], [columns, index]) + values = values.T + + # if we don't have a dtype specified, then try to convert objects + # on the entire block; this is to convert if we have datetimelike's + # embedded in an object type + if dtype is None and is_object_dtype(values): + values = _possibly_infer_to_datetimelike(values) + + return create_block_manager_from_blocks([values], [columns, index]) @property def axes(self): @@ -1537,7 +1545,7 @@ def _sizeof_fmt(num, size_qualifier): # cases (e.g., it misses categorical data even with object # categories) size_qualifier = ('+' if 'object' in counts - or self.index.dtype.kind == 'O' else '') + or is_object_dtype(self.index) else '') mem_usage = self.memory_usage(index=True).sum() lines.append("memory usage: %s\n" % _sizeof_fmt(mem_usage, size_qualifier)) @@ -2257,6 +2265,8 @@ def reindexer(value): elif (isinstance(value, Index) or is_sequence(value)): from pandas.core.series import _sanitize_index + + # turn me into an ndarray value = _sanitize_index(value, self.index, copy=False) if not isinstance(value, (np.ndarray, Index)): if isinstance(value, list) and len(value) > 0: @@ -2267,6 +2277,11 @@ def reindexer(value): value = 
value.copy().T else: value = value.copy() + + # possibly infer to datetimelike + if is_object_dtype(value.dtype): + value = _possibly_infer_to_datetimelike(value.ravel()).reshape(value.shape) + else: # upcast the scalar dtype, value = _infer_dtype_from_scalar(value) @@ -2341,7 +2356,7 @@ def lookup(self, row_labels, col_labels): for i, (r, c) in enumerate(zip(row_labels, col_labels)): result[i] = self.get_value(r, c) - if result.dtype == 'O': + if is_object_dtype(result): result = lib.maybe_convert_objects(result) return result @@ -4232,7 +4247,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, values = self.values result = f(values) - if result.dtype == np.object_: + if is_object_dtype(result.dtype): try: if filter_type is None or filter_type == 'numeric': result = result.astype(np.float64) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9a4a91cf01ff6..18500fd05b5f8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1467,8 +1467,11 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True): if not is_list_like(new_values) or self.ndim == 1: return _maybe_box_datetimelike(new_values) - result = Series(new_values, index=self.columns, - name=self.index[loc]) + result = Series(new_values, + index=self.columns, + name=self.index[loc], + copy=copy, + dtype=new_values.dtype) else: result = self.iloc[loc] diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 359463b10d3d0..4453c7eaf1fda 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -13,8 +13,8 @@ ABCSparseSeries, _infer_dtype_from_scalar, is_null_datelike_scalar, _maybe_promote, is_timedelta64_dtype, is_datetime64_dtype, - _possibly_infer_to_datetimelike, array_equivalent, - _maybe_convert_string_to_object, is_categorical) + array_equivalent, _maybe_convert_string_to_object, + is_categorical) from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, 
length_of_indexer from pandas.core.categorical import Categorical, maybe_to_categorical @@ -2074,25 +2074,8 @@ def make_block(values, placement, klass=None, ndim=None, klass = ComplexBlock elif is_categorical(values): klass = CategoricalBlock - else: - - # we want to infer here if its a datetimelike if its object type - # this is pretty strict in that it requires a datetime/timedelta - # value IN addition to possible nulls/strings - # an array of ONLY strings will not be inferred - if np.prod(values.shape): - result = _possibly_infer_to_datetimelike(values) - vtype = result.dtype.type - if issubclass(vtype, np.datetime64): - klass = DatetimeBlock - values = result - elif (issubclass(vtype, np.timedelta64)): - klass = TimeDeltaBlock - values = result - - if klass is None: - klass = ObjectBlock + klass = ObjectBlock return klass(values, ndim=ndim, fastpath=fastpath, placement=placement) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index a8ad8c058a2b4..b3e2e16af54c2 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -490,7 +490,9 @@ def decode(obj): index = obj['index'] return globals()[obj['klass']](unconvert(obj['data'], dtype, obj['compress']), - index=index, name=obj['name']) + index=index, + dtype=dtype, + name=obj['name']) elif typ == 'block_manager': axes = obj['axes'] diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 04dad68703577..79467db1ee264 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -569,6 +569,7 @@ def test_scalar_conversion(self): self.assertEqual(int(Series([1.])), 1) self.assertEqual(long(Series([1.])), 1) + def test_astype(self): s = Series(np.random.randn(5),name='foo') @@ -778,6 +779,28 @@ def test_constructor_dtype_nocast(self): s2[1] = 5 self.assertEqual(s[1], 5) + def test_constructor_datelike_coercion(self): + + # GH 9477 + # incorrectly inferring on datetimelike looking when object dtype is specified + s = Series([Timestamp('20130101'),'NOV'],dtype=object) + 
self.assertEqual(s.iloc[0],Timestamp('20130101')) + self.assertEqual(s.iloc[1],'NOV') + self.assertTrue(s.dtype == object) + + # the dtype was being reset on the slicing and re-inferred to datetime even + # though the blocks are mixed + belly = '216 3T19'.split() + wing1 = '2T15 4H19'.split() + wing2 = '416 4T20'.split() + mat = pd.to_datetime('2016-01-22 2019-09-07'.split()) + df = pd.DataFrame({'wing1':wing1, 'wing2':wing2, 'mat':mat}, index=belly) + + result = df.loc['3T19'] + self.assertTrue(result.dtype == object) + result = df.loc['216'] + self.assertTrue(result.dtype == object) + def test_constructor_dtype_datetime64(self): import pandas.tslib as tslib