Skip to content

BUG: Incorrect dtypes inferred on datetimelike looking series & on xs slices (GH9477) #9501

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 24, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ Bug Fixes
- Fixed bug in ``to_sql`` ``dtype`` argument not accepting an instantiated
SQLAlchemy type (:issue:`9083`).
- Bug in ``.loc`` partial setting with a ``np.datetime64`` (:issue:`9516`)
- Incorrect dtypes inferred on datetimelike looking series & on xs slices (:issue:`9477`)

- Items in ``Categorical.unique()`` (and ``s.unique()`` if ``s`` is of dtype ``category``) now appear in the order in which they are originally found, not in sorted order (:issue:`9331`). This is now consistent with the behavior for other dtypes in pandas.

Expand Down
9 changes: 5 additions & 4 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2030,6 +2030,7 @@ def _possibly_infer_to_datetimelike(value, convert_dates=False):

Parameters
----------
value : np.array
convert_dates : boolean, default False
if True try really hard to convert dates (such as datetime.date), other
leave inferred dtype 'date' alone
Expand Down Expand Up @@ -2068,9 +2069,9 @@ def _try_timedelta(v):
inferred_type = lib.infer_dtype(sample)

if inferred_type in ['datetime', 'datetime64'] or (convert_dates and inferred_type in ['date']):
value = _try_datetime(v)
value = _try_datetime(v).reshape(shape)
elif inferred_type in ['timedelta', 'timedelta64']:
value = _try_timedelta(v)
value = _try_timedelta(v).reshape(shape)

# its possible to have nulls intermixed within the datetime or timedelta
# these will in general have an inferred_type of 'mixed', so have to try
Expand All @@ -2081,9 +2082,9 @@ def _try_timedelta(v):
elif inferred_type in ['mixed']:

if lib.is_possible_datetimelike_array(_ensure_object(v)):
value = _try_timedelta(v)
value = _try_timedelta(v).reshape(shape)
if lib.infer_dtype(value) in ['mixed']:
value = _try_datetime(v)
value = _try_datetime(v).reshape(shape)

return value

Expand Down
25 changes: 20 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
_default_index, _maybe_upcast, is_sequence,
_infer_dtype_from_scalar, _values_from_object,
is_list_like, _get_dtype, _maybe_box_datetimelike,
is_categorical_dtype)
is_categorical_dtype, is_object_dtype, _possibly_infer_to_datetimelike)
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (maybe_droplevels,
Expand Down Expand Up @@ -396,7 +396,15 @@ def _get_axes(N, K, index=index, columns=columns):
raise_with_traceback(e)

index, columns = _get_axes(*values.shape)
return create_block_manager_from_blocks([values.T], [columns, index])
values = values.T

# if we don't have a dtype specified, then try to convert objects
# on the entire block; this is to convert if we have datetimelike's
# embedded in an object type
if dtype is None and is_object_dtype(values):
values = _possibly_infer_to_datetimelike(values)

return create_block_manager_from_blocks([values], [columns, index])

@property
def axes(self):
Expand Down Expand Up @@ -1537,7 +1545,7 @@ def _sizeof_fmt(num, size_qualifier):
# cases (e.g., it misses categorical data even with object
# categories)
size_qualifier = ('+' if 'object' in counts
or self.index.dtype.kind == 'O' else '')
or is_object_dtype(self.index) else '')
mem_usage = self.memory_usage(index=True).sum()
lines.append("memory usage: %s\n" %
_sizeof_fmt(mem_usage, size_qualifier))
Expand Down Expand Up @@ -2257,6 +2265,8 @@ def reindexer(value):

elif (isinstance(value, Index) or is_sequence(value)):
from pandas.core.series import _sanitize_index

# turn me into an ndarray
value = _sanitize_index(value, self.index, copy=False)
if not isinstance(value, (np.ndarray, Index)):
if isinstance(value, list) and len(value) > 0:
Expand All @@ -2267,6 +2277,11 @@ def reindexer(value):
value = value.copy().T
else:
value = value.copy()

# possibly infer to datetimelike
if is_object_dtype(value.dtype):
value = _possibly_infer_to_datetimelike(value.ravel()).reshape(value.shape)

else:
# upcast the scalar
dtype, value = _infer_dtype_from_scalar(value)
Expand Down Expand Up @@ -2341,7 +2356,7 @@ def lookup(self, row_labels, col_labels):
for i, (r, c) in enumerate(zip(row_labels, col_labels)):
result[i] = self.get_value(r, c)

if result.dtype == 'O':
if is_object_dtype(result):
result = lib.maybe_convert_objects(result)

return result
Expand Down Expand Up @@ -4232,7 +4247,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
values = self.values
result = f(values)

if result.dtype == np.object_:
if is_object_dtype(result.dtype):
try:
if filter_type is None or filter_type == 'numeric':
result = result.astype(np.float64)
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1467,8 +1467,11 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
if not is_list_like(new_values) or self.ndim == 1:
return _maybe_box_datetimelike(new_values)

result = Series(new_values, index=self.columns,
name=self.index[loc])
result = Series(new_values,
index=self.columns,
name=self.index[loc],
copy=copy,
dtype=new_values.dtype)

else:
result = self.iloc[loc]
Expand Down
23 changes: 3 additions & 20 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
ABCSparseSeries, _infer_dtype_from_scalar,
is_null_datelike_scalar, _maybe_promote,
is_timedelta64_dtype, is_datetime64_dtype,
_possibly_infer_to_datetimelike, array_equivalent,
_maybe_convert_string_to_object, is_categorical)
array_equivalent, _maybe_convert_string_to_object,
is_categorical)
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import maybe_convert_indices, length_of_indexer
from pandas.core.categorical import Categorical, maybe_to_categorical
Expand Down Expand Up @@ -2074,25 +2074,8 @@ def make_block(values, placement, klass=None, ndim=None,
klass = ComplexBlock
elif is_categorical(values):
klass = CategoricalBlock

else:

# we want to infer here if its a datetimelike if its object type
# this is pretty strict in that it requires a datetime/timedelta
# value IN addition to possible nulls/strings
# an array of ONLY strings will not be inferred
if np.prod(values.shape):
result = _possibly_infer_to_datetimelike(values)
vtype = result.dtype.type
if issubclass(vtype, np.datetime64):
klass = DatetimeBlock
values = result
elif (issubclass(vtype, np.timedelta64)):
klass = TimeDeltaBlock
values = result

if klass is None:
klass = ObjectBlock
klass = ObjectBlock

return klass(values, ndim=ndim, fastpath=fastpath,
placement=placement)
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,9 @@ def decode(obj):
index = obj['index']
return globals()[obj['klass']](unconvert(obj['data'], dtype,
obj['compress']),
index=index, name=obj['name'])
index=index,
dtype=dtype,
name=obj['name'])
elif typ == 'block_manager':
axes = obj['axes']

Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,7 @@ def test_scalar_conversion(self):
self.assertEqual(int(Series([1.])), 1)
self.assertEqual(long(Series([1.])), 1)


def test_astype(self):
s = Series(np.random.randn(5),name='foo')

Expand Down Expand Up @@ -778,6 +779,28 @@ def test_constructor_dtype_nocast(self):
s2[1] = 5
self.assertEqual(s[1], 5)

def test_constructor_datelike_coercion(self):

# GH 9477
# incorrectly inferring on datetimelike looking data when object dtype is specified
s = Series([Timestamp('20130101'),'NOV'],dtype=object)
self.assertEqual(s.iloc[0],Timestamp('20130101'))
self.assertEqual(s.iloc[1],'NOV')
self.assertTrue(s.dtype == object)

# the dtype was being reset on the slicing and re-inferred to datetime even
# though the blocks are mixed
belly = '216 3T19'.split()
wing1 = '2T15 4H19'.split()
wing2 = '416 4T20'.split()
mat = pd.to_datetime('2016-01-22 2019-09-07'.split())
df = pd.DataFrame({'wing1':wing1, 'wing2':wing2, 'mat':mat}, index=belly)

result = df.loc['3T19']
self.assertTrue(result.dtype == object)
result = df.loc['216']
self.assertTrue(result.dtype == object)

def test_constructor_dtype_datetime64(self):
import pandas.tslib as tslib

Expand Down