Skip to content

Commit 1d94dd2

Browse files
committed
Merge pull request #9501 from jreback/series
BUG: Incorrect dtypes inferred on datetimelike looking series & on xs slices (GH9477)
2 parents 9796a9f + cdf611a commit 1d94dd2

File tree

7 files changed

+60
-32
lines changed

7 files changed

+60
-32
lines changed

doc/source/whatsnew/v0.16.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ Bug Fixes
246246
- Fixed bug in ``to_sql`` ``dtype`` argument not accepting an instantiated
247247
SQLAlchemy type (:issue:`9083`).
248248
- Bug in ``.loc`` partial setting with a ``np.datetime64`` (:issue:`9516`)
249+
- Bug where incorrect dtypes were inferred on datetimelike-looking series and on ``xs`` slices (:issue:`9477`)
249250

250251
- Items in ``Categorical.unique()`` (and ``s.unique()`` if ``s`` is of dtype ``category``) now appear in the order in which they are originally found, not in sorted order (:issue:`9331`). This is now consistent with the behavior for other dtypes in pandas.
251252

pandas/core/common.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -2030,6 +2030,7 @@ def _possibly_infer_to_datetimelike(value, convert_dates=False):
20302030
20312031
Parameters
20322032
----------
2033+
value : np.array
20332034
convert_dates : boolean, default False
20342035
if True try really hard to convert dates (such as datetime.date), otherwise
20352036
leave inferred dtype 'date' alone
@@ -2068,9 +2069,9 @@ def _try_timedelta(v):
20682069
inferred_type = lib.infer_dtype(sample)
20692070

20702071
if inferred_type in ['datetime', 'datetime64'] or (convert_dates and inferred_type in ['date']):
2071-
value = _try_datetime(v)
2072+
value = _try_datetime(v).reshape(shape)
20722073
elif inferred_type in ['timedelta', 'timedelta64']:
2073-
value = _try_timedelta(v)
2074+
value = _try_timedelta(v).reshape(shape)
20742075

20752076
# it's possible to have nulls intermixed within the datetime or timedelta
20762077
# these will in general have an inferred_type of 'mixed', so have to try
@@ -2081,9 +2082,9 @@ def _try_timedelta(v):
20812082
elif inferred_type in ['mixed']:
20822083

20832084
if lib.is_possible_datetimelike_array(_ensure_object(v)):
2084-
value = _try_timedelta(v)
2085+
value = _try_timedelta(v).reshape(shape)
20852086
if lib.infer_dtype(value) in ['mixed']:
2086-
value = _try_datetime(v)
2087+
value = _try_datetime(v).reshape(shape)
20872088

20882089
return value
20892090

pandas/core/frame.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
_default_index, _maybe_upcast, is_sequence,
2828
_infer_dtype_from_scalar, _values_from_object,
2929
is_list_like, _get_dtype, _maybe_box_datetimelike,
30-
is_categorical_dtype)
30+
is_categorical_dtype, is_object_dtype, _possibly_infer_to_datetimelike)
3131
from pandas.core.generic import NDFrame, _shared_docs
3232
from pandas.core.index import Index, MultiIndex, _ensure_index
3333
from pandas.core.indexing import (maybe_droplevels,
@@ -396,7 +396,15 @@ def _get_axes(N, K, index=index, columns=columns):
396396
raise_with_traceback(e)
397397

398398
index, columns = _get_axes(*values.shape)
399-
return create_block_manager_from_blocks([values.T], [columns, index])
399+
values = values.T
400+
401+
# if we don't have a dtype specified, then try to convert objects
402+
# on the entire block; this is to convert if we have datetimelikes
403+
# embedded in an object type
404+
if dtype is None and is_object_dtype(values):
405+
values = _possibly_infer_to_datetimelike(values)
406+
407+
return create_block_manager_from_blocks([values], [columns, index])
400408

401409
@property
402410
def axes(self):
@@ -1537,7 +1545,7 @@ def _sizeof_fmt(num, size_qualifier):
15371545
# cases (e.g., it misses categorical data even with object
15381546
# categories)
15391547
size_qualifier = ('+' if 'object' in counts
1540-
or self.index.dtype.kind == 'O' else '')
1548+
or is_object_dtype(self.index) else '')
15411549
mem_usage = self.memory_usage(index=True).sum()
15421550
lines.append("memory usage: %s\n" %
15431551
_sizeof_fmt(mem_usage, size_qualifier))
@@ -2257,6 +2265,8 @@ def reindexer(value):
22572265

22582266
elif (isinstance(value, Index) or is_sequence(value)):
22592267
from pandas.core.series import _sanitize_index
2268+
2269+
# turn me into an ndarray
22602270
value = _sanitize_index(value, self.index, copy=False)
22612271
if not isinstance(value, (np.ndarray, Index)):
22622272
if isinstance(value, list) and len(value) > 0:
@@ -2267,6 +2277,11 @@ def reindexer(value):
22672277
value = value.copy().T
22682278
else:
22692279
value = value.copy()
2280+
2281+
# possibly infer to datetimelike
2282+
if is_object_dtype(value.dtype):
2283+
value = _possibly_infer_to_datetimelike(value.ravel()).reshape(value.shape)
2284+
22702285
else:
22712286
# upcast the scalar
22722287
dtype, value = _infer_dtype_from_scalar(value)
@@ -2341,7 +2356,7 @@ def lookup(self, row_labels, col_labels):
23412356
for i, (r, c) in enumerate(zip(row_labels, col_labels)):
23422357
result[i] = self.get_value(r, c)
23432358

2344-
if result.dtype == 'O':
2359+
if is_object_dtype(result):
23452360
result = lib.maybe_convert_objects(result)
23462361

23472362
return result
@@ -4232,7 +4247,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
42324247
values = self.values
42334248
result = f(values)
42344249

4235-
if result.dtype == np.object_:
4250+
if is_object_dtype(result.dtype):
42364251
try:
42374252
if filter_type is None or filter_type == 'numeric':
42384253
result = result.astype(np.float64)

pandas/core/generic.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1467,8 +1467,11 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
14671467
if not is_list_like(new_values) or self.ndim == 1:
14681468
return _maybe_box_datetimelike(new_values)
14691469

1470-
result = Series(new_values, index=self.columns,
1471-
name=self.index[loc])
1470+
result = Series(new_values,
1471+
index=self.columns,
1472+
name=self.index[loc],
1473+
copy=copy,
1474+
dtype=new_values.dtype)
14721475

14731476
else:
14741477
result = self.iloc[loc]

pandas/core/internals.py

+3-20
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
ABCSparseSeries, _infer_dtype_from_scalar,
1414
is_null_datelike_scalar, _maybe_promote,
1515
is_timedelta64_dtype, is_datetime64_dtype,
16-
_possibly_infer_to_datetimelike, array_equivalent,
17-
_maybe_convert_string_to_object, is_categorical)
16+
array_equivalent, _maybe_convert_string_to_object,
17+
is_categorical)
1818
from pandas.core.index import Index, MultiIndex, _ensure_index
1919
from pandas.core.indexing import maybe_convert_indices, length_of_indexer
2020
from pandas.core.categorical import Categorical, maybe_to_categorical
@@ -2074,25 +2074,8 @@ def make_block(values, placement, klass=None, ndim=None,
20742074
klass = ComplexBlock
20752075
elif is_categorical(values):
20762076
klass = CategoricalBlock
2077-
20782077
else:
2079-
2080-
# we want to infer here if its a datetimelike if its object type
2081-
# this is pretty strict in that it requires a datetime/timedelta
2082-
# value IN addition to possible nulls/strings
2083-
# an array of ONLY strings will not be inferred
2084-
if np.prod(values.shape):
2085-
result = _possibly_infer_to_datetimelike(values)
2086-
vtype = result.dtype.type
2087-
if issubclass(vtype, np.datetime64):
2088-
klass = DatetimeBlock
2089-
values = result
2090-
elif (issubclass(vtype, np.timedelta64)):
2091-
klass = TimeDeltaBlock
2092-
values = result
2093-
2094-
if klass is None:
2095-
klass = ObjectBlock
2078+
klass = ObjectBlock
20962079

20972080
return klass(values, ndim=ndim, fastpath=fastpath,
20982081
placement=placement)

pandas/io/packers.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,9 @@ def decode(obj):
490490
index = obj['index']
491491
return globals()[obj['klass']](unconvert(obj['data'], dtype,
492492
obj['compress']),
493-
index=index, name=obj['name'])
493+
index=index,
494+
dtype=dtype,
495+
name=obj['name'])
494496
elif typ == 'block_manager':
495497
axes = obj['axes']
496498

pandas/tests/test_series.py

+23
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,7 @@ def test_scalar_conversion(self):
569569
self.assertEqual(int(Series([1.])), 1)
570570
self.assertEqual(long(Series([1.])), 1)
571571

572+
572573
def test_astype(self):
573574
s = Series(np.random.randn(5),name='foo')
574575

@@ -778,6 +779,28 @@ def test_constructor_dtype_nocast(self):
778779
s2[1] = 5
779780
self.assertEqual(s[1], 5)
780781

782+
def test_constructor_datelike_coercion(self):
783+
784+
# GH 9477
785+
# incorrectly inferring on datetimelike-looking data when object dtype is specified
786+
s = Series([Timestamp('20130101'),'NOV'],dtype=object)
787+
self.assertEqual(s.iloc[0],Timestamp('20130101'))
788+
self.assertEqual(s.iloc[1],'NOV')
789+
self.assertTrue(s.dtype == object)
790+
791+
# the dtype was being reset on the slicing and re-inferred to datetime even
792+
# though the blocks are mixed
793+
belly = '216 3T19'.split()
794+
wing1 = '2T15 4H19'.split()
795+
wing2 = '416 4T20'.split()
796+
mat = pd.to_datetime('2016-01-22 2019-09-07'.split())
797+
df = pd.DataFrame({'wing1':wing1, 'wing2':wing2, 'mat':mat}, index=belly)
798+
799+
result = df.loc['3T19']
800+
self.assertTrue(result.dtype == object)
801+
result = df.loc['216']
802+
self.assertTrue(result.dtype == object)
803+
781804
def test_constructor_dtype_datetime64(self):
782805
import pandas.tslib as tslib
783806

0 commit comments

Comments
 (0)