Skip to content

Commit c8f48a5

Browse files
committed
Merge pull request #7779 from jreback/td
BUG: unwanted conversions of timedelta dtypes when in a mixed datetimelike frame (GH7778)
2 parents a797b28 + 9103513 commit c8f48a5

File tree

6 files changed

+89
-16
lines changed

6 files changed

+89
-16
lines changed

doc/source/v0.15.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,7 @@ Bug Fixes
186186
~~~~~~~~~
187187

188188
- Bug in ``Series.astype("unicode")`` not calling ``unicode`` on the values correctly (:issue:`7758`)
189-
189+
- Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`)
190190

191191

192192

pandas/core/frame.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -3539,6 +3539,7 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
35393539
except Exception:
35403540
pass
35413541

3542+
dtype = object if self._is_mixed_type else None
35423543
if axis == 0:
35433544
series_gen = (self.icol(i) for i in range(len(self.columns)))
35443545
res_index = self.columns
@@ -3547,7 +3548,7 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
35473548
res_index = self.index
35483549
res_columns = self.columns
35493550
values = self.values
3550-
series_gen = (Series.from_array(arr, index=res_columns, name=name)
3551+
series_gen = (Series.from_array(arr, index=res_columns, name=name, dtype=dtype)
35513552
for i, (arr, name) in
35523553
enumerate(zip(values, res_index)))
35533554
else: # pragma : no cover

pandas/core/internals.py

+16-8
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
from pandas.util.decorators import cache_readonly
2626

2727
from pandas.tslib import Timestamp
28-
from pandas import compat
28+
from pandas import compat, _np_version_under1p7
2929
from pandas.compat import range, map, zip, u
3030
from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type
3131

@@ -1290,6 +1290,16 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs):
12901290
return rvalues.tolist()
12911291

12921292

1293+
def get_values(self, dtype=None):
1294+
# return object dtypes as datetime.timedeltas
1295+
if dtype == object:
1296+
if _np_version_under1p7:
1297+
return self.values.astype('object')
1298+
return lib.map_infer(self.values.ravel(),
1299+
lambda x: timedelta(microseconds=x.item()/1000)
1300+
).reshape(self.values.shape)
1301+
return self.values
1302+
12931303
class BoolBlock(NumericBlock):
12941304
__slots__ = ()
12951305
is_bool = True
@@ -2595,7 +2605,7 @@ def as_matrix(self, items=None):
25952605
else:
25962606
mgr = self
25972607

2598-
if self._is_single_block:
2608+
if self._is_single_block or not self.is_mixed_type:
25992609
return mgr.blocks[0].get_values()
26002610
else:
26012611
return mgr._interleave()
@@ -3647,9 +3657,11 @@ def _lcd_dtype(l):
36473657
has_non_numeric = have_dt64 or have_td64 or have_cat
36483658

36493659
if (have_object or
3650-
(have_bool and have_numeric) or
3660+
(have_bool and (have_numeric or have_dt64 or have_td64)) or
36513661
(have_numeric and has_non_numeric) or
3652-
have_cat):
3662+
have_cat or
3663+
have_dt64 or
3664+
have_td64):
36533665
return np.dtype(object)
36543666
elif have_bool:
36553667
return np.dtype(bool)
@@ -3670,10 +3682,6 @@ def _lcd_dtype(l):
36703682
return np.dtype('int%s' % (lcd.itemsize * 8 * 2))
36713683
return lcd
36723684

3673-
elif have_dt64 and not have_float and not have_complex:
3674-
return np.dtype('M8[ns]')
3675-
elif have_td64 and not have_float and not have_complex:
3676-
return np.dtype('m8[ns]')
36773685
elif have_complex:
36783686
return np.dtype('c16')
36793687
else:

pandas/core/series.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -237,14 +237,14 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
237237
self._set_axis(0, index, fastpath=True)
238238

239239
@classmethod
240-
def from_array(cls, arr, index=None, name=None, copy=False,
240+
def from_array(cls, arr, index=None, name=None, dtype=None, copy=False,
241241
fastpath=False):
242242
# return a sparse series here
243243
if isinstance(arr, ABCSparseArray):
244244
from pandas.sparse.series import SparseSeries
245245
cls = SparseSeries
246246

247-
return cls(arr, index=index, name=name, copy=copy, fastpath=fastpath)
247+
return cls(arr, index=index, name=name, dtype=dtype, copy=copy, fastpath=fastpath)
248248

249249
@property
250250
def _constructor(self):

pandas/tests/test_frame.py

+9
Original file line number | Diff line number | Diff line change
@@ -9635,6 +9635,15 @@ def test_apply(self):
96359635
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c'])
96369636
self.assertRaises(ValueError, df.apply, lambda x: x, 2)
96379637

9638+
def test_apply_mixed_datetimelike(self):
9639+
tm._skip_if_not_numpy17_friendly()
9640+
9641+
# mixed datetimelike
9642+
# GH 7778
9643+
df = DataFrame({ 'A' : date_range('20130101',periods=3), 'B' : pd.to_timedelta(np.arange(3),unit='s') })
9644+
result = df.apply(lambda x: x, axis=1)
9645+
assert_frame_equal(result, df)
9646+
96389647
def test_apply_empty(self):
96399648
# empty
96409649
applied = self.empty.apply(np.sqrt)

pandas/tests/test_internals.py

+59-4
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import nose
44
import numpy as np
55

6-
from pandas import Index, MultiIndex, DataFrame, Series
6+
from pandas import Index, MultiIndex, DataFrame, Series, Categorical
77
from pandas.compat import OrderedDict, lrange
88
from pandas.sparse.array import SparseArray
99
from pandas.core.internals import *
@@ -41,9 +41,11 @@ def create_block(typestr, placement, item_shape=None, num_offset=0):
4141
* complex, c16, c8
4242
* bool
4343
* object, string, O
44-
* datetime, dt
44+
* datetime, dt, M8[ns]
45+
* timedelta, td, m8[ns]
4546
* sparse (SparseArray with fill_value=0.0)
4647
* sparse_na (SparseArray with fill_value=np.nan)
48+
* category, category2
4749
4850
"""
4951
placement = BlockPlacement(placement)
@@ -67,8 +69,14 @@ def create_block(typestr, placement, item_shape=None, num_offset=0):
6769
shape)
6870
elif typestr in ('bool'):
6971
values = np.ones(shape, dtype=np.bool_)
70-
elif typestr in ('datetime', 'dt'):
72+
elif typestr in ('datetime', 'dt', 'M8[ns]'):
7173
values = (mat * 1e9).astype('M8[ns]')
74+
elif typestr in ('timedelta', 'td', 'm8[ns]'):
75+
values = (mat * 1).astype('m8[ns]')
76+
elif typestr in ('category'):
77+
values = Categorical([1,1,2,2,3,3,3,3,4,4])
78+
elif typestr in ('category2'):
79+
values = Categorical(['a','a','a','a','b','b','c','c','c','d'])
7280
elif typestr in ('sparse', 'sparse_na'):
7381
# FIXME: doesn't support num_rows != 10
7482
assert shape[-1] == 10
@@ -556,7 +564,54 @@ def _compare(old_mgr, new_mgr):
556564
self.assertEqual(new_mgr.get('h').dtype, np.float16)
557565

558566
def test_interleave(self):
559-
pass
567+
568+
569+
# self
570+
for dtype in ['f8','i8','object','bool','complex','M8[ns]','m8[ns]']:
571+
mgr = create_mgr('a: {0}'.format(dtype))
572+
self.assertEqual(mgr.as_matrix().dtype,dtype)
573+
mgr = create_mgr('a: {0}; b: {0}'.format(dtype))
574+
self.assertEqual(mgr.as_matrix().dtype,dtype)
575+
576+
# will be converted according to the actual dtype of the underlying values
577+
mgr = create_mgr('a: category')
578+
self.assertEqual(mgr.as_matrix().dtype,'i8')
579+
mgr = create_mgr('a: category; b: category')
580+
self.assertEqual(mgr.as_matrix().dtype,'i8'),
581+
mgr = create_mgr('a: category; b: category2')
582+
self.assertEqual(mgr.as_matrix().dtype,'object')
583+
mgr = create_mgr('a: category2')
584+
self.assertEqual(mgr.as_matrix().dtype,'object')
585+
mgr = create_mgr('a: category2; b: category2')
586+
self.assertEqual(mgr.as_matrix().dtype,'object')
587+
588+
# combinations
589+
mgr = create_mgr('a: f8')
590+
self.assertEqual(mgr.as_matrix().dtype,'f8')
591+
mgr = create_mgr('a: f8; b: i8')
592+
self.assertEqual(mgr.as_matrix().dtype,'f8')
593+
mgr = create_mgr('a: f4; b: i8')
594+
self.assertEqual(mgr.as_matrix().dtype,'f4')
595+
mgr = create_mgr('a: f4; b: i8; d: object')
596+
self.assertEqual(mgr.as_matrix().dtype,'object')
597+
mgr = create_mgr('a: bool; b: i8')
598+
self.assertEqual(mgr.as_matrix().dtype,'object')
599+
mgr = create_mgr('a: complex')
600+
self.assertEqual(mgr.as_matrix().dtype,'complex')
601+
mgr = create_mgr('a: f8; b: category')
602+
self.assertEqual(mgr.as_matrix().dtype,'object')
603+
mgr = create_mgr('a: M8[ns]; b: category')
604+
self.assertEqual(mgr.as_matrix().dtype,'object')
605+
mgr = create_mgr('a: M8[ns]; b: bool')
606+
self.assertEqual(mgr.as_matrix().dtype,'object')
607+
mgr = create_mgr('a: M8[ns]; b: i8')
608+
self.assertEqual(mgr.as_matrix().dtype,'object')
609+
mgr = create_mgr('a: m8[ns]; b: bool')
610+
self.assertEqual(mgr.as_matrix().dtype,'object')
611+
mgr = create_mgr('a: m8[ns]; b: i8')
612+
self.assertEqual(mgr.as_matrix().dtype,'object')
613+
mgr = create_mgr('a: M8[ns]; b: m8[ns]')
614+
self.assertEqual(mgr.as_matrix().dtype,'object')
560615

561616
def test_interleave_non_unique_cols(self):
562617
df = DataFrame([

0 commit comments

Comments
 (0)