Skip to content

Commit 1ed5c3e

Browse files
committed
Merge pull request #6142 from jreback/from_records
BUG: Bug in DataFrame construction with recarray and non-ns datetime dtype (GH6140)
2 parents 76fadb1 + 5c4545b commit 1ed5c3e

File tree

4 files changed

+57
-9
lines changed

4 files changed

+57
-9
lines changed

doc/source/release.rst

+1
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,7 @@ Bug Fixes
164164
index/columns (:issue:`6121`)
165165
- Bug in ``DataFrame.apply`` when using mixed datelike reductions (:issue:`6125`)
166166
- Bug in ``DataFrame.append`` when appending a row with different columns (:issue:`6129`)
167+
- Bug in DataFrame construction with recarray and non-ns datetime dtype (:issue:`6140`)
167168

168169
pandas 0.13.0
169170
-------------

pandas/core/common.py

+30-6
Original file line number | Diff line number | Diff line change
@@ -41,9 +41,7 @@ class AmbiguousIndexError(PandasError, KeyError):
4141

4242

4343
_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name
44-
for t in ['M8[ns]', '>M8[ns]', '<M8[ns]',
45-
'm8[ns]', '>m8[ns]', '<m8[ns]',
46-
'O', 'int8',
44+
for t in ['O', 'int8',
4745
'uint8', 'int16', 'uint16', 'int32',
4846
'uint32', 'int64', 'uint64']])
4947

@@ -1612,6 +1610,14 @@ def _possibly_convert_objects(values, convert_dates=True,
16121610

16131611

16141612
def _possibly_castable(arr):
1613+
# return False to force a non-fastpath
1614+
1615+
# check datetime64[ns]/timedelta64[ns] are valid
1616+
# otherwise try to coerce
1617+
kind = arr.dtype.kind
1618+
if kind == 'M' or kind == 'm':
1619+
return arr.dtype in _DATELIKE_DTYPES
1620+
16151621
return arr.dtype.name not in _POSSIBLY_CAST_DTYPES
16161622

16171623

@@ -1681,12 +1687,30 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
16811687

16821688
else:
16831689

1690+
is_array = isinstance(value, np.ndarray)
1691+
1692+
# catch a datetime/timedelta that is not of ns variety
1693+
# and no coercion specified
1694+
if (is_array and value.dtype.kind in ['M','m']):
1695+
dtype = value.dtype
1696+
1697+
if dtype.kind == 'M' and dtype != _NS_DTYPE:
1698+
try:
1699+
value = tslib.array_to_datetime(value)
1700+
except:
1701+
raise
1702+
1703+
elif dtype.kind == 'm' and dtype != _TD_DTYPE:
1704+
from pandas.tseries.timedeltas import \
1705+
_possibly_cast_to_timedelta
1706+
value = _possibly_cast_to_timedelta(value, coerce='compat')
1707+
16841708
# only do this if we have an array and the dtype of the array is not
16851709
# setup already we are not an integer/object, so don't bother with this
16861710
# conversion
1687-
if (isinstance(value, np.ndarray) and not
1688-
(issubclass(value.dtype.type, np.integer) or
1689-
value.dtype == np.object_)):
1711+
elif (is_array and not (
1712+
issubclass(value.dtype.type, np.integer) or
1713+
value.dtype == np.object_)):
16901714
pass
16911715

16921716
else:

pandas/core/series.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -2536,7 +2536,10 @@ def _try_cast(arr, take_fast_path):
25362536
else:
25372537
subarr = _try_cast(data, True)
25382538
else:
2539-
subarr = _try_cast(data, True)
2539+
# don't coerce Index types
2540+
# e.g. indexes can have different conversions (so don't fast path them)
2541+
# GH 6140
2542+
subarr = _try_cast(data, not isinstance(data, Index))
25402543

25412544
if copy:
25422545
subarr = data.copy()

pandas/tests/test_frame.py

+22-2
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
# pylint: disable-msg=W0612,E1101
55
from copy import deepcopy
66
from datetime import datetime, timedelta, time
7+
import sys
78
import operator
89
import re
910
import csv
@@ -3956,6 +3957,27 @@ def test_from_records_empty_with_nonempty_fields_gh3682(self):
39563957
assert_array_equal(df.index, Index([], name='id'))
39573958
self.assertEqual(df.index.name, 'id')
39583959

3960+
def test_from_records_with_datetimes(self):
3961+
if sys.version < LooseVersion('2.7'):
3962+
raise nose.SkipTest('rec arrays dont work properly with py2.6')
3963+
3964+
# construction with a null in a recarray
3965+
# GH 6140
3966+
expected = DataFrame({ 'EXPIRY' : [datetime(2005, 3, 1, 0, 0), None ]})
3967+
3968+
arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]
3969+
dtypes = [('EXPIRY', '<M8[ns]')]
3970+
recarray = np.core.records.fromarrays(arrdata, dtype=dtypes)
3971+
result = DataFrame.from_records(recarray)
3972+
assert_frame_equal(result,expected)
3973+
3974+
# coercion should work too
3975+
arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]
3976+
dtypes = [('EXPIRY', '<M8[m]')]
3977+
recarray = np.core.records.fromarrays(arrdata, dtype=dtypes)
3978+
result = DataFrame.from_records(recarray)
3979+
assert_frame_equal(result,expected)
3980+
39593981
def test_to_records_floats(self):
39603982
df = DataFrame(np.random.rand(10, 10))
39613983
df.to_records()
@@ -5138,8 +5160,6 @@ def test_combineSeries(self):
51385160
#_check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C = 'int32', D = 'int64'))
51395161

51405162
# TimeSeries
5141-
import sys
5142-
51435163
buf = StringIO()
51445164
tmp = sys.stderr
51455165
sys.stderr = buf

0 commit comments

Comments
 (0)