Skip to content

BUG: Bug in DataFrame construction with recarray and non-ns datetime dtype (GH6140) #6142

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 28, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ Bug Fixes
index/columns (:issue:`6121`)
- Bug in ``DataFrame.apply`` when using mixed datelike reductions (:issue:`6125`)
- Bug in ``DataFrame.append`` when appending a row with different columns (:issue:`6129`)
- Bug in DataFrame construction with recarray and non-ns datetime dtype (:issue:`6140`)

pandas 0.13.0
-------------
Expand Down
36 changes: 30 additions & 6 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ class AmbiguousIndexError(PandasError, KeyError):


_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name
for t in ['M8[ns]', '>M8[ns]', '<M8[ns]',
'm8[ns]', '>m8[ns]', '<m8[ns]',
'O', 'int8',
for t in ['O', 'int8',
'uint8', 'int16', 'uint16', 'int32',
'uint32', 'int64', 'uint64']])

Expand Down Expand Up @@ -1612,6 +1610,14 @@ def _possibly_convert_objects(values, convert_dates=True,


def _possibly_castable(arr):
    """Report whether ``arr`` may be used as-is on the fast path.

    Returning ``False`` forces a non-fastpath.
    """
    dtype = arr.dtype

    # datetime64 / timedelta64: only the dtypes registered in
    # _DATELIKE_DTYPES are valid as-is; for any other datelike dtype
    # answer False so that a coercion is attempted instead
    if dtype.kind in ('M', 'm'):
        return dtype in _DATELIKE_DTYPES

    # everything else is castable unless its dtype name is one of the
    # possibly-cast dtypes
    return dtype.name not in _POSSIBLY_CAST_DTYPES


Expand Down Expand Up @@ -1681,12 +1687,30 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):

else:

is_array = isinstance(value, np.ndarray)

# catch a datetime/timedelta that is not of ns variety
# and no coercion specified
if (is_array and value.dtype.kind in ['M','m']):
dtype = value.dtype

if dtype.kind == 'M' and dtype != _NS_DTYPE:
try:
value = tslib.array_to_datetime(value)
except:
raise

elif dtype.kind == 'm' and dtype != _TD_DTYPE:
from pandas.tseries.timedeltas import \
_possibly_cast_to_timedelta
value = _possibly_cast_to_timedelta(value, coerce='compat')

# if we have an array whose dtype is already set up (i.e. it is neither
# an integer nor an object dtype), don't bother with this conversion
if (isinstance(value, np.ndarray) and not
(issubclass(value.dtype.type, np.integer) or
value.dtype == np.object_)):
elif (is_array and not (
issubclass(value.dtype.type, np.integer) or
value.dtype == np.object_)):
pass

else:
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2536,7 +2536,10 @@ def _try_cast(arr, take_fast_path):
else:
subarr = _try_cast(data, True)
else:
subarr = _try_cast(data, True)
# don't coerce Index types
# e.g. indexes can have different conversions (so don't fast path them)
# GH 6140
subarr = _try_cast(data, not isinstance(data, Index))

if copy:
subarr = data.copy()
Expand Down
24 changes: 22 additions & 2 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# pylint: disable-msg=W0612,E1101
from copy import deepcopy
from datetime import datetime, timedelta, time
import sys
import operator
import re
import csv
Expand Down Expand Up @@ -3956,6 +3957,27 @@ def test_from_records_empty_with_nonempty_fields_gh3682(self):
assert_array_equal(df.index, Index([], name='id'))
self.assertEqual(df.index.name, 'id')

def test_from_records_with_datetimes(self):
    """from_records on a recarray with a datetime column holding a null.

    GH 6140: checked for a ns-resolution datetime64 field and for a
    non-ns one ('<M8[m]'); both must compare equal to the same frame.
    """
    if sys.version < LooseVersion('2.7'):
        raise nose.SkipTest('rec arrays dont work properly with py2.6')

    stamp = datetime(2005, 3, 1, 0, 0)
    expected = DataFrame({'EXPIRY': [stamp, None]})

    # first dtype: construction with a null in a recarray
    # second dtype: coercion should work too
    for field_dtype in ('<M8[ns]', '<M8[m]'):
        arrdata = [np.array([stamp, None])]
        dtypes = [('EXPIRY', field_dtype)]
        recarray = np.core.records.fromarrays(arrdata, dtype=dtypes)
        result = DataFrame.from_records(recarray)
        assert_frame_equal(result, expected)

def test_to_records_floats(self):
df = DataFrame(np.random.rand(10, 10))
df.to_records()
Expand Down Expand Up @@ -5138,8 +5160,6 @@ def test_combineSeries(self):
#_check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C = 'int32', D = 'int64'))

# TimeSeries
import sys

buf = StringIO()
tmp = sys.stderr
sys.stderr = buf
Expand Down