Skip to content

BUG: series resample with timedelta values loses dtype (GH13119) #14118

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 31, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1260,7 +1260,7 @@ Bug Fixes
- Bug in ``.value_counts`` raises ``OutOfBoundsDatetime`` if data exceeds ``datetime64[ns]`` bounds (:issue:`13663`)
- Bug in ``DatetimeIndex`` may raise ``OutOfBoundsDatetime`` if input ``np.datetime64`` has other unit than ``ns`` (:issue:`9114`)
- Bug in ``Series`` creation with ``np.datetime64`` which has other unit than ``ns`` as ``object`` dtype results in incorrect values (:issue:`13876`)

- Bug in ``resample`` with timedelta data where data was cast to float (:issue:`13119`).
- Bug in ``pd.isnull()`` and ``pd.notnull()`` raising ``TypeError`` if input datetime-like has other unit than ``ns`` (:issue:`13389`)
- Bug in ``pd.merge()`` may raise ``TypeError`` if input datetime-like has other unit than ``ns`` (:issue:`13389`)

Expand Down
76 changes: 44 additions & 32 deletions pandas/tests/types/test_cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,44 +24,56 @@
_multiprocess_can_split_ = True


def test_downcast_conv():
# test downcasting
class TestPossiblyDowncast(tm.TestCase):

arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995])
result = _possibly_downcast_to_dtype(arr, 'infer')
assert (np.array_equal(result, arr))
def test_downcast_conv(self):
# test downcasting

arr = np.array([8., 8., 8., 8., 8.9999999999995])
result = _possibly_downcast_to_dtype(arr, 'infer')
expected = np.array([8, 8, 8, 8, 9])
assert (np.array_equal(result, expected))

arr = np.array([8., 8., 8., 8., 9.0000000000005])
result = _possibly_downcast_to_dtype(arr, 'infer')
expected = np.array([8, 8, 8, 8, 9])
assert (np.array_equal(result, expected))

# conversions
arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995])
result = _possibly_downcast_to_dtype(arr, 'infer')
assert (np.array_equal(result, arr))

expected = np.array([1, 2])
for dtype in [np.float64, object, np.int64]:
arr = np.array([1.0, 2.0], dtype=dtype)
arr = np.array([8., 8., 8., 8., 8.9999999999995])
result = _possibly_downcast_to_dtype(arr, 'infer')
tm.assert_almost_equal(result, expected, check_dtype=False)
expected = np.array([8, 8, 8, 8, 9])
assert (np.array_equal(result, expected))

for dtype in [np.float64, object]:
expected = np.array([1.0, 2.0, np.nan], dtype=dtype)
arr = np.array([1.0, 2.0, np.nan], dtype=dtype)
arr = np.array([8., 8., 8., 8., 9.0000000000005])
result = _possibly_downcast_to_dtype(arr, 'infer')
tm.assert_almost_equal(result, expected)

# empties
for dtype in [np.int32, np.float64, np.float32, np.bool_,
np.int64, object]:
arr = np.array([], dtype=dtype)
result = _possibly_downcast_to_dtype(arr, 'int64')
tm.assert_almost_equal(result, np.array([], dtype=np.int64))
assert result.dtype == np.int64
expected = np.array([8, 8, 8, 8, 9])
assert (np.array_equal(result, expected))

# conversions

expected = np.array([1, 2])
for dtype in [np.float64, object, np.int64]:
arr = np.array([1.0, 2.0], dtype=dtype)
result = _possibly_downcast_to_dtype(arr, 'infer')
tm.assert_almost_equal(result, expected, check_dtype=False)

for dtype in [np.float64, object]:
expected = np.array([1.0, 2.0, np.nan], dtype=dtype)
arr = np.array([1.0, 2.0, np.nan], dtype=dtype)
result = _possibly_downcast_to_dtype(arr, 'infer')
tm.assert_almost_equal(result, expected)

# empties
for dtype in [np.int32, np.float64, np.float32, np.bool_,
np.int64, object]:
arr = np.array([], dtype=dtype)
result = _possibly_downcast_to_dtype(arr, 'int64')
tm.assert_almost_equal(result, np.array([], dtype=np.int64))
assert result.dtype == np.int64

def test_datetimelikes_nan(self):
# Downcast a float array containing NaN to a datetime-like dtype; the NaN
# is expected to come back as NaT in the requested dtype.
arr = np.array([1, 2, np.nan])
exp = np.array([1, 2, np.datetime64('NaT')], dtype='datetime64[ns]')
res = _possibly_downcast_to_dtype(arr, 'datetime64[ns]')
tm.assert_numpy_array_equal(res, exp)

# same input array, this time with a timedelta64[ns] target
exp = np.array([1, 2, np.timedelta64('NaT')], dtype='timedelta64[ns]')
res = _possibly_downcast_to_dtype(arr, 'timedelta64[ns]')
tm.assert_numpy_array_equal(res, exp)


class TestInferDtype(tm.TestCase):
Expand Down
17 changes: 17 additions & 0 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1935,6 +1935,23 @@ def test_resample_with_nat(self):

assert_frame_equal(frame.resample('60s').mean(), frame_3s)

def test_resample_timedelta_values(self):
# GH 13119
# check that timedelta dtype is preserved when NaT values are
# introduced by the resampling

# NOTE(review): neither range below ends on its own frequency grid
# ('1 day' + 4D = '5 day', '1 day' + 2 * 2D = '5 day', both past '4 day'),
# so the number of points in each depends on how timedelta_range treats
# the end point -- confirm the input really leaves a gap for
# resample('2D') to fill with NaT.
times = timedelta_range('1 day', '4 day', freq='4D')
df = DataFrame({'time': times}, index=times)

# expected result: the 2D-spaced values indexed by themselves, with the
# second position overwritten by NaT
times2 = timedelta_range('1 day', '4 day', freq='2D')
exp = Series(times2, index=times2, name='time')
exp.iloc[1] = pd.NaT

# the same expectation is checked for both call orders: resample the frame
# then select the column, and select the column then resample it
res = df.resample('2D').first()['time']
tm.assert_series_equal(res, exp)
res = df['time'].resample('2D').first()
tm.assert_series_equal(res, exp)


class TestPeriodIndex(Base, tm.TestCase):
_multiprocess_can_split_ = True
Expand Down
4 changes: 2 additions & 2 deletions pandas/types/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
is_timedelta64_dtype, is_dtype_equal,
is_float_dtype, is_complex_dtype,
is_integer_dtype, is_datetime_or_timedelta_dtype,
is_scalar,
is_bool_dtype, is_scalar,
_string_dtypes,
_coerce_to_dtype,
_ensure_int8, _ensure_int16,
Expand Down Expand Up @@ -89,7 +89,7 @@ def trans(x): # noqa

if issubclass(dtype.type, np.floating):
return result.astype(dtype)
elif dtype == np.bool_ or issubclass(dtype.type, np.integer):
elif is_bool_dtype(dtype) or is_integer_dtype(dtype):

# if we don't have any elements, just astype it
if not np.prod(result.shape):
Expand Down