Skip to content

Commit 9389e4a

Browse files
committed
BUG: DataFrame.diff(axis=0) with DatetimeTZ data
add whatsnew clarify comment Add additional tests
1 parent cd1b168 commit 9389e4a

File tree

4 files changed

+28
-1
lines changed

4 files changed

+28
-1
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,7 @@ Timezones
760760
- Bug in the :class:`DataFrame` constructor, where tz-aware DatetimeIndex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`)
761761
- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`)
762762
- Bug when iterating over :class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`)
763+
- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`)
763764

764765
Offsets
765766
^^^^^^^

pandas/core/algorithms.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1501,7 +1501,12 @@ def diff(arr, n, axis=0):
15011501
is_timedelta = False
15021502
if needs_i8_conversion(arr):
15031503
dtype = np.float64
1504-
arr = arr.view('i8')
1504+
if is_datetime64tz_dtype(arr):
1505+
# Block data is usually a 2D array
1506+
# except DatetimeTZBlock which is a 1D array (DatetimeIndex)
1507+
arr = arr.view('i8').reshape(1, arr.shape[0])
1508+
else:
1509+
arr = arr.view('i8')
15051510
na = iNaT
15061511
is_timedelta = True
15071512

pandas/core/internals.py

+5
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,11 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
12691269

12701270
def diff(self, n, axis=1, mgr=None):
12711271
""" return block for the diff of the values """
1272+
if isinstance(self, DatetimeTZBlock) and axis == 0:
1273+
# This method will iterate (self.apply) over each DatetimeTZBlock
1274+
# but axis=0 will need to gather data from all DatetimeTZBlocks
1275+
# in the manager in order for diff to work correctly.
1276+
raise NotImplementedError
12721277
new_values = algos.diff(self.values, n, axis=axis)
12731278
return [self.make_block(values=new_values)]
12741279

pandas/tests/frame/test_timeseries.py

+16
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,22 @@ def test_diff(self):
5757
1), 'z': pd.Series(1)}).astype('float64')
5858
assert_frame_equal(result, expected)
5959

60+
@pytest.mark.parametrize('axis', [0, 1])
61+
@pytest.mark.parametrize('tz', [None, 'UTC'])
62+
@pytest.mark.xfail(raises=NotImplementedError)
63+
def test_diff_datetime(self, axis, tz):
64+
# GH 18578
65+
df = DataFrame({0: date_range('2010', freq='D', periods=2, tz=tz),
66+
1: date_range('2010', freq='D', periods=2, tz=tz)})
67+
result = df.diff(axis=axis) # xfails for axis=1 and tz='UTC'
68+
if axis == 1:
69+
expected = DataFrame({0: pd.TimedeltaIndex(['NaT', 'NaT']),
70+
1: pd.TimedeltaIndex(['0 days', '0 days'])})
71+
else:
72+
expected = DataFrame({0: pd.TimedeltaIndex(['NaT', '1 days']),
73+
1: pd.TimedeltaIndex(['NaT', '1 days'])})
74+
assert_frame_equal(result, expected)
75+
6076
def test_diff_timedelta(self):
6177
# GH 4533
6278
df = DataFrame(dict(time=[Timestamp('20130101 9:01'),

0 commit comments

Comments
 (0)