Skip to content

Commit 9242248

Browse files
mroeschke authored and jreback committed
BUG: DataFrame.diff(axis=0) with DatetimeTZ data (pandas-dev#19773)
1 parent c5a1ef1 commit 9242248

File tree

3 files changed

+56
-0
lines changed

3 files changed

+56
-0
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -833,6 +833,7 @@ Timezones
833833
- Bug in the :class:`DataFrame` constructor, where a tz-aware :class:`DatetimeIndex` and a given column name would result in an empty ``DataFrame`` (:issue:`19157`)
834834
- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`)
835835
- Bug when iterating over a :class:`DatetimeIndex` localized with a fixed timezone offset, where nanosecond precision was rounded to microseconds (:issue:`19603`)
836+
- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`)
836837

837838
Offsets
838839
^^^^^^^

pandas/core/internals.py

+29
Original file line number | Diff line number | Diff line change
@@ -2905,6 +2905,35 @@ def shift(self, periods, axis=0, mgr=None):
29052905
return [self.make_block_same_class(new_values,
29062906
placement=self.mgr_locs)]
29072907

2908+
def diff(self, n, axis=0, mgr=None):
    """Compute the discrete difference of this block's values.

    Parameters
    ----------
    n : int
        Number of periods to shift by before subtracting.
    axis : int, default 0
        Axis to diff upon. ``axis == 0`` is not supported.
    mgr : default None
        Not used by this method; the signature mimics ``shift`` so that
        the method is called correctly by apply.

    Returns
    -------
    list
        A single-element list holding a new ``TimeDeltaBlock``.

    Raises
    ------
    NotImplementedError
        If ``axis == 0``.
    """
    if axis == 0:
        # Cannot currently calculate diff across multiple blocks since
        # this function is invoked via apply
        raise NotImplementedError

    # Subtract the shifted values from the current ones and keep the
    # integer (i8) representation of the result.
    current = self.values
    shifted = self.shift(n, axis=axis)[0].values
    delta = (current - shifted).asi8

    # Reshape to (1, N) like how algos.diff does for timedelta data,
    # then reinterpret the integers as nanosecond timedeltas.
    delta = delta.reshape(1, len(delta)).astype('timedelta64[ns]')
    return [TimeDeltaBlock(delta, placement=self.mgr_locs.indexer)]
2936+
29082937
def concat_same_type(self, to_concat, placement=None):
29092938
"""
29102939
Concatenate list of single blocks of the same type.

pandas/tests/frame/test_timeseries.py

+26
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,32 @@ def test_diff(self):
5757
1), 'z': pd.Series(1)}).astype('float64')
5858
assert_frame_equal(result, expected)
5959

60+
@pytest.mark.parametrize('tz', [None, 'UTC'])
def test_diff_datetime_axis0(self, tz):
    # GH 18578
    # Two identical daily datetime columns, tz-naive or tz-aware
    # depending on the parametrized ``tz``.
    df = DataFrame({col: date_range('2010', freq='D', periods=2, tz=tz)
                    for col in (0, 1)})

    result = df.diff(axis=0)

    # Row-wise diff: first row has no predecessor (NaT), second row is
    # one day after the first.
    expected = DataFrame({col: pd.TimedeltaIndex(['NaT', '1 days'])
                          for col in (0, 1)})
    assert_frame_equal(result, expected)
70+
71+
@pytest.mark.parametrize('tz', [None, 'UTC'])
def test_diff_datetime_axis1(self, tz):
    # GH 18578
    # Two identical daily datetime columns, tz-naive or tz-aware
    # depending on the parametrized ``tz``.
    df = DataFrame({col: date_range('2010', freq='D', periods=2, tz=tz)
                    for col in (0, 1)})

    if tz is not None:
        # With tz-aware columns the column-wise diff is expected to raise.
        with pytest.raises(NotImplementedError):
            df.diff(axis=1)
        return

    # tz-naive: column 0 has no predecessor (NaT); column 1 equals
    # column 0, so the difference is zero.
    result = df.diff(axis=1)
    expected = DataFrame({0: pd.TimedeltaIndex(['NaT', 'NaT']),
                          1: pd.TimedeltaIndex(['0 days', '0 days'])})
    assert_frame_equal(result, expected)
85+
6086
def test_diff_timedelta(self):
6187
# GH 4533
6288
df = DataFrame(dict(time=[Timestamp('20130101 9:01'),

0 commit comments

Comments
 (0)