Skip to content

Commit 620abc4

Browse files
louispotok authored and mroeschke committed
Series.describe returns first and last for tz-aware datetimes (#21332)
* Series.describe returns first and last for tz-aware datetimes GH issue 21328
* parameterize tests
* parameterize names
* use tz_naive_fixture and fix top
* add tz describe test for df
* add bugfix to whatsnew
* fix formatting in whatsnew and add issue number to tests
* final bugfix
1 parent 30eb48c commit 620abc4

File tree

4 files changed

+46
-5
lines changed

4 files changed

+46
-5
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ Timezones
317317
- Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive :class:`Timestamp`s to tz-aware (:issue:`13051`)
318318
- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`)
319319
- Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not localize correctly (:issue:`12619`)
320+
- Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show ``first`` and ``last`` results (:issue:`21328`)
320321

321322
Offsets
322323
^^^^^^^

pandas/core/generic.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
is_bool_dtype,
2121
is_categorical_dtype,
2222
is_numeric_dtype,
23-
is_datetime64_dtype,
23+
is_datetime64_any_dtype,
2424
is_timedelta64_dtype,
2525
is_datetime64tz_dtype,
2626
is_list_like,
@@ -8531,12 +8531,13 @@ def describe_categorical_1d(data):
85318531
if result[1] > 0:
85328532
top, freq = objcounts.index[0], objcounts.iloc[0]
85338533

8534-
if is_datetime64_dtype(data):
8534+
if is_datetime64_any_dtype(data):
8535+
tz = data.dt.tz
85358536
asint = data.dropna().values.view('i8')
85368537
names += ['top', 'freq', 'first', 'last']
8537-
result += [tslib.Timestamp(top), freq,
8538-
tslib.Timestamp(asint.min()),
8539-
tslib.Timestamp(asint.max())]
8538+
result += [tslib.Timestamp(top, tz=tz), freq,
8539+
tslib.Timestamp(asint.min(), tz=tz),
8540+
tslib.Timestamp(asint.max(), tz=tz)]
85408541
else:
85418542
names += ['top', 'freq']
85428543
result += [top, freq]

pandas/tests/frame/test_analytics.py

+22
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,28 @@ def test_describe_timedelta_values(self):
417417
"max 5 days 00:00:00 0 days 05:00:00")
418418
assert repr(res) == exp_repr
419419

420+
def test_describe_tz_values(self, tz_naive_fixture):
421+
# GH 21328
422+
tz = tz_naive_fixture
423+
s1 = Series(range(5))
424+
start = Timestamp(2018, 1, 1)
425+
end = Timestamp(2018, 1, 5)
426+
s2 = Series(date_range(start, end, tz=tz))
427+
df = pd.DataFrame({'s1': s1, 's2': s2})
428+
429+
expected = DataFrame({'s1': [5, np.nan, np.nan, np.nan, np.nan, np.nan,
430+
2, 1.581139, 0, 1, 2, 3, 4],
431+
's2': [5, 5, s2.value_counts().index[0], 1,
432+
start.tz_localize(tz),
433+
end.tz_localize(tz), np.nan, np.nan,
434+
np.nan, np.nan, np.nan, np.nan, np.nan]},
435+
index=['count', 'unique', 'top', 'freq', 'first',
436+
'last', 'mean', 'std', 'min', '25%', '50%',
437+
'75%', 'max']
438+
)
439+
res = df.describe(include='all')
440+
tm.assert_frame_equal(res, expected)
441+
420442
def test_reduce_mixed_frame(self):
421443
# GH 6806
422444
df = DataFrame({

pandas/tests/series/test_analytics.py

+17
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,23 @@ def test_describe(self):
336336
index=['count', 'unique', 'top', 'freq'])
337337
tm.assert_series_equal(result, expected)
338338

339+
def test_describe_with_tz(self, tz_naive_fixture):
340+
# GH 21328
341+
tz = tz_naive_fixture
342+
name = tz_naive_fixture
343+
start = Timestamp(2018, 1, 1)
344+
end = Timestamp(2018, 1, 5)
345+
s = Series(date_range(start, end, tz=tz), name=name)
346+
result = s.describe()
347+
expected = Series(
348+
[5, 5, s.value_counts().index[0], 1, start.tz_localize(tz),
349+
end.tz_localize(tz)
350+
],
351+
name=name,
352+
index=['count', 'unique', 'top', 'freq', 'first', 'last']
353+
)
354+
tm.assert_series_equal(result, expected)
355+
339356
def test_argsort(self):
340357
self._check_accum_op('argsort', check_dtype=False)
341358
argsorted = self.ts.argsort()

0 commit comments

Comments
 (0)