Skip to content

Commit d2b5819

Browse files
max-sixty authored and jreback committed
BUG: Empty PeriodIndex issues
closes #13067 closes #13212 Author: Maximilian Roos <[email protected]> Closes #13079 from MaximilianR/period_resample_0 and squashes the following commits: 8c7b9db [Maximilian Roos] empty PeriodIndex issues
1 parent f5c24d2 commit d2b5819

File tree

8 files changed

+168
-89
lines changed

8 files changed

+168
-89
lines changed

doc/source/whatsnew/v0.18.2.txt

+6
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@ Bug Fixes
156156

157157

158158
- Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`)
159+
- Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`)
160+
- Bug in ``PeriodIndex`` construction returning a ``float64`` index in some circumstances (:issue:`13067`)
161+
- Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name with an empty ``DataFrame`` (:issue:`13212`)
159162

160163

161164

@@ -175,4 +178,7 @@ Bug Fixes
175178
- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
176179
- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
177180
- Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)
181+
182+
183+
178184
- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)

pandas/core/groupby.py

+22-17
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
is_datetime_or_timedelta_dtype, is_bool,
3838
is_bool_dtype, AbstractMethodError,
3939
_maybe_fill)
40-
from pandas.core.config import option_context
40+
from pandas.core.config import option_context, is_callable
4141
import pandas.lib as lib
4242
from pandas.lib import Timestamp
4343
import pandas.tslib as tslib
@@ -643,9 +643,20 @@ def apply(self, func, *args, **kwargs):
643643

644644
func = self._is_builtin_func(func)
645645

646-
@wraps(func)
647-
def f(g):
648-
return func(g, *args, **kwargs)
646+
# this is needed so we don't try and wrap strings. If we could
647+
# resolve functions to their callable functions prior, this
648+
# wouldn't be needed
649+
if args or kwargs:
650+
if is_callable(func):
651+
652+
@wraps(func)
653+
def f(g):
654+
return func(g, *args, **kwargs)
655+
else:
656+
raise ValueError('func must be a callable if args or '
657+
'kwargs are supplied')
658+
else:
659+
f = func
649660

650661
# ignore SettingWithCopy here in case the user mutates
651662
with option_context('mode.chained_assignment', None):
@@ -2675,7 +2686,7 @@ def _wrap_transformed_output(self, output, names=None):
26752686
def _wrap_applied_output(self, keys, values, not_indexed_same=False):
26762687
if len(keys) == 0:
26772688
# GH #6265
2678-
return Series([], name=self.name)
2689+
return Series([], name=self.name, index=keys)
26792690

26802691
def _get_index():
26812692
if self.grouper.nkeys > 1:
@@ -3222,8 +3233,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
32223233
from pandas.core.index import _all_indexes_same
32233234

32243235
if len(keys) == 0:
3225-
# XXX
3226-
return DataFrame({})
3236+
return DataFrame(index=keys)
32273237

32283238
key_names = self.grouper.names
32293239

@@ -3646,17 +3656,12 @@ def _gotitem(self, key, ndim, subset=None):
36463656
def _wrap_generic_output(self, result, obj):
36473657
result_index = self.grouper.levels[0]
36483658

3649-
if result:
3650-
if self.axis == 0:
3651-
result = DataFrame(result, index=obj.columns,
3652-
columns=result_index).T
3653-
else:
3654-
result = DataFrame(result, index=obj.index,
3655-
columns=result_index)
3659+
if self.axis == 0:
3660+
return DataFrame(result, index=obj.columns,
3661+
columns=result_index).T
36563662
else:
3657-
result = DataFrame(result)
3658-
3659-
return result
3663+
return DataFrame(result, index=obj.index,
3664+
columns=result_index)
36603665

36613666
def _get_data_to_aggregate(self):
36623667
obj = self._obj_with_exclusions

pandas/tests/indexing/test_indexing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4250,7 +4250,8 @@ def test_series_partial_set_period(self):
42504250
pd.Period('2011-01-03', freq='D')]
42514251
exp = Series([np.nan, 0.2, np.nan],
42524252
index=pd.PeriodIndex(keys, name='idx'), name='s')
4253-
assert_series_equal(ser.loc[keys], exp, check_index_type=True)
4253+
result = ser.loc[keys]
4254+
assert_series_equal(result, exp)
42544255

42554256
def test_partial_set_invalid(self):
42564257

pandas/tests/test_groupby.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -775,11 +775,11 @@ def test_agg_apply_corner(self):
775775
# DataFrame
776776
grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan)
777777
exp_df = DataFrame(columns=self.tsframe.columns, dtype=float,
778-
index=pd.Index(
779-
[], dtype=np.float64))
778+
index=pd.Index([], dtype=np.float64))
780779
assert_frame_equal(grouped.sum(), exp_df, check_names=False)
781780
assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False)
782-
assert_frame_equal(grouped.apply(np.sum), DataFrame({}, dtype=float))
781+
assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0],
782+
check_names=False)
783783

784784
def test_agg_grouping_is_list_tuple(self):
785785
from pandas.core.groupby import Grouping

pandas/tseries/period.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
from pandas.core.base import _shared_docs
1818

1919
import pandas.core.common as com
20-
from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box,
21-
_values_from_object, ABCSeries,
22-
is_integer, is_float, is_object_dtype)
20+
from pandas.core.common import (
21+
isnull, _INT64_DTYPE, _maybe_box, _values_from_object, ABCSeries,
22+
is_integer, is_float)
2323
from pandas import compat
2424
from pandas.compat.numpy import function as nv
2525
from pandas.util.decorators import Appender, cache_readonly, Substitution
@@ -271,10 +271,15 @@ def _from_arraylike(cls, data, freq, tz):
271271

272272
@classmethod
273273
def _simple_new(cls, values, name=None, freq=None, **kwargs):
274-
if not getattr(values, 'dtype', None):
274+
275+
if not com.is_integer_dtype(values):
275276
values = np.array(values, copy=False)
276-
if is_object_dtype(values):
277-
return PeriodIndex(values, name=name, freq=freq, **kwargs)
277+
if (len(values) > 0 and com.is_float_dtype(values)):
278+
raise TypeError("PeriodIndex can't take floats")
279+
else:
280+
return PeriodIndex(values, name=name, freq=freq, **kwargs)
281+
282+
values = np.array(values, dtype='int64', copy=False)
278283

279284
result = object.__new__(cls)
280285
result._data = values

pandas/tseries/resample.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from pandas.compat.numpy import function as nv
2222

2323
from pandas.lib import Timestamp
24+
from pandas._period import IncompatibleFrequency
2425
import pandas.lib as lib
2526
import pandas.tslib as tslib
2627

@@ -795,27 +796,27 @@ def _downsample(self, how, **kwargs):
795796
ax = self.ax
796797

797798
new_index = self._get_new_index()
798-
if len(new_index) == 0:
799-
return self._wrap_result(self._selected_obj.reindex(new_index))
800799

801800
# Start vs. end of period
802801
memb = ax.asfreq(self.freq, how=self.convention)
803802

804803
if is_subperiod(ax.freq, self.freq):
805804
# Downsampling
806-
rng = np.arange(memb.values[0], memb.values[-1] + 1)
807-
bins = memb.searchsorted(rng, side='right')
805+
if len(new_index) == 0:
806+
bins = []
807+
else:
808+
rng = np.arange(memb.values[0], memb.values[-1] + 1)
809+
bins = memb.searchsorted(rng, side='right')
808810
grouper = BinGrouper(bins, new_index)
809811
return self._groupby_and_aggregate(how, grouper=grouper)
810812
elif is_superperiod(ax.freq, self.freq):
811813
return self.asfreq()
812814
elif ax.freq == self.freq:
813815
return self.asfreq()
814816

815-
raise ValueError('Frequency {axfreq} cannot be '
816-
'resampled to {freq}'.format(
817-
axfreq=ax.freq,
818-
freq=self.freq))
817+
raise IncompatibleFrequency(
818+
'Frequency {} cannot be resampled to {}, as they are not '
819+
'sub or super periods'.format(ax.freq, self.freq))
819820

820821
def _upsample(self, method, limit=None):
821822
"""
@@ -838,9 +839,6 @@ def _upsample(self, method, limit=None):
838839
obj = self.obj
839840
new_index = self._get_new_index()
840841

841-
if len(new_index) == 0:
842-
return self._wrap_result(self._selected_obj.reindex(new_index))
843-
844842
# Start vs. end of period
845843
memb = ax.asfreq(self.freq, how=self.convention)
846844

pandas/tseries/tests/test_period.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -1742,13 +1742,45 @@ def test_constructor_datetime64arr(self):
17421742
self.assertRaises(ValueError, PeriodIndex, vals, freq='D')
17431743

17441744
def test_constructor_simple_new(self):
1745-
idx = period_range('2007-01', name='p', periods=20, freq='M')
1745+
idx = period_range('2007-01', name='p', periods=2, freq='M')
17461746
result = idx._simple_new(idx, 'p', freq=idx.freq)
17471747
self.assertTrue(result.equals(idx))
17481748

17491749
result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq)
17501750
self.assertTrue(result.equals(idx))
17511751

1752+
result = idx._simple_new(
1753+
[pd.Period('2007-01', freq='M'), pd.Period('2007-02', freq='M')],
1754+
'p', freq=idx.freq)
1755+
self.assertTrue(result.equals(idx))
1756+
1757+
result = idx._simple_new(
1758+
np.array([pd.Period('2007-01', freq='M'),
1759+
pd.Period('2007-02', freq='M')]),
1760+
'p', freq=idx.freq)
1761+
self.assertTrue(result.equals(idx))
1762+
1763+
def test_constructor_simple_new_empty(self):
1764+
# GH13079
1765+
idx = PeriodIndex([], freq='M', name='p')
1766+
result = idx._simple_new(idx, name='p', freq='M')
1767+
assert_index_equal(result, idx)
1768+
1769+
def test_constructor_simple_new_floats(self):
1770+
# GH13079
1771+
for floats in [[1.1], np.array([1.1])]:
1772+
with self.assertRaises(TypeError):
1773+
pd.PeriodIndex._simple_new(floats, freq='M')
1774+
1775+
def test_shallow_copy_empty(self):
1776+
1777+
# GH13067
1778+
idx = PeriodIndex([], freq='M')
1779+
result = idx._shallow_copy()
1780+
expected = idx
1781+
1782+
assert_index_equal(result, expected)
1783+
17521784
def test_constructor_nat(self):
17531785
self.assertRaises(ValueError, period_range, start='NaT',
17541786
end='2011-01-01', freq='M')

0 commit comments

Comments
 (0)