diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 907ca6f185e0a..b4656f23021dd 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -176,4 +176,10 @@ Bug Fixes - Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`) - Bug in ``Peirod`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`) - Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`) +- Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` +appropriately when empty (:issue:`13067`) +- Bug in ``PeriodIndex`` construction returning a ``float64`` index in some +circumstances (:issue:`13067`) +- Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name when +resampling an empty ``DataFrame`` (:issue:`13212`) - Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index bc02d8c49f3ae..bea62e98e4a2a 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -37,7 +37,7 @@ is_datetime_or_timedelta_dtype, is_bool, is_bool_dtype, AbstractMethodError, _maybe_fill) -from pandas.core.config import option_context +from pandas.core.config import option_context, is_callable import pandas.lib as lib from pandas.lib import Timestamp import pandas.tslib as tslib @@ -643,9 +643,20 @@ def apply(self, func, *args, **kwargs): func = self._is_builtin_func(func) - @wraps(func) - def f(g): - return func(g, *args, **kwargs) + # this is needed so we don't try and wrap strings. 
If we could + # resolve functions to their callable functions prior, this + # wouldn't be needed + if args or kwargs: + if is_callable(func): + + @wraps(func) + def f(g): + return func(g, *args, **kwargs) + else: + raise ValueError('func must be a callable if args or ' + 'kwargs are supplied') + else: + f = func # ignore SettingWithCopy here in case the user mutates with option_context('mode.chained_assignment', None): @@ -2675,7 +2686,7 @@ def _wrap_transformed_output(self, output, names=None): def _wrap_applied_output(self, keys, values, not_indexed_same=False): if len(keys) == 0: # GH #6265 - return Series([], name=self.name) + return Series([], name=self.name, index=keys) def _get_index(): if self.grouper.nkeys > 1: @@ -3222,8 +3233,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): from pandas.core.index import _all_indexes_same if len(keys) == 0: - # XXX - return DataFrame({}) + return DataFrame(index=keys) key_names = self.grouper.names @@ -3646,17 +3656,12 @@ def _gotitem(self, key, ndim, subset=None): def _wrap_generic_output(self, result, obj): result_index = self.grouper.levels[0] - if result: - if self.axis == 0: - result = DataFrame(result, index=obj.columns, - columns=result_index).T - else: - result = DataFrame(result, index=obj.index, - columns=result_index) + if self.axis == 0: + return DataFrame(result, index=obj.columns, + columns=result_index).T else: - result = DataFrame(result) - - return result + return DataFrame(result, index=obj.index, + columns=result_index) def _get_data_to_aggregate(self): obj = self._obj_with_exclusions diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index fdc9d3599e8ac..708006a9dc21b 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -4250,7 +4250,8 @@ def test_series_partial_set_period(self): pd.Period('2011-01-03', freq='D')] exp = Series([np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name='idx'), 
name='s') - assert_series_equal(ser.loc[keys], exp, check_index_type=True) + result = ser.loc[keys] + assert_series_equal(result, exp) def test_partial_set_invalid(self): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 9cb070c0cd926..5dfe88d04309e 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -775,11 +775,11 @@ def test_agg_apply_corner(self): # DataFrame grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan) exp_df = DataFrame(columns=self.tsframe.columns, dtype=float, - index=pd.Index( - [], dtype=np.float64)) + index=pd.Index([], dtype=np.float64)) assert_frame_equal(grouped.sum(), exp_df, check_names=False) assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False) - assert_frame_equal(grouped.apply(np.sum), DataFrame({}, dtype=float)) + assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0], + check_names=False) def test_agg_grouping_is_list_tuple(self): from pandas.core.groupby import Grouping diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index fb91185746181..b690bc23c2496 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -17,9 +17,9 @@ from pandas.core.base import _shared_docs import pandas.core.common as com -from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box, - _values_from_object, ABCSeries, - is_integer, is_float, is_object_dtype) +from pandas.core.common import ( + isnull, _INT64_DTYPE, _maybe_box, _values_from_object, ABCSeries, + is_integer, is_float) from pandas import compat from pandas.compat.numpy import function as nv from pandas.util.decorators import Appender, cache_readonly, Substitution @@ -271,10 +271,15 @@ def _from_arraylike(cls, data, freq, tz): @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): - if not getattr(values, 'dtype', None): + + if not com.is_integer_dtype(values): values = np.array(values, copy=False) - if is_object_dtype(values): - return PeriodIndex(values, 
name=name, freq=freq, **kwargs) + if (len(values) > 0 and com.is_float_dtype(values)): + raise TypeError("PeriodIndex can't take floats") + else: + return PeriodIndex(values, name=name, freq=freq, **kwargs) + + values = np.array(values, dtype='int64', copy=False) result = object.__new__(cls) result._data = values diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index ac30db35c0f85..90ec5d19db590 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -21,6 +21,7 @@ from pandas.compat.numpy import function as nv from pandas.lib import Timestamp +from pandas._period import IncompatibleFrequency import pandas.lib as lib import pandas.tslib as tslib @@ -795,16 +796,17 @@ def _downsample(self, how, **kwargs): ax = self.ax new_index = self._get_new_index() - if len(new_index) == 0: - return self._wrap_result(self._selected_obj.reindex(new_index)) # Start vs. end of period memb = ax.asfreq(self.freq, how=self.convention) if is_subperiod(ax.freq, self.freq): # Downsampling - rng = np.arange(memb.values[0], memb.values[-1] + 1) - bins = memb.searchsorted(rng, side='right') + if len(new_index) == 0: + bins = [] + else: + rng = np.arange(memb.values[0], memb.values[-1] + 1) + bins = memb.searchsorted(rng, side='right') grouper = BinGrouper(bins, new_index) return self._groupby_and_aggregate(how, grouper=grouper) elif is_superperiod(ax.freq, self.freq): @@ -812,10 +814,9 @@ def _downsample(self, how, **kwargs): elif ax.freq == self.freq: return self.asfreq() - raise ValueError('Frequency {axfreq} cannot be ' - 'resampled to {freq}'.format( - axfreq=ax.freq, - freq=self.freq)) + raise IncompatibleFrequency( + 'Frequency {} cannot be resampled to {}, as they are not ' + 'sub or super periods'.format(ax.freq, self.freq)) def _upsample(self, method, limit=None): """ @@ -838,9 +839,6 @@ def _upsample(self, method, limit=None): obj = self.obj new_index = self._get_new_index() - if len(new_index) == 0: - return 
self._wrap_result(self._selected_obj.reindex(new_index)) - # Start vs. end of period memb = ax.asfreq(self.freq, how=self.convention) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index db1572a49a9ff..c5aae1f8ecebb 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1742,13 +1742,45 @@ def test_constructor_datetime64arr(self): self.assertRaises(ValueError, PeriodIndex, vals, freq='D') def test_constructor_simple_new(self): - idx = period_range('2007-01', name='p', periods=20, freq='M') + idx = period_range('2007-01', name='p', periods=2, freq='M') result = idx._simple_new(idx, 'p', freq=idx.freq) self.assertTrue(result.equals(idx)) result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq) self.assertTrue(result.equals(idx)) + result = idx._simple_new( + [pd.Period('2007-01', freq='M'), pd.Period('2007-02', freq='M')], + 'p', freq=idx.freq) + self.assertTrue(result.equals(idx)) + + result = idx._simple_new( + np.array([pd.Period('2007-01', freq='M'), + pd.Period('2007-02', freq='M')]), + 'p', freq=idx.freq) + self.assertTrue(result.equals(idx)) + + def test_constructor_simple_new_empty(self): + # GH13079 + idx = PeriodIndex([], freq='M', name='p') + result = idx._simple_new(idx, name='p', freq='M') + assert_index_equal(result, idx) + + def test_constructor_simple_new_floats(self): + # GH13079 + for floats in [[1.1], np.array([1.1])]: + with self.assertRaises(TypeError): + pd.PeriodIndex._simple_new(floats, freq='M') + + def test_shallow_copy_empty(self): + + # GH13067 + idx = PeriodIndex([], freq='M') + result = idx._shallow_copy() + expected = idx + + assert_index_equal(result, expected) + def test_constructor_nat(self): self.assertRaises(ValueError, period_range, start='NaT', end='2011-01-01', freq='M') diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 27b15a412ae37..37b16684643be 100644 --- a/pandas/tseries/tests/test_resample.py 
+++ b/pandas/tseries/tests/test_resample.py @@ -17,15 +17,16 @@ UnsupportedFunctionCall) from pandas.core.groupby import DataError from pandas.tseries.frequencies import MONTHS, DAYS +from pandas.tseries.frequencies import to_offset from pandas.tseries.index import date_range from pandas.tseries.offsets import Minute, BDay from pandas.tseries.period import period_range, PeriodIndex, Period from pandas.tseries.resample import (DatetimeIndex, TimeGrouper, DatetimeIndexResampler) -from pandas.tseries.frequencies import to_offset from pandas.tseries.tdi import timedelta_range from pandas.util.testing import (assert_series_equal, assert_almost_equal, - assert_frame_equal) + assert_frame_equal, assert_index_equal) +from pandas._period import IncompatibleFrequency bday = BDay() @@ -578,6 +579,7 @@ class Base(object): base class for resampling testing, calling .create_series() generates a series of each index type """ + def create_index(self, *args, **kwargs): """ return the _index_factory created using the args, kwargs """ factory = self._index_factory() @@ -620,6 +622,76 @@ def test_resample_interpolate(self): df.resample('1T').asfreq().interpolate(), df.resample('1T').interpolate()) + def test_raises_on_non_datetimelike_index(self): + # this is a non datetimelike index + xp = DataFrame() + self.assertRaises(TypeError, lambda: xp.resample('A').mean()) + + def test_resample_empty_series(self): + # GH12771 & GH12868 + + s = self.create_series()[:0] + + for freq in ['M', 'D', 'H']: + # need to test for ohlc from GH13083 + methods = [method for method in resample_methods + if method != 'ohlc'] + for method in methods: + expected_index = s.index._shallow_copy(freq=freq) + + result = getattr(s.resample(freq), method)() + expected = s + assert_index_equal(result.index, expected_index) + # freq equality not yet checked in assert_index_equal + self.assertEqual(result.index.freq, expected_index.freq) + if (method == 'size' and + isinstance(result.index, PeriodIndex) and + freq in 
['M', 'D']): + # GH12871 - TODO: name should propagate, but currently + # doesn't on lower / same frequency with PeriodIndex + assert_series_equal(result, expected, check_dtype=False, + check_names=False) + # this assert will break when fixed + self.assertTrue(result.name is None) + else: + assert_series_equal(result, expected, check_dtype=False) + + def test_resample_empty_dataframe(self): + # GH13212 + index = self.create_series().index[:0] + f = DataFrame(index=index) + + for freq in ['M', 'D', 'H']: + # count retains dimensions too + methods = downsample_methods + ['count'] + for method in methods: + expected_index = f.index._shallow_copy(freq=freq) + result = getattr(f.resample(freq), method)() + expected = f + assert_index_equal(result.index, expected_index) + # freq equality not yet checked in assert_index_equal + # TODO: remove when freq checked + self.assertEqual(result.index.freq, expected_index.freq) + assert_frame_equal(result, expected, check_dtype=False) + + # test size for GH13212 (currently stays as df) + + def test_resample_empty_dtypes(self): + + # Empty series were sometimes causing a segfault (for the functions + # with Cython bounds-checking disabled) or an IndexError. We just run + # them to ensure they no longer do. 
(GH #10228) + for index in tm.all_timeseries_index_generator(0): + for dtype in (np.float, np.int, np.object, 'datetime64[ns]'): + for how in downsample_methods + upsample_methods: + empty_series = pd.Series([], index, dtype) + try: + getattr(empty_series.resample('d'), how)() + except DataError: + # Ignore these since some combinations are invalid + # (ex: doing mean with dtype of np.object) + pass + class TestDatetimeIndex(Base, tm.TestCase): _multiprocess_can_split_ = True @@ -1408,39 +1480,6 @@ def test_period_with_agg(self): result = s2.resample('D').agg(lambda x: x.mean()) assert_series_equal(result, expected) - def test_resample_empty(self): - ts = _simple_ts('1/1/2000', '2/1/2000')[:0] - - result = ts.resample('A').mean() - self.assertEqual(len(result), 0) - self.assertEqual(result.index.freqstr, 'A-DEC') - - result = ts.resample('A', kind='period').mean() - self.assertEqual(len(result), 0) - self.assertEqual(result.index.freqstr, 'A-DEC') - - # this is a non datetimelike index - xp = DataFrame() - self.assertRaises(TypeError, lambda: xp.resample('A').mean()) - - # Empty series were sometimes causing a segfault (for the functions - # with Cython bounds-checking disabled) or an IndexError. We just run - # them to ensure they no longer do. 
(GH #10228) - for index in tm.all_timeseries_index_generator(0): - for dtype in (np.float, np.int, np.object, 'datetime64[ns]'): - for how in downsample_methods + upsample_methods: - empty_series = pd.Series([], index, dtype) - try: - getattr(empty_series.resample('d'), how)() - except DataError: - # Ignore these since some combinations are invalid - # (ex: doing mean with dtype of np.object) - pass - - # this should also tests nunique - # (IOW, use resample_methods) - # when GH12886 is closed - def test_resample_segfault(self): # GH 8573 # segfaulting in older versions @@ -2085,19 +2124,6 @@ def test_resample_basic(self): result2 = s.resample('T', kind='period').mean() assert_series_equal(result2, expected) - def test_resample_empty(self): - - # GH12771 & GH12868 - index = PeriodIndex(start='2000', periods=0, freq='D', name='idx') - s = Series(index=index) - - expected_index = PeriodIndex([], name='idx', freq='M') - expected = Series(index=expected_index) - - for method in resample_methods: - result = getattr(s.resample('M'), method)() - assert_series_equal(result, expected) - def test_resample_count(self): # GH12774 @@ -2121,6 +2147,12 @@ def test_resample_same_freq(self): result = getattr(series.resample('M'), method)() assert_series_equal(result, expected) + def test_resample_incompat_freq(self): + + with self.assertRaises(IncompatibleFrequency): + pd.Series(range(3), index=pd.period_range( + start='2000', periods=3, freq='M')).resample('W').mean() + def test_with_local_timezone_pytz(self): # GH5430 tm._skip_if_no_pytz() @@ -2482,7 +2514,6 @@ def create_series(self): return Series(np.arange(len(i)), index=i, name='tdi') def test_asfreq_bug(self): - import datetime as dt df = DataFrame(data=[1, 3], index=[dt.timedelta(), dt.timedelta(minutes=3)]) @@ -2495,7 +2526,6 @@ def test_asfreq_bug(self): class TestResamplerGrouper(tm.TestCase): - def setUp(self): self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, 'B': np.arange(40)}, @@ -2631,11 +2661,13 @@ def 
test_apply(self): def f(x): return x.resample('2s').sum() + result = r.apply(f) assert_frame_equal(result, expected) def f(x): return x.resample('2s').apply(lambda y: y.sum()) + result = g.apply(f) assert_frame_equal(result, expected)