Skip to content

Commit 0b1e1c9

Browse files
sinhrks authored and jreback committed
TST: Add more period tests
- Added some ``Period`` related tests in preparation to add period dtype.
- Moved some tests from ``test_timeseries`` to the correct test class.

Author: sinhrks <[email protected]>

Closes #12549 from sinhrks/period_test and squashes the following commits:

883a4cf [sinhrks] TST: Add more period tests
1 parent a58ad4f commit 0b1e1c9

File tree

5 files changed

+436
-148
lines changed

5 files changed

+436
-148
lines changed

pandas/tests/test_groupby.py

+58-16
Original file line numberDiff line numberDiff line change
@@ -3993,11 +3993,13 @@ def test_groupby_groups_datetimeindex_tz(self):
39933993
df['datetime'] = df['datetime'].apply(
39943994
lambda d: Timestamp(d, tz='US/Pacific'))
39953995

3996-
exp_idx1 = pd.DatetimeIndex(
3997-
['2011-07-19 07:00:00', '2011-07-19 07:00:00',
3998-
'2011-07-19 08:00:00', '2011-07-19 08:00:00',
3999-
'2011-07-19 09:00:00', '2011-07-19 09:00:00'],
4000-
tz='US/Pacific', name='datetime')
3996+
exp_idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00',
3997+
'2011-07-19 07:00:00',
3998+
'2011-07-19 08:00:00',
3999+
'2011-07-19 08:00:00',
4000+
'2011-07-19 09:00:00',
4001+
'2011-07-19 09:00:00'],
4002+
tz='US/Pacific', name='datetime')
40014003
exp_idx2 = Index(['a', 'b'] * 3, name='label')
40024004
exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
40034005
expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5],
@@ -4013,9 +4015,9 @@ def test_groupby_groups_datetimeindex_tz(self):
40134015
'value2': [1, 2, 3, 1, 2, 3]},
40144016
index=didx)
40154017

4016-
exp_idx = pd.DatetimeIndex(
4017-
['2011-07-19 07:00:00', '2011-07-19 08:00:00',
4018-
'2011-07-19 09:00:00'], tz='Asia/Tokyo')
4018+
exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00',
4019+
'2011-07-19 08:00:00',
4020+
'2011-07-19 09:00:00'], tz='Asia/Tokyo')
40194021
expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]},
40204022
index=exp_idx, columns=['value1', 'value2'])
40214023

@@ -4032,8 +4034,8 @@ def test_groupby_multi_timezone(self):
40324034
3,2000-01-31 16:50:00,America/Chicago
40334035
4,2000-01-01 16:50:00,America/New_York"""
40344036

4035-
df = pd.read_csv(
4036-
StringIO(data), header=None, names=['value', 'date', 'tz'])
4037+
df = pd.read_csv(StringIO(data), header=None,
4038+
names=['value', 'date', 'tz'])
40374039
result = df.groupby('tz').date.apply(
40384040
lambda x: pd.to_datetime(x).dt.tz_localize(x.name))
40394041

@@ -4051,14 +4053,54 @@ def test_groupby_multi_timezone(self):
40514053
assert_series_equal(result, expected)
40524054

40534055
tz = 'America/Chicago'
4054-
result = pd.to_datetime(df.groupby('tz').date.get_group(
4055-
tz)).dt.tz_localize(tz)
4056-
expected = pd.to_datetime(Series(
4057-
['2000-01-28 16:47:00', '2000-01-29 16:48:00',
4058-
'2000-01-31 16:50:00'], index=[0, 1, 3
4059-
], name='date')).dt.tz_localize(tz)
4056+
res_values = df.groupby('tz').date.get_group(tz)
4057+
result = pd.to_datetime(res_values).dt.tz_localize(tz)
4058+
exp_values = Series(['2000-01-28 16:47:00', '2000-01-29 16:48:00',
4059+
'2000-01-31 16:50:00'],
4060+
index=[0, 1, 3], name='date')
4061+
expected = pd.to_datetime(exp_values).dt.tz_localize(tz)
40604062
assert_series_equal(result, expected)
40614063

4064+
def test_groupby_groups_periods(self):
4065+
dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00',
4066+
'2011-07-19 09:00:00', '2011-07-19 07:00:00',
4067+
'2011-07-19 08:00:00', '2011-07-19 09:00:00']
4068+
df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'],
4069+
'period': [pd.Period(d, freq='H') for d in dates],
4070+
'value1': np.arange(6, dtype='int64'),
4071+
'value2': [1, 2] * 3})
4072+
4073+
exp_idx1 = pd.PeriodIndex(['2011-07-19 07:00:00',
4074+
'2011-07-19 07:00:00',
4075+
'2011-07-19 08:00:00',
4076+
'2011-07-19 08:00:00',
4077+
'2011-07-19 09:00:00',
4078+
'2011-07-19 09:00:00'],
4079+
freq='H', name='period')
4080+
exp_idx2 = Index(['a', 'b'] * 3, name='label')
4081+
exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
4082+
expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5],
4083+
'value2': [1, 2, 2, 1, 1, 2]},
4084+
index=exp_idx, columns=['value1', 'value2'])
4085+
4086+
result = df.groupby(['period', 'label']).sum()
4087+
assert_frame_equal(result, expected)
4088+
4089+
# by level
4090+
didx = pd.PeriodIndex(dates, freq='H')
4091+
df = DataFrame({'value1': np.arange(6, dtype='int64'),
4092+
'value2': [1, 2, 3, 1, 2, 3]},
4093+
index=didx)
4094+
4095+
exp_idx = pd.PeriodIndex(['2011-07-19 07:00:00',
4096+
'2011-07-19 08:00:00',
4097+
'2011-07-19 09:00:00'], freq='H')
4098+
expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]},
4099+
index=exp_idx, columns=['value1', 'value2'])
4100+
4101+
result = df.groupby(level=0).sum()
4102+
assert_frame_equal(result, expected)
4103+
40624104
def test_groupby_reindex_inside_function(self):
40634105
from pandas.tseries.api import DatetimeIndex
40644106

pandas/tools/tests/test_merge.py

+63
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,36 @@ def test_merge_on_datetime64tz(self):
10311031
result = pd.merge(left, right, on='key', how='outer')
10321032
assert_frame_equal(result, expected)
10331033

1034+
def test_merge_on_periods(self):
1035+
left = pd.DataFrame({'key': pd.period_range('20151010', periods=2,
1036+
freq='D'),
1037+
'value': [1, 2]})
1038+
right = pd.DataFrame({'key': pd.period_range('20151011', periods=3,
1039+
freq='D'),
1040+
'value': [1, 2, 3]})
1041+
1042+
expected = DataFrame({'key': pd.period_range('20151010', periods=4,
1043+
freq='D'),
1044+
'value_x': [1, 2, np.nan, np.nan],
1045+
'value_y': [np.nan, 1, 2, 3]})
1046+
result = pd.merge(left, right, on='key', how='outer')
1047+
assert_frame_equal(result, expected)
1048+
1049+
left = pd.DataFrame({'value': pd.period_range('20151010', periods=2,
1050+
freq='D'),
1051+
'key': [1, 2]})
1052+
right = pd.DataFrame({'value': pd.period_range('20151011', periods=2,
1053+
freq='D'),
1054+
'key': [2, 3]})
1055+
1056+
exp_x = pd.period_range('20151010', periods=2, freq='D')
1057+
exp_y = pd.period_range('20151011', periods=2, freq='D')
1058+
expected = DataFrame({'value_x': list(exp_x) + [pd.NaT],
1059+
'value_y': [pd.NaT] + list(exp_y),
1060+
'key': [1., 2, 3]})
1061+
result = pd.merge(left, right, on='key', how='outer')
1062+
assert_frame_equal(result, expected)
1063+
10341064
def test_concat_NaT_series(self):
10351065
# GH 11693
10361066
# test for merging NaT series with datetime series.
@@ -1131,6 +1161,39 @@ def test_concat_tz_series(self):
11311161
result = pd.concat([first, second])
11321162
self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]')
11331163

1164+
def test_concat_period_series(self):
1165+
x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
1166+
y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))
1167+
expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
1168+
result = concat([x, y], ignore_index=True)
1169+
tm.assert_series_equal(result, expected)
1170+
1171+
# different freq
1172+
x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
1173+
y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M'))
1174+
expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
1175+
result = concat([x, y], ignore_index=True)
1176+
tm.assert_series_equal(result, expected)
1177+
1178+
x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
1179+
y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M'))
1180+
expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
1181+
result = concat([x, y], ignore_index=True)
1182+
tm.assert_series_equal(result, expected)
1183+
1184+
# non-period
1185+
x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
1186+
y = Series(pd.DatetimeIndex(['2015-11-01', '2015-12-01']))
1187+
expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
1188+
result = concat([x, y], ignore_index=True)
1189+
tm.assert_series_equal(result, expected)
1190+
1191+
x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
1192+
y = Series(['A', 'B'])
1193+
expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
1194+
result = concat([x, y], ignore_index=True)
1195+
tm.assert_series_equal(result, expected)
1196+
11341197
def test_indicator(self):
11351198
# PR #10054. xref #7412 and closes #8790.
11361199
df1 = DataFrame({'col1': [0, 1], 'col_left': [

pandas/tools/tests/test_pivot.py

+33
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,39 @@ def test_pivot_with_tz(self):
240240
pv = df.pivot(index='dt1', columns='dt2', values='data1')
241241
tm.assert_frame_equal(pv, expected)
242242

243+
def test_pivot_periods(self):
244+
df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'),
245+
pd.Period('2013-01-02', 'D'),
246+
pd.Period('2013-01-01', 'D'),
247+
pd.Period('2013-01-02', 'D')],
248+
'p2': [pd.Period('2013-01', 'M'),
249+
pd.Period('2013-01', 'M'),
250+
pd.Period('2013-02', 'M'),
251+
pd.Period('2013-02', 'M')],
252+
'data1': np.arange(4, dtype='int64'),
253+
'data2': np.arange(4, dtype='int64')})
254+
255+
exp_col1 = Index(['data1', 'data1', 'data2', 'data2'])
256+
exp_col2 = pd.PeriodIndex(['2013-01', '2013-02'] * 2,
257+
name='p2', freq='M')
258+
exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
259+
expected = DataFrame([[0, 2, 0, 2], [1, 3, 1, 3]],
260+
index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
261+
name='p1', freq='D'),
262+
columns=exp_col)
263+
264+
pv = df.pivot(index='p1', columns='p2')
265+
tm.assert_frame_equal(pv, expected)
266+
267+
expected = DataFrame([[0, 2], [1, 3]],
268+
index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
269+
name='p1', freq='D'),
270+
columns=pd.PeriodIndex(['2013-01', '2013-02'],
271+
name='p2', freq='M'))
272+
273+
pv = df.pivot(index='p1', columns='p2', values='data1')
274+
tm.assert_frame_equal(pv, expected)
275+
243276
def test_margins(self):
244277
def _check_output(result, values_col, index=['A', 'B'],
245278
columns=['C'],

pandas/tseries/tests/test_period.py

+148
Original file line numberDiff line numberDiff line change
@@ -2877,6 +2877,17 @@ def test_union(self):
28772877
index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
28782878
self.assertRaises(ValueError, index.join, index3)
28792879

2880+
def test_union_dataframe_index(self):
2881+
rng1 = pd.period_range('1/1/1999', '1/1/2012', freq='M')
2882+
s1 = pd.Series(np.random.randn(len(rng1)), rng1)
2883+
2884+
rng2 = pd.period_range('1/1/1980', '12/1/2001', freq='M')
2885+
s2 = pd.Series(np.random.randn(len(rng2)), rng2)
2886+
df = pd.DataFrame({'s1': s1, 's2': s2})
2887+
2888+
exp = pd.period_range('1/1/1980', '1/1/2012', freq='M')
2889+
self.assert_index_equal(df.index, exp)
2890+
28802891
def test_intersection(self):
28812892
index = period_range('1/1/2000', '1/20/2000', freq='D')
28822893

@@ -2897,6 +2908,63 @@ def test_intersection(self):
28972908
index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
28982909
self.assertRaises(ValueError, index.intersection, index3)
28992910

2911+
def test_intersection_cases(self):
2912+
base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx')
2913+
2914+
# if target has the same name, it is preserved
2915+
rng2 = period_range('5/15/2000', '6/20/2000', freq='D', name='idx')
2916+
expected2 = period_range('6/1/2000', '6/20/2000', freq='D',
2917+
name='idx')
2918+
2919+
# if target name is different, it will be reset
2920+
rng3 = period_range('5/15/2000', '6/20/2000', freq='D', name='other')
2921+
expected3 = period_range('6/1/2000', '6/20/2000', freq='D',
2922+
name=None)
2923+
2924+
rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
2925+
expected4 = PeriodIndex([], name='idx', freq='D')
2926+
2927+
for (rng, expected) in [(rng2, expected2), (rng3, expected3),
2928+
(rng4, expected4)]:
2929+
result = base.intersection(rng)
2930+
self.assertTrue(result.equals(expected))
2931+
self.assertEqual(result.name, expected.name)
2932+
self.assertEqual(result.freq, expected.freq)
2933+
2934+
# non-monotonic
2935+
base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02',
2936+
'2011-01-03'], freq='D', name='idx')
2937+
2938+
rng2 = PeriodIndex(['2011-01-04', '2011-01-02',
2939+
'2011-02-02', '2011-02-03'],
2940+
freq='D', name='idx')
2941+
expected2 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
2942+
name='idx')
2943+
2944+
rng3 = PeriodIndex(['2011-01-04', '2011-01-02', '2011-02-02',
2945+
'2011-02-03'],
2946+
freq='D', name='other')
2947+
expected3 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
2948+
name=None)
2949+
2950+
rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
2951+
expected4 = PeriodIndex([], freq='D', name='idx')
2952+
2953+
for (rng, expected) in [(rng2, expected2), (rng3, expected3),
2954+
(rng4, expected4)]:
2955+
result = base.intersection(rng)
2956+
self.assertTrue(result.equals(expected))
2957+
self.assertEqual(result.name, expected.name)
2958+
self.assertEqual(result.freq, 'D')
2959+
2960+
# empty same freq
2961+
rng = date_range('6/1/2000', '6/15/2000', freq='T')
2962+
result = rng[0:0].intersection(rng)
2963+
self.assertEqual(len(result), 0)
2964+
2965+
result = rng.intersection(rng[0:0])
2966+
self.assertEqual(len(result), 0)
2967+
29002968
def test_fields(self):
29012969
# year, month, day, hour, minute
29022970
# second, weekofyear, week, dayofweek, weekday, dayofyear, quarter
@@ -3734,6 +3802,86 @@ def test_pi_nat_comp(self):
37343802
idx1 == diff
37353803

37363804

3805+
class TestSeriesPeriod(tm.TestCase):
3806+
3807+
def setUp(self):
3808+
self.series = Series(period_range('2000-01-01', periods=10, freq='D'))
3809+
3810+
def test_auto_conversion(self):
3811+
series = Series(list(period_range('2000-01-01', periods=10, freq='D')))
3812+
self.assertEqual(series.dtype, 'object')
3813+
3814+
def test_constructor_cant_cast_period(self):
3815+
with tm.assertRaises(TypeError):
3816+
Series(period_range('2000-01-01', periods=10, freq='D'),
3817+
dtype=float)
3818+
3819+
def test_series_comparison_scalars(self):
3820+
val = pd.Period('2000-01-04', freq='D')
3821+
result = self.series > val
3822+
expected = np.array([x > val for x in self.series])
3823+
self.assert_numpy_array_equal(result, expected)
3824+
3825+
val = self.series[5]
3826+
result = self.series > val
3827+
expected = np.array([x > val for x in self.series])
3828+
self.assert_numpy_array_equal(result, expected)
3829+
3830+
def test_between(self):
3831+
left, right = self.series[[2, 7]]
3832+
result = self.series.between(left, right)
3833+
expected = (self.series >= left) & (self.series <= right)
3834+
assert_series_equal(result, expected)
3835+
3836+
# ---------------------------------------------------------------------
3837+
# NaT support
3838+
3839+
"""
3840+
# ToDo: Enable when support period dtype
3841+
def test_NaT_scalar(self):
3842+
series = Series([0, 1000, 2000, iNaT], dtype='period[D]')
3843+
3844+
val = series[3]
3845+
self.assertTrue(com.isnull(val))
3846+
3847+
series[2] = val
3848+
self.assertTrue(com.isnull(series[2]))
3849+
3850+
def test_NaT_cast(self):
3851+
result = Series([np.nan]).astype('period[D]')
3852+
expected = Series([NaT])
3853+
assert_series_equal(result, expected)
3854+
"""
3855+
3856+
def test_set_none_nan(self):
3857+
# currently Period is stored as object dtype, not as NaT
3858+
self.series[3] = None
3859+
self.assertIs(self.series[3], None)
3860+
3861+
self.series[3:5] = None
3862+
self.assertIs(self.series[4], None)
3863+
3864+
self.series[5] = np.nan
3865+
self.assertTrue(np.isnan(self.series[5]))
3866+
3867+
self.series[5:7] = np.nan
3868+
self.assertTrue(np.isnan(self.series[6]))
3869+
3870+
def test_intercept_astype_object(self):
3871+
expected = self.series.astype('object')
3872+
3873+
df = DataFrame({'a': self.series,
3874+
'b': np.random.randn(len(self.series))})
3875+
3876+
result = df.values.squeeze()
3877+
self.assertTrue((result[:, 0] == expected.values).all())
3878+
3879+
df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)})
3880+
3881+
result = df.values.squeeze()
3882+
self.assertTrue((result[:, 0] == expected.values).all())
3883+
3884+
37373885
if __name__ == '__main__':
37383886
import nose
37393887
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)