Skip to content

TST: Add more period tests #12549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 58 additions & 16 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3993,11 +3993,13 @@ def test_groupby_groups_datetimeindex_tz(self):
df['datetime'] = df['datetime'].apply(
lambda d: Timestamp(d, tz='US/Pacific'))

exp_idx1 = pd.DatetimeIndex(
['2011-07-19 07:00:00', '2011-07-19 07:00:00',
'2011-07-19 08:00:00', '2011-07-19 08:00:00',
'2011-07-19 09:00:00', '2011-07-19 09:00:00'],
tz='US/Pacific', name='datetime')
exp_idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00',
'2011-07-19 07:00:00',
'2011-07-19 08:00:00',
'2011-07-19 08:00:00',
'2011-07-19 09:00:00',
'2011-07-19 09:00:00'],
tz='US/Pacific', name='datetime')
exp_idx2 = Index(['a', 'b'] * 3, name='label')
exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5],
Expand All @@ -4013,9 +4015,9 @@ def test_groupby_groups_datetimeindex_tz(self):
'value2': [1, 2, 3, 1, 2, 3]},
index=didx)

exp_idx = pd.DatetimeIndex(
['2011-07-19 07:00:00', '2011-07-19 08:00:00',
'2011-07-19 09:00:00'], tz='Asia/Tokyo')
exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00',
'2011-07-19 08:00:00',
'2011-07-19 09:00:00'], tz='Asia/Tokyo')
expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]},
index=exp_idx, columns=['value1', 'value2'])

Expand All @@ -4032,8 +4034,8 @@ def test_groupby_multi_timezone(self):
3,2000-01-31 16:50:00,America/Chicago
4,2000-01-01 16:50:00,America/New_York"""

df = pd.read_csv(
StringIO(data), header=None, names=['value', 'date', 'tz'])
df = pd.read_csv(StringIO(data), header=None,
names=['value', 'date', 'tz'])
result = df.groupby('tz').date.apply(
lambda x: pd.to_datetime(x).dt.tz_localize(x.name))

Expand All @@ -4051,14 +4053,54 @@ def test_groupby_multi_timezone(self):
assert_series_equal(result, expected)

tz = 'America/Chicago'
result = pd.to_datetime(df.groupby('tz').date.get_group(
tz)).dt.tz_localize(tz)
expected = pd.to_datetime(Series(
['2000-01-28 16:47:00', '2000-01-29 16:48:00',
'2000-01-31 16:50:00'], index=[0, 1, 3
], name='date')).dt.tz_localize(tz)
res_values = df.groupby('tz').date.get_group(tz)
result = pd.to_datetime(res_values).dt.tz_localize(tz)
exp_values = Series(['2000-01-28 16:47:00', '2000-01-29 16:48:00',
'2000-01-31 16:50:00'],
index=[0, 1, 3], name='date')
expected = pd.to_datetime(exp_values).dt.tz_localize(tz)
assert_series_equal(result, expected)

def test_groupby_groups_periods(self):
    # Sum grouped values where the group key is an hourly Period, first as
    # a column (together with 'label') and then as the frame's index level.
    hours = ['2011-07-19 07:00:00', '2011-07-19 08:00:00',
             '2011-07-19 09:00:00']
    dates = hours * 2
    periods = [pd.Period(stamp, freq='H') for stamp in dates]
    df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'],
                    'period': periods,
                    'value1': np.arange(6, dtype='int64'),
                    'value2': [1, 2] * 3})

    # each hour occurs once per label, so every period key is listed twice
    doubled = [stamp for stamp in hours for _ in range(2)]
    period_level = pd.PeriodIndex(doubled, freq='H', name='period')
    label_level = Index(['a', 'b'] * 3, name='label')
    both_levels = MultiIndex.from_arrays([period_level, label_level])
    expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5],
                          'value2': [1, 2, 2, 1, 1, 2]},
                         index=both_levels, columns=['value1', 'value2'])

    result = df.groupby(['period', 'label']).sum()
    assert_frame_equal(result, expected)

    # by level
    hourly_index = pd.PeriodIndex(dates, freq='H')
    df = DataFrame({'value1': np.arange(6, dtype='int64'),
                    'value2': [1, 2, 3, 1, 2, 3]},
                   index=hourly_index)

    unique_hours = pd.PeriodIndex(hours, freq='H')
    expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]},
                         index=unique_hours, columns=['value1', 'value2'])

    result = df.groupby(level=0).sum()
    assert_frame_equal(result, expected)

def test_groupby_reindex_inside_function(self):
from pandas.tseries.api import DatetimeIndex

Expand Down
63 changes: 63 additions & 0 deletions pandas/tools/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,36 @@ def test_merge_on_datetime64tz(self):
result = pd.merge(left, right, on='key', how='outer')
assert_frame_equal(result, expected)

def test_merge_on_periods(self):
    # Case 1: outer merge where the join key itself is a daily period column.
    left_keys = pd.period_range('20151010', periods=2, freq='D')
    right_keys = pd.period_range('20151011', periods=3, freq='D')
    left = pd.DataFrame({'key': left_keys, 'value': [1, 2]})
    right = pd.DataFrame({'key': right_keys, 'value': [1, 2, 3]})

    all_keys = pd.period_range('20151010', periods=4, freq='D')
    expected = DataFrame({'key': all_keys,
                          'value_x': [1, 2, np.nan, np.nan],
                          'value_y': [np.nan, 1, 2, 3]})
    result = pd.merge(left, right, on='key', how='outer')
    assert_frame_equal(result, expected)

    # Case 2: outer merge on a plain integer key while the overlapping
    # 'value' columns hold periods; the expected frame has NaT where a
    # side has no matching row.
    left_values = pd.period_range('20151010', periods=2, freq='D')
    right_values = pd.period_range('20151011', periods=2, freq='D')
    left = pd.DataFrame({'value': left_values, 'key': [1, 2]})
    right = pd.DataFrame({'value': right_values, 'key': [2, 3]})

    exp_x = pd.period_range('20151010', periods=2, freq='D')
    exp_y = pd.period_range('20151011', periods=2, freq='D')
    padded_x = list(exp_x) + [pd.NaT]
    padded_y = [pd.NaT] + list(exp_y)
    expected = DataFrame({'value_x': padded_x,
                          'value_y': padded_y,
                          'key': [1., 2, 3]})
    result = pd.merge(left, right, on='key', how='outer')
    assert_frame_equal(result, expected)

def test_concat_NaT_series(self):
# GH 11693
# test for merging NaT series with datetime series.
Expand Down Expand Up @@ -1131,6 +1161,39 @@ def test_concat_tz_series(self):
result = pd.concat([first, second])
self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]')

def test_concat_period_series(self):
    # A daily-period Series is concatenated with each Series below; in
    # every case the expected result is an object-dtype Series holding the
    # original elements of both inputs in order.
    others = [
        # same freq
        Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D')),
        # different freq
        Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M')),
        Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M')),
        # non-period
        Series(pd.DatetimeIndex(['2015-11-01', '2015-12-01'])),
        Series(['A', 'B']),
    ]

    for y in others:
        # rebuild the left operand for every case so cases stay independent
        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
        result = concat([x, y], ignore_index=True)
        tm.assert_series_equal(result, expected)

def test_indicator(self):
# PR #10054. xref #7412 and closes #8790.
df1 = DataFrame({'col1': [0, 1], 'col_left': [
Expand Down
33 changes: 33 additions & 0 deletions pandas/tools/tests/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,39 @@ def test_pivot_with_tz(self):
pv = df.pivot(index='dt1', columns='dt2', values='data1')
tm.assert_frame_equal(pv, expected)

def test_pivot_periods(self):
    # Pivot a frame whose index column ('p1', daily) and columns column
    # ('p2', monthly) both hold Period values.
    day_keys = [pd.Period('2013-01-01', 'D'),
                pd.Period('2013-01-02', 'D'),
                pd.Period('2013-01-01', 'D'),
                pd.Period('2013-01-02', 'D')]
    month_keys = [pd.Period('2013-01', 'M'),
                  pd.Period('2013-01', 'M'),
                  pd.Period('2013-02', 'M'),
                  pd.Period('2013-02', 'M')]
    df = DataFrame({'p1': day_keys,
                    'p2': month_keys,
                    'data1': np.arange(4, dtype='int64'),
                    'data2': np.arange(4, dtype='int64')})

    # without ``values`` both data columns are pivoted, giving two column
    # levels: the data column name and the monthly period
    top_level = Index(['data1', 'data1', 'data2', 'data2'])
    month_level = pd.PeriodIndex(['2013-01', '2013-02'] * 2,
                                 name='p2', freq='M')
    exp_col = pd.MultiIndex.from_arrays([top_level, month_level])
    exp_index = pd.PeriodIndex(['2013-01-01', '2013-01-02'],
                               name='p1', freq='D')
    expected = DataFrame([[0, 2, 0, 2], [1, 3, 1, 3]],
                         index=exp_index, columns=exp_col)

    pv = df.pivot(index='p1', columns='p2')
    tm.assert_frame_equal(pv, expected)

    # with ``values='data1'`` the columns are the monthly periods only
    exp_index = pd.PeriodIndex(['2013-01-01', '2013-01-02'],
                               name='p1', freq='D')
    exp_columns = pd.PeriodIndex(['2013-01', '2013-02'],
                                 name='p2', freq='M')
    expected = DataFrame([[0, 2], [1, 3]],
                         index=exp_index, columns=exp_columns)

    pv = df.pivot(index='p1', columns='p2', values='data1')
    tm.assert_frame_equal(pv, expected)

def test_margins(self):
def _check_output(result, values_col, index=['A', 'B'],
columns=['C'],
Expand Down
148 changes: 148 additions & 0 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -2877,6 +2877,17 @@ def test_union(self):
index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
self.assertRaises(ValueError, index.join, index3)

def test_union_dataframe_index(self):
    # Build a DataFrame from two Series indexed by different monthly
    # period ranges and compare the frame's index with the single range
    # spanning both. Only the index is checked, so random data is fine.
    first_rng = pd.period_range('1/1/1999', '1/1/2012', freq='M')
    first = pd.Series(np.random.randn(len(first_rng)), first_rng)

    second_rng = pd.period_range('1/1/1980', '12/1/2001', freq='M')
    second = pd.Series(np.random.randn(len(second_rng)), second_rng)

    frame = pd.DataFrame({'s1': first, 's2': second})

    exp = pd.period_range('1/1/1980', '1/1/2012', freq='M')
    self.assert_index_equal(frame.index, exp)

def test_intersection(self):
index = period_range('1/1/2000', '1/20/2000', freq='D')

Expand All @@ -2897,6 +2908,63 @@ def test_intersection(self):
index3 = period_range('1/1/2000', '1/20/2000', freq='2D')
self.assertRaises(ValueError, index.intersection, index3)

def test_intersection_cases(self):
    # Exercise PeriodIndex.intersection with monotonic, non-monotonic and
    # empty operands, checking the values, name and freq of each result.
    base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx')

    # if target has the same name, it is preserved
    rng2 = period_range('5/15/2000', '6/20/2000', freq='D', name='idx')
    expected2 = period_range('6/1/2000', '6/20/2000', freq='D',
                             name='idx')

    # if target name is different, it will be reset
    rng3 = period_range('5/15/2000', '6/20/2000', freq='D', name='other')
    expected3 = period_range('6/1/2000', '6/20/2000', freq='D',
                             name=None)

    # target does not overlap base at all: an empty index is expected
    rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
    expected4 = PeriodIndex([], name='idx', freq='D')

    for (rng, expected) in [(rng2, expected2), (rng3, expected3),
                            (rng4, expected4)]:
        result = base.intersection(rng)
        self.assertTrue(result.equals(expected))
        self.assertEqual(result.name, expected.name)
        self.assertEqual(result.freq, expected.freq)

    # non-monotonic
    base = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-02',
                        '2011-01-03'], freq='D', name='idx')

    rng2 = PeriodIndex(['2011-01-04', '2011-01-02',
                        '2011-02-02', '2011-02-03'],
                       freq='D', name='idx')
    expected2 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
                            name='idx')

    rng3 = PeriodIndex(['2011-01-04', '2011-01-02', '2011-02-02',
                        '2011-02-03'],
                       freq='D', name='other')
    expected3 = PeriodIndex(['2011-01-04', '2011-01-02'], freq='D',
                            name=None)

    rng4 = period_range('7/1/2000', '7/31/2000', freq='D', name='idx')
    expected4 = PeriodIndex([], freq='D', name='idx')

    for (rng, expected) in [(rng2, expected2), (rng3, expected3),
                            (rng4, expected4)]:
        result = base.intersection(rng)
        self.assertTrue(result.equals(expected))
        self.assertEqual(result.name, expected.name)
        self.assertEqual(result.freq, 'D')

    # empty same freq
    # Built with period_range (the original used date_range) so that the
    # empty-operand checks exercise PeriodIndex.intersection rather than
    # DatetimeIndex.intersection.
    rng = period_range('6/1/2000', '6/15/2000', freq='T')
    result = rng[0:0].intersection(rng)
    self.assertEqual(len(result), 0)

    result = rng.intersection(rng[0:0])
    self.assertEqual(len(result), 0)

def test_fields(self):
# year, month, day, hour, minute
# second, weekofyear, week, dayofweek, weekday, dayofyear, quarter
Expand Down Expand Up @@ -3734,6 +3802,86 @@ def test_pi_nat_comp(self):
idx1 == diff


class TestSeriesPeriod(tm.TestCase):

def setUp(self):
    # Give every test its own ten-element Series of daily periods.
    daily = period_range('2000-01-01', periods=10, freq='D')
    self.series = Series(daily)

def test_auto_conversion(self):
    # A Series built from a plain list of Period objects is expected to
    # report 'object' dtype.
    period_list = list(period_range('2000-01-01', periods=10, freq='D'))
    series = Series(period_list)
    self.assertEqual(series.dtype, 'object')

def test_constructor_cant_cast_period(self):
    # Requesting a float dtype for period data must raise TypeError.
    with tm.assertRaises(TypeError):
        daily = period_range('2000-01-01', periods=10, freq='D')
        Series(daily, dtype=float)

def test_series_comparison_scalars(self):
    # Compare the Series against a freshly built Period and against one of
    # its own elements; each result must match the element-wise comparison.
    scalars = (pd.Period('2000-01-04', freq='D'), self.series[5])
    for val in scalars:
        result = self.series > val
        expected = np.array([item > val for item in self.series])
        self.assert_numpy_array_equal(result, expected)

def test_between(self):
    # ``between`` must agree with the explicit pair of inclusive
    # comparisons against the same bounds.
    bounds = self.series[[2, 7]]
    left, right = bounds
    result = self.series.between(left, right)

    at_or_above = self.series >= left
    at_or_below = self.series <= right
    expected = at_or_above & at_or_below
    assert_series_equal(result, expected)

# ---------------------------------------------------------------------
# NaT support

"""
# ToDo: Enable when support period dtype
def test_NaT_scalar(self):
series = Series([0, 1000, 2000, iNaT], dtype='period[D]')

val = series[3]
self.assertTrue(com.isnull(val))

series[2] = val
self.assertTrue(com.isnull(series[2]))

def test_NaT_cast(self):
result = Series([np.nan]).astype('period[D]')
expected = Series([NaT])
assert_series_equal(result, expected)
"""

def test_set_none_nan(self):
    # currently Period is stored as object dtype, not as NaT
    # Assign None / NaN by position and by slice, then read them back.
    ser = self.series  # same object as the fixture; assignments are in place

    ser[3] = None
    self.assertIs(ser[3], None)

    ser[3:5] = None
    self.assertIs(ser[4], None)

    ser[5] = np.nan
    self.assertTrue(np.isnan(ser[5]))

    ser[5:7] = np.nan
    self.assertTrue(np.isnan(ser[6]))

def test_intercept_astype_object(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sinhrks I'm trying to decipher what this test is aimed at. Ring any bells?

expected = self.series.astype('object')

df = DataFrame({'a': self.series,
'b': np.random.randn(len(self.series))})

result = df.values.squeeze()
self.assertTrue((result[:, 0] == expected.values).all())

df = DataFrame({'a': self.series, 'b': ['foo'] * len(self.series)})

result = df.values.squeeze()
self.assertTrue((result[:, 0] == expected.values).all())


if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down
Loading