diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index b2161aa5c75c6..46d3ce96196aa 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1,12 +1,20 @@ +import numpy as np from datetime import datetime, timedelta, time import pandas as pd import pandas.util.testing as tm -from pandas import date_range, offsets, DatetimeIndex, Timestamp from pandas import compat +from pandas.core import common as com +from pandas.util.testing import assertRaisesRegexp +from pandas.tseries.index import bdate_range, cdate_range +from pandas import date_range, offsets, DatetimeIndex, Timestamp, Index +from pandas.tseries.offsets import (generate_range, CDay, BDay, Minute, + BMonthEnd, DateOffset, MonthEnd) from pandas.tests.series.common import TestData +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + class TestTimeSeries(TestData, tm.TestCase): _multiprocess_can_split_ = True @@ -128,3 +136,803 @@ def test_catch_infinite_loop(self): # blow up, don't loop forever self.assertRaises(Exception, date_range, datetime(2011, 11, 11), datetime(2011, 11, 12), freq=offset) + + +def eq_gen_range(kwargs, expected): + rng = generate_range(**kwargs) + assert (np.array_equal(list(rng), expected)) + + +class TestGenRangeGeneration(tm.TestCase): + + def test_generate(self): + rng1 = list(generate_range(START, END, offset=BDay())) + rng2 = list(generate_range(START, END, time_rule='B')) + self.assertEqual(rng1, rng2) + + def test_generate_cday(self): + rng1 = list(generate_range(START, END, offset=CDay())) + rng2 = list(generate_range(START, END, time_rule='C')) + self.assertEqual(rng1, rng2) + + def test_1(self): + eq_gen_range(dict(start=datetime(2009, 3, 25), periods=2), + [datetime(2009, 3, 25), datetime(2009, 3, 26)]) + + def test_2(self): + eq_gen_range(dict(start=datetime(2008, 1, 1), + end=datetime(2008, 1, 3)), + [datetime(2008, 1, 1), + datetime(2008, 1, 2), + datetime(2008, 1, 3)]) + + def test_3(self): + eq_gen_range(dict(start=datetime(2008, 1, 5), + end=datetime(2008, 1, 6)), + []) + + def test_precision_finer_than_offset(self): + # GH 9907 + result1 = DatetimeIndex(start='2015-04-15 00:00:03', + end='2016-04-22 00:00:00', freq='Q') + result2 = DatetimeIndex(start='2015-04-15 00:00:03', + end='2015-06-22 00:00:04', freq='W') + expected1_list = ['2015-06-30 00:00:03', '2015-09-30 00:00:03', + '2015-12-31 00:00:03', '2016-03-31 00:00:03'] + expected2_list = ['2015-04-19 00:00:03', '2015-04-26 00:00:03', + '2015-05-03 00:00:03', '2015-05-10 00:00:03', + '2015-05-17 00:00:03', '2015-05-24 00:00:03', + '2015-05-31 00:00:03', '2015-06-07 00:00:03', + '2015-06-14 00:00:03', '2015-06-21 00:00:03'] + expected1 = DatetimeIndex(expected1_list, dtype='datetime64[ns]', + freq='Q-DEC', tz=None) + expected2 = DatetimeIndex(expected2_list, dtype='datetime64[ns]', + freq='W-SUN', tz=None) + self.assert_index_equal(result1, expected1) + self.assert_index_equal(result2, expected2) + + +class TestDateRange(tm.TestCase): + def setUp(self): + self.rng = bdate_range(START, END) + + def test_constructor(self): + bdate_range(START, END, freq=BDay()) + bdate_range(START, periods=20, freq=BDay()) + bdate_range(end=START, periods=20, freq=BDay()) + self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') + self.assertRaises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') + + def test_naive_aware_conflicts(self): + naive = bdate_range(START, END, freq=BDay(), tz=None) + aware = 
bdate_range(START, END, freq=BDay(), + tz="Asia/Hong_Kong") + assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", naive.join, aware) + assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", aware.join, naive) + + def test_cached_range(self): + DatetimeIndex._cached_range(START, END, offset=BDay()) + DatetimeIndex._cached_range(START, periods=20, offset=BDay()) + DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) + + assertRaisesRegexp(TypeError, "offset", DatetimeIndex._cached_range, + START, END) + + assertRaisesRegexp(TypeError, "specify period", + DatetimeIndex._cached_range, START, + offset=BDay()) + + assertRaisesRegexp(TypeError, "specify period", + DatetimeIndex._cached_range, end=END, + offset=BDay()) + + assertRaisesRegexp(TypeError, "start or end", + DatetimeIndex._cached_range, periods=20, + offset=BDay()) + + def test_cached_range_bug(self): + rng = date_range('2010-09-01 05:00:00', periods=50, + freq=DateOffset(hours=6)) + self.assertEqual(len(rng), 50) + self.assertEqual(rng[0], datetime(2010, 9, 1, 5)) + + def test_timezone_comparaison_bug(self): + start = Timestamp('20130220 10:00', tz='US/Eastern') + try: + date_range(start, periods=2, tz='US/Eastern') + except AssertionError: + self.fail() + + def test_timezone_comparaison_assert(self): + start = Timestamp('20130220 10:00', tz='US/Eastern') + self.assertRaises(AssertionError, date_range, start, periods=2, + tz='Europe/Berlin') + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + self.assertTrue(comp[11]) + self.assertFalse(comp[9]) + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + self.assert_index_equal(cp, self.rng) + + def test_repr(self): + # only really care that it works + repr(self.rng) + + def test_getitem(self): + smaller = self.rng[:5] + exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) + self.assert_index_equal(smaller, exp) + + self.assertEqual(smaller.offset, self.rng.offset) + + sliced = self.rng[::5] + self.assertEqual(sliced.offset, BDay() * 5) + + fancy_indexed = self.rng[[4, 3, 2, 1, 0]] + self.assertEqual(len(fancy_indexed), 5) + tm.assertIsInstance(fancy_indexed, DatetimeIndex) + self.assertIsNone(fancy_indexed.freq) + + # 32-bit vs. 
64-bit platforms + self.assertEqual(self.rng[4], self.rng[np.int_(4)]) + + def test_getitem_matplotlib_hackaround(self): + values = self.rng[:, None] + expected = self.rng.values[:, None] + self.assert_numpy_array_equal(values, expected) + + def test_shift(self): + shifted = self.rng.shift(5) + self.assertEqual(shifted[0], self.rng[5]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(-5) + self.assertEqual(shifted[5], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(0) + self.assertEqual(shifted[0], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=BDay()) + self.assertEqual(shifted[0], rng[0] + BDay()) + + def test_pickle_unpickle(self): + unpickled = self.round_trip_pickle(self.rng) + self.assertIsNotNone(unpickled.offset) + + def test_union(self): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right) + tm.assertIsInstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # order does not matter + tm.assert_index_equal(right.union(left), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng) + tm.assertIsInstance(the_union, DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + def test_union_not_cacheable(self): + rng = date_range('1/1/2000', periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_union = rng1.union(rng2) + self.assert_index_equal(the_union, rng) + + rng1 = rng[10:] + rng2 = rng[15:35] + the_union = rng1.union(rng2) + expected = rng[10:] + self.assert_index_equal(the_union, expected) + + def test_intersection(self): + rng = date_range('1/1/2000', periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_int = rng1.intersection(rng2) + expected = rng[10:25] + self.assert_index_equal(the_int, expected) + tm.assertIsInstance(the_int, DatetimeIndex) + self.assertEqual(the_int.offset, rng.offset) + + the_int = rng1.intersection(rng2.view(DatetimeIndex)) + self.assert_index_equal(the_int, expected) + + # non-overlapping + the_int = rng[:10].intersection(rng[10:]) + expected = DatetimeIndex([]) + self.assert_index_equal(the_int, expected) + + def test_intersection_bug(self): + # GH #771 + a = bdate_range('11/30/2011', '12/31/2011') + b = bdate_range('12/10/2011', '12/20/2011') + result = 
a.intersection(b) + self.assert_index_equal(result, b) + + def test_summary(self): + self.rng.summary() + self.rng[2:2].summary() + + def test_summary_pytz(self): + tm._skip_if_no_pytz() + import pytz + bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_summary_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = bdate_range(end=end, periods=20) + firstDate = end - 19 * BDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_date_parse_failure(self): + badly_formed_date = '2007/100/1' + + self.assertRaises(ValueError, Timestamp, badly_formed_date) + + self.assertRaises(ValueError, bdate_range, start=badly_formed_date, + periods=10) + self.assertRaises(ValueError, bdate_range, end=badly_formed_date, + periods=10) + self.assertRaises(ValueError, bdate_range, badly_formed_date, + badly_formed_date) + + def test_equals(self): + self.assertFalse(self.rng.equals(list(self.rng))) + + def test_identical(self): + t1 = self.rng.copy() + t2 = self.rng.copy() + self.assertTrue(t1.identical(t2)) + + # name + t1 = t1.rename('foo') + self.assertTrue(t1.equals(t2)) + self.assertFalse(t1.identical(t2)) + t2 = t2.rename('foo') + self.assertTrue(t1.identical(t2)) + + # freq + t2v = Index(t2.values) + self.assertTrue(t1.equals(t2v)) + self.assertFalse(t1.identical(t2v)) + + def test_daterange_bug_456(self): + # GH #456 + rng1 = bdate_range('12/5/2011', '12/5/2011') + rng2 = bdate_range('12/2/2011', '12/5/2011') + rng2.offset = BDay() + + result = rng1.union(rng2) + tm.assertIsInstance(result, DatetimeIndex) + + def test_error_with_zero_monthends(self): + self.assertRaises(ValueError, date_range, '1/1/2000', '1/1/2001', + freq=MonthEnd(0)) + + def test_range_bug(self): + # GH #770 + offset = DateOffset(months=3) + result = date_range("2011-1-1", "2012-1-31", freq=offset) + + start = datetime(2011, 1, 1) + exp_values = [start + i * offset for i in range(5)] + tm.assert_index_equal(result, DatetimeIndex(exp_values)) + + def test_range_tz_pytz(self): + # GH 2906 + tm._skip_if_no_pytz() + from pytz import timezone + + tz = timezone('US/Eastern') + start = tz.localize(datetime(2011, 1, 1)) + end = tz.localize(datetime(2011, 1, 3)) + + dr = date_range(start=start, periods=3) + self.assertEqual(dr.tz.zone, tz.zone) + self.assertEqual(dr[0], start) + self.assertEqual(dr[2], end) + + dr = date_range(end=end, periods=3) + self.assertEqual(dr.tz.zone, tz.zone) + self.assertEqual(dr[0], start) + self.assertEqual(dr[2], end) + + dr = date_range(start=start, end=end) + self.assertEqual(dr.tz.zone, tz.zone) + self.assertEqual(dr[0], start) + self.assertEqual(dr[2], end) + + def test_range_tz_dst_straddle_pytz(self): + + tm._skip_if_no_pytz() + from pytz import timezone + tz = timezone('US/Eastern') + dates = [(tz.localize(datetime(2014, 3, 6)), + tz.localize(datetime(2014, 3, 12))), + (tz.localize(datetime(2013, 11, 1)), + tz.localize(datetime(2013, 11, 6)))] + for (start, end) in dates: + dr = date_range(start, end, freq='D') + self.assertEqual(dr[0], start) + self.assertEqual(dr[-1], end) + self.assertEqual(np.all(dr.hour == 0), True) + + dr = date_range(start, end, freq='D', tz='US/Eastern') + self.assertEqual(dr[0], start) + self.assertEqual(dr[-1], end) + self.assertEqual(np.all(dr.hour == 0), True) + + dr = date_range(start.replace(tzinfo=None), end.replace( + tzinfo=None), freq='D', tz='US/Eastern') + 
self.assertEqual(dr[0], start) + self.assertEqual(dr[-1], end) + self.assertEqual(np.all(dr.hour == 0), True) + + def test_range_tz_dateutil(self): + # GH 2906 + tm._skip_if_no_dateutil() + # Use maybe_get_tz to fix filename in tz under dateutil. + from pandas.tslib import maybe_get_tz + tz = lambda x: maybe_get_tz('dateutil/' + x) + + start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) + end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern')) + + dr = date_range(start=start, periods=3) + self.assertTrue(dr.tz == tz('US/Eastern')) + self.assertTrue(dr[0] == start) + self.assertTrue(dr[2] == end) + + dr = date_range(end=end, periods=3) + self.assertTrue(dr.tz == tz('US/Eastern')) + self.assertTrue(dr[0] == start) + self.assertTrue(dr[2] == end) + + dr = date_range(start=start, end=end) + self.assertTrue(dr.tz == tz('US/Eastern')) + self.assertTrue(dr[0] == start) + self.assertTrue(dr[2] == end) + + def test_month_range_union_tz_pytz(self): + tm._skip_if_no_pytz() + from pytz import timezone + tz = timezone('US/Eastern') + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, + freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, + freq=MonthEnd()) + + early_dr.union(late_dr) + + def test_month_range_union_tz_dateutil(self): + tm._skip_if_windows_python_3() + tm._skip_if_no_dateutil() + from pandas.tslib import _dateutil_gettz as timezone + tz = timezone('US/Eastern') + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, + freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, + freq=MonthEnd()) + + early_dr.union(late_dr) + + def test_range_closed(self): + begin = datetime(2011, 1, 1) + end = datetime(2014, 1, 1) + + for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: + closed = date_range(begin, end, closed=None, freq=freq) + left = date_range(begin, end, closed="left", freq=freq) + right = date_range(begin, end, closed="right", freq=freq) + expected_left = left + expected_right = right + + if end == closed[-1]: + expected_left = closed[:-1] + if begin == closed[0]: + expected_right = closed[1:] + + self.assert_index_equal(expected_left, left) + self.assert_index_equal(expected_right, right) + + def test_range_closed_with_tz_aware_start_end(self): + # GH12409, GH12684 + begin = Timestamp('2011/1/1', tz='US/Eastern') + end = Timestamp('2014/1/1', tz='US/Eastern') + + for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: + closed = date_range(begin, end, closed=None, freq=freq) + left = date_range(begin, end, closed="left", freq=freq) + right = date_range(begin, end, closed="right", freq=freq) + expected_left = left + expected_right = right + + if end == closed[-1]: + expected_left = closed[:-1] + if begin == closed[0]: + expected_right = closed[1:] + + self.assert_index_equal(expected_left, left) + self.assert_index_equal(expected_right, right) + + begin = Timestamp('2011/1/1') + end = Timestamp('2014/1/1') + begintz = Timestamp('2011/1/1', tz='US/Eastern') + endtz = Timestamp('2014/1/1', tz='US/Eastern') + + for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: + closed = date_range(begin, end, closed=None, freq=freq, + tz='US/Eastern') + left = date_range(begin, end, closed="left", freq=freq, + tz='US/Eastern') + right = date_range(begin, end, 
closed="right", freq=freq, + tz='US/Eastern') + expected_left = left + expected_right = right + + if endtz == closed[-1]: + expected_left = closed[:-1] + if begintz == closed[0]: + expected_right = closed[1:] + + self.assert_index_equal(expected_left, left) + self.assert_index_equal(expected_right, right) + + def test_range_closed_boundary(self): + # GH 11804 + for closed in ['right', 'left', None]: + right_boundary = date_range('2015-09-12', '2015-12-01', + freq='QS-MAR', closed=closed) + left_boundary = date_range('2015-09-01', '2015-09-12', + freq='QS-MAR', closed=closed) + both_boundary = date_range('2015-09-01', '2015-12-01', + freq='QS-MAR', closed=closed) + expected_right = expected_left = expected_both = both_boundary + + if closed == 'right': + expected_left = both_boundary[1:] + if closed == 'left': + expected_right = both_boundary[:-1] + if closed is None: + expected_right = both_boundary[1:] + expected_left = both_boundary[:-1] + + self.assert_index_equal(right_boundary, expected_right) + self.assert_index_equal(left_boundary, expected_left) + self.assert_index_equal(both_boundary, expected_both) + + def test_years_only(self): + # GH 6961 + dr = date_range('2014', '2015', freq='M') + self.assertEqual(dr[0], datetime(2014, 1, 31)) + self.assertEqual(dr[-1], datetime(2014, 12, 31)) + + def test_freq_divides_end_in_nanos(self): + # GH 10885 + result_1 = date_range('2005-01-12 10:00', '2005-01-12 16:00', + freq='345min') + result_2 = date_range('2005-01-13 10:00', '2005-01-13 16:00', + freq='345min') + expected_1 = DatetimeIndex(['2005-01-12 10:00:00', + '2005-01-12 15:45:00'], + dtype='datetime64[ns]', freq='345T', + tz=None) + expected_2 = DatetimeIndex(['2005-01-13 10:00:00', + '2005-01-13 15:45:00'], + dtype='datetime64[ns]', freq='345T', + tz=None) + self.assert_index_equal(result_1, expected_1) + self.assert_index_equal(result_2, expected_2) + + +class TestCustomDateRange(tm.TestCase): + def setUp(self): + self.rng = cdate_range(START, END) + + def test_constructor(self): + cdate_range(START, END, freq=CDay()) + cdate_range(START, periods=20, freq=CDay()) + cdate_range(end=START, periods=20, freq=CDay()) + self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') + self.assertRaises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') + + def test_cached_range(self): + DatetimeIndex._cached_range(START, END, offset=CDay()) + DatetimeIndex._cached_range(START, periods=20, + offset=CDay()) + DatetimeIndex._cached_range(end=START, periods=20, + offset=CDay()) + + self.assertRaises(Exception, DatetimeIndex._cached_range, START, END) + + self.assertRaises(Exception, DatetimeIndex._cached_range, START, + freq=CDay()) + + self.assertRaises(Exception, DatetimeIndex._cached_range, end=END, + freq=CDay()) + + self.assertRaises(Exception, DatetimeIndex._cached_range, periods=20, + freq=CDay()) + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + self.assertTrue(comp[11]) + self.assertFalse(comp[9]) + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + self.assert_index_equal(cp, self.rng) + + def test_repr(self): + # only really care that it works + repr(self.rng) + + def test_getitem(self): + smaller = self.rng[:5] + exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) + self.assert_index_equal(smaller, exp) + self.assertEqual(smaller.offset, self.rng.offset) + + sliced = self.rng[::5] + self.assertEqual(sliced.offset, CDay() * 5) + + fancy_indexed = self.rng[[4, 3, 2, 1, 0]] + self.assertEqual(len(fancy_indexed), 5) + 
tm.assertIsInstance(fancy_indexed, DatetimeIndex) + self.assertIsNone(fancy_indexed.freq) + + # 32-bit vs. 64-bit platforms + self.assertEqual(self.rng[4], self.rng[np.int_(4)]) + + def test_getitem_matplotlib_hackaround(self): + values = self.rng[:, None] + expected = self.rng.values[:, None] + self.assert_numpy_array_equal(values, expected) + + def test_shift(self): + + shifted = self.rng.shift(5) + self.assertEqual(shifted[0], self.rng[5]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(-5) + self.assertEqual(shifted[5], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + shifted = self.rng.shift(0) + self.assertEqual(shifted[0], self.rng[0]) + self.assertEqual(shifted.offset, self.rng.offset) + + with tm.assert_produces_warning(com.PerformanceWarning): + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=CDay()) + self.assertEqual(shifted[0], rng[0] + CDay()) + + def test_pickle_unpickle(self): + unpickled = self.round_trip_pickle(self.rng) + self.assertIsNotNone(unpickled.offset) + + def test_union(self): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right) + tm.assertIsInstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right) + tm.assertIsInstance(the_union, DatetimeIndex) + + # order does not matter + self.assert_index_equal(right.union(left), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng) + tm.assertIsInstance(the_union, DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = left.join(right, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how='outer') + tm.assertIsInstance(the_join, DatetimeIndex) + self.assertIsNone(the_join.freq) + + def test_intersection_bug(self): + # GH #771 + a = cdate_range('11/30/2011', '12/31/2011') + b = cdate_range('12/10/2011', '12/20/2011') + result = a.intersection(b) + self.assert_index_equal(result, b) + + def test_summary(self): + self.rng.summary() + self.rng[2:2].summary() + + def test_summary_pytz(self): + tm._skip_if_no_pytz() + import pytz + cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + + def test_summary_dateutil(self): + tm._skip_if_no_dateutil() + import dateutil + cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = cdate_range(end=end, periods=20) + firstDate = end - 19 * CDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_date_parse_failure(self): + badly_formed_date = '2007/100/1' + + self.assertRaises(ValueError, Timestamp, badly_formed_date) + + 
self.assertRaises(ValueError, cdate_range, start=badly_formed_date, + periods=10) + self.assertRaises(ValueError, cdate_range, end=badly_formed_date, + periods=10) + self.assertRaises(ValueError, cdate_range, badly_formed_date, + badly_formed_date) + + def test_equals(self): + self.assertFalse(self.rng.equals(list(self.rng))) + + def test_daterange_bug_456(self): + # GH #456 + rng1 = cdate_range('12/5/2011', '12/5/2011') + rng2 = cdate_range('12/2/2011', '12/5/2011') + rng2.offset = CDay() + + result = rng1.union(rng2) + tm.assertIsInstance(result, DatetimeIndex) + + def test_cdaterange(self): + rng = cdate_range('2013-05-01', periods=3) + xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) + self.assert_index_equal(xp, rng) + + def test_cdaterange_weekmask(self): + rng = cdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu') + xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) + self.assert_index_equal(xp, rng) + + def test_cdaterange_holidays(self): + rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) + xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) + self.assert_index_equal(xp, rng) + + def test_cdaterange_weekmask_and_holidays(self): + rng = cdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu', + holidays=['2013-05-01']) + xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) + self.assert_index_equal(xp, rng) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index c25cd6a3fa90e..56be55d25a6ea 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -8,7 +8,7 @@ from pandas.core.common import PerformanceWarning from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, - datetime, Float64Index) + datetime, Float64Index, offsets) from pandas.tests.test_base import Ops @@ -1071,3 +1071,18 @@ def test_datetime64_with_DateOffset(self): assert_func(klass([x + op for x in s]), s + op) assert_func(klass([x - op for x in s]), s - op) assert_func(klass([op + x for x in s]), op + s) + + +class TestTslib(tm.TestCase): + + def test_shift_months(self): + s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp( + '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp( + '2000-02-29'), Timestamp('2000-12-31')]) + for years in [-1, 0, 1]: + for months in [-2, 0, 2]: + actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + + months)) + expected = DatetimeIndex([x + offsets.DateOffset( + years=years, months=months) for x in s]) + tm.assert_index_equal(actual, expected) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 42d135f634298..6d1483828436c 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1,22 +1,26 @@ """ test to_datetime """ -import nose - import sys -import calendar +import nose import locale -from datetime import datetime - +import calendar import numpy as np -from pandas.types.common import is_datetime64_ns_dtype -from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, - Index, DatetimeIndex, NaT, date_range, bdate_range) -from pandas import tslib -from pandas.compat import lmap +from datetime import datetime, date, time +from distutils.version import LooseVersion + import pandas as pd +from pandas import tslib from pandas.tseries import tools +from 
pandas.tseries.tools import normalize_date +from pandas.tseries.util import pivot_annual, isleapyear +from pandas.compat import lmap +from pandas.compat.numpy import np_array_datetime64_compat +from pandas.types.common import is_datetime64_ns_dtype from pandas.util import testing as tm -from pandas.util.testing import assert_series_equal +from pandas.util.testing import assert_series_equal, _skip_if_has_locale +from pandas import (isnull, to_datetime, Timestamp, Series, DataFrame, + Index, DatetimeIndex, NaT, date_range, bdate_range, + compat, lib) class TimeConversionFormats(tm.TestCase): @@ -1017,3 +1021,603 @@ def test_day_not_in_month_ignore(self): '2015-02-32', errors='ignore', format="%Y-%m-%d"), '2015-02-32') self.assertEqual(to_datetime( '2015-04-31', errors='ignore', format="%Y-%m-%d"), '2015-04-31') + + +class TestDatetimeParsingWrappers(tm.TestCase): + def test_does_not_convert_mixed_integer(self): + bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') + + for bad_date_string in bad_date_strings: + self.assertFalse(tslib._does_string_look_like_datetime( + bad_date_string)) + + good_date_strings = ('2012-01-01', + '01/01/2012', + 'Mon Sep 16, 2013', + '01012012', + '0101', + '1-1', ) + + for good_date_string in good_date_strings: + self.assertTrue(tslib._does_string_look_like_datetime( + good_date_string)) + + def test_parsers(self): + + # https://github.com/dateutil/dateutil/issues/217 + import dateutil + yearfirst = dateutil.__version__ >= LooseVersion('2.5.0') + + cases = {'2011-01-01': datetime(2011, 1, 1), + '2Q2005': datetime(2005, 4, 1), + '2Q05': datetime(2005, 4, 1), + '2005Q1': datetime(2005, 1, 1), + '05Q1': datetime(2005, 1, 1), + '2011Q3': datetime(2011, 7, 1), + '11Q3': datetime(2011, 7, 1), + '3Q2011': datetime(2011, 7, 1), + '3Q11': datetime(2011, 7, 1), + + # quarterly without space + '2000Q4': datetime(2000, 10, 1), + '00Q4': datetime(2000, 10, 1), + '4Q2000': datetime(2000, 10, 1), + '4Q00': datetime(2000, 10, 1), + '2000q4': datetime(2000, 10, 1), + '2000-Q4': datetime(2000, 10, 1), + '00-Q4': datetime(2000, 10, 1), + '4Q-2000': datetime(2000, 10, 1), + '4Q-00': datetime(2000, 10, 1), + '00q4': datetime(2000, 10, 1), + '2005': datetime(2005, 1, 1), + '2005-11': datetime(2005, 11, 1), + '2005 11': datetime(2005, 11, 1), + '11-2005': datetime(2005, 11, 1), + '11 2005': datetime(2005, 11, 1), + '200511': datetime(2020, 5, 11), + '20051109': datetime(2005, 11, 9), + '20051109 10:15': datetime(2005, 11, 9, 10, 15), + '20051109 08H': datetime(2005, 11, 9, 8, 0), + '2005-11-09 10:15': datetime(2005, 11, 9, 10, 15), + '2005-11-09 08H': datetime(2005, 11, 9, 8, 0), + '2005/11/09 10:15': datetime(2005, 11, 9, 10, 15), + '2005/11/09 08H': datetime(2005, 11, 9, 8, 0), + "Thu Sep 25 10:36:28 2003": datetime(2003, 9, 25, 10, + 36, 28), + "Thu Sep 25 2003": datetime(2003, 9, 25), + "Sep 25 2003": datetime(2003, 9, 25), + "January 1 2014": datetime(2014, 1, 1), + + # GH 10537 + '2014-06': datetime(2014, 6, 1), + '06-2014': datetime(2014, 6, 1), + '2014-6': datetime(2014, 6, 1), + '6-2014': datetime(2014, 6, 1), + + '20010101 12': datetime(2001, 1, 1, 12), + '20010101 1234': datetime(2001, 1, 1, 12, 34), + '20010101 123456': datetime(2001, 1, 1, 12, 34, 56), + } + + for date_str, expected in compat.iteritems(cases): + result1, _, _ = tools.parse_time_string(date_str, + yearfirst=yearfirst) + result2 = to_datetime(date_str, yearfirst=yearfirst) + result3 = to_datetime([date_str], yearfirst=yearfirst) + # result5 is used below + result4 = 
to_datetime(np.array([date_str], dtype=object), + yearfirst=yearfirst) + result6 = DatetimeIndex([date_str], yearfirst=yearfirst) + # result7 is used below + result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) + result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst) + + for res in [result1, result2]: + self.assertEqual(res, expected) + for res in [result3, result4, result6, result8, result9]: + exp = DatetimeIndex([pd.Timestamp(expected)]) + tm.assert_index_equal(res, exp) + + # these really need to have yearfirst, but we don't support + if not yearfirst: + result5 = Timestamp(date_str) + self.assertEqual(result5, expected) + result7 = date_range(date_str, freq='S', periods=1, + yearfirst=yearfirst) + self.assertEqual(result7, expected) + + # NaT + result1, _, _ = tools.parse_time_string('NaT') + result2 = to_datetime('NaT') + result3 = Timestamp('NaT') + result4 = DatetimeIndex(['NaT'])[0] + self.assertTrue(result1 is tslib.NaT) + self.assertTrue(result2 is tslib.NaT) + self.assertTrue(result3 is tslib.NaT) + self.assertTrue(result4 is tslib.NaT) + + def test_parsers_quarter_invalid(self): + + cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.'] + for case in cases: + self.assertRaises(ValueError, tools.parse_time_string, case) + + def test_parsers_dayfirst_yearfirst(self): + tm._skip_if_no_dateutil() + + # OK + # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + # 2.5.2 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + + # OK + # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + + # bug fix in 2.5.2 + # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 + # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 + + # OK + # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + + # revert of bug in 2.5.2 + # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12 + # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + + import dateutil + is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3') + + # str : dayfirst, yearfirst, expected + cases = {'10-11-12': [(False, False, + datetime(2012, 10, 11)), + (True, False, + datetime(2012, 11, 10)), + (False, True, + datetime(2010, 11, 12)), + (True, True, + datetime(2010, 12, 11))], + '20/12/21': [(False, False, + datetime(2021, 12, 20)), + (True, False, + datetime(2021, 12, 20)), + (False, True, + datetime(2020,
12, 21)), + (True, True, + datetime(2020, 12, 21))]} + + from dateutil.parser import parse + for date_str, values in compat.iteritems(cases): + for dayfirst, yearfirst, expected in values: + + # odd comparisons across version + # let's just skip + if dayfirst and yearfirst and is_lt_253: + continue + + # compare with dateutil result + dateutil_result = parse(date_str, dayfirst=dayfirst, + yearfirst=yearfirst) + self.assertEqual(dateutil_result, expected) + + result1, _, _ = tools.parse_time_string(date_str, + dayfirst=dayfirst, + yearfirst=yearfirst) + + # we don't support dayfirst/yearfirst here: + if not dayfirst and not yearfirst: + result2 = Timestamp(date_str) + self.assertEqual(result2, expected) + + result3 = to_datetime(date_str, dayfirst=dayfirst, + yearfirst=yearfirst) + + result4 = DatetimeIndex([date_str], dayfirst=dayfirst, + yearfirst=yearfirst)[0] + + self.assertEqual(result1, expected) + self.assertEqual(result3, expected) + self.assertEqual(result4, expected) + + def test_parsers_timestring(self): + tm._skip_if_no_dateutil() + from dateutil.parser import parse + + # must be the same as dateutil result + cases = {'10:15': (parse('10:15'), datetime(1, 1, 1, 10, 15)), + '9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))} + + for date_str, (exp_now, exp_def) in compat.iteritems(cases): + result1, _, _ = tools.parse_time_string(date_str) + result2 = to_datetime(date_str) + result3 = to_datetime([date_str]) + result4 = Timestamp(date_str) + result5 = DatetimeIndex([date_str])[0] + # parse time string return time string based on default date + # others are not, and can't be changed because it is used in + # time series plot + self.assertEqual(result1, exp_def) + self.assertEqual(result2, exp_now) + self.assertEqual(result3, exp_now) + self.assertEqual(result4, exp_now) + self.assertEqual(result5, exp_now) + + def test_parsers_time(self): + # GH11818 + _skip_if_has_locale() + strings = ["14:15", "1415", "2:15pm", "0215pm", "14:15:00", "141500", + "2:15:00pm", "021500pm", time(14, 15)] + expected = time(14, 15) + + for time_string in strings: + self.assertEqual(tools.to_time(time_string), expected) + + new_string = "14.15" + self.assertRaises(ValueError, tools.to_time, new_string) + self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected) + + arg = ["14:15", "20:20"] + expected_arr = [time(14, 15), time(20, 20)] + self.assertEqual(tools.to_time(arg), expected_arr) + self.assertEqual(tools.to_time(arg, format="%H:%M"), expected_arr) + self.assertEqual(tools.to_time(arg, infer_time_format=True), + expected_arr) + self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"), + [None, None]) + + res = tools.to_time(arg, format="%I:%M%p", errors="ignore") + self.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) + + with tm.assertRaises(ValueError): + tools.to_time(arg, format="%I:%M%p", errors="raise") + + self.assert_series_equal(tools.to_time(Series(arg, name="test")), + Series(expected_arr, name="test")) + + res = tools.to_time(np.array(arg)) + self.assertIsInstance(res, list) + self.assert_equal(res, expected_arr) + + def test_parsers_monthfreq(self): + cases = {'201101': datetime(2011, 1, 1, 0, 0), + '200005': datetime(2000, 5, 1, 0, 0)} + + for date_str, expected in compat.iteritems(cases): + result1, _, _ = tools.parse_time_string(date_str, freq='M') + self.assertEqual(result1, expected) + + def test_parsers_quarterly_with_freq(self): + msg = ('Incorrect quarterly string is given, quarter ' + 'must be between 1 and 4: 2013Q5') + with 
tm.assertRaisesRegexp(tslib.DateParseError, msg): + tools.parse_time_string('2013Q5') + + # GH 5418 + msg = ('Unable to retrieve month information from given freq: ' + 'INVLD-L-DEC-SAT') + with tm.assertRaisesRegexp(tslib.DateParseError, msg): + tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') + + cases = {('2013Q2', None): datetime(2013, 4, 1), + ('2013Q2', 'A-APR'): datetime(2012, 8, 1), + ('2013-Q2', 'A-DEC'): datetime(2013, 4, 1)} + + for (date_str, freq), exp in compat.iteritems(cases): + result, _, _ = tools.parse_time_string(date_str, freq=freq) + self.assertEqual(result, exp) + + def test_parsers_timezone_minute_offsets_roundtrip(self): + # GH11708 + base = to_datetime("2013-01-01 00:00:00") + dt_strings = [ + ('2013-01-01 05:45+0545', + "Asia/Katmandu", + "Timestamp('2013-01-01 05:45:00+0545', tz='Asia/Katmandu')"), + ('2013-01-01 05:30+0530', + "Asia/Kolkata", + "Timestamp('2013-01-01 05:30:00+0530', tz='Asia/Kolkata')") + ] + + for dt_string, tz, dt_string_repr in dt_strings: + dt_time = to_datetime(dt_string) + self.assertEqual(base, dt_time) + converted_time = dt_time.tz_localize('UTC').tz_convert(tz) + self.assertEqual(dt_string_repr, repr(converted_time)) + + def test_parsers_iso8601(self): + # GH 12060 + # test only the iso parser - flexibility to different + # separators and leading 0s + # Timestamp construction falls back to dateutil + cases = {'2011-01-02': datetime(2011, 1, 2), + '2011-1-2': datetime(2011, 1, 2), + '2011-01': datetime(2011, 1, 1), + '2011-1': datetime(2011, 1, 1), + '2011 01 02': datetime(2011, 1, 2), + '2011.01.02': datetime(2011, 1, 2), + '2011/01/02': datetime(2011, 1, 2), + '2011\\01\\02': datetime(2011, 1, 2), + '2013-01-01 05:30:00': datetime(2013, 1, 1, 5, 30), + '2013-1-1 5:30:00': datetime(2013, 1, 1, 5, 30)} + for date_str, exp in compat.iteritems(cases): + actual = tslib._test_parse_iso8601(date_str) + self.assertEqual(actual, exp) + + # separators must all match - YYYYMM not valid + invalid_cases = ['2011-01/02', '2011^11^11', + '201401', '201111', '200101', + # mixed separated and unseparated + '2005-0101', '200501-01', + '20010101 12:3456', '20010101 1234:56', + # HHMMSS must have two digits in each component + # if unseparated + '20010101 1', '20010101 123', '20010101 12345', + '20010101 12345Z', + # wrong separator for HHMMSS + '2001-01-01 12-34-56'] + for date_str in invalid_cases: + with tm.assertRaises(ValueError): + tslib._test_parse_iso8601(date_str) + # If no ValueError raised, let me know which case failed.
+ raise Exception(date_str) + + +class TestTsUtil(tm.TestCase): + + def test_try_parse_dates(self): + from dateutil.parser import parse + arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) + + result = lib.try_parse_dates(arr, dayfirst=True) + expected = [parse(d, dayfirst=True) for d in arr] + self.assertTrue(np.array_equal(result, expected)) + + +class TestArrayToDatetime(tm.TestCase): + def test_parsing_valid_dates(self): + arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr), + np_array_datetime64_compat( + [ + '2013-01-01T00:00:00.000000000-0000', + '2013-01-02T00:00:00.000000000-0000' + ], + dtype='M8[ns]' + ) + ) + + arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr), + np_array_datetime64_compat( + [ + '2013-09-16T00:00:00.000000000-0000', + '2013-09-17T00:00:00.000000000-0000' + ], + dtype='M8[ns]' + ) + ) + + def test_parsing_timezone_offsets(self): + # All of these datetime strings with offsets are equivalent + # to the same datetime after the timezone offset is added + dt_strings = [ + '01-01-2013 08:00:00+08:00', + '2013-01-01T08:00:00.000000000+0800', + '2012-12-31T16:00:00.000000000-0800', + '12-31-2012 23:00:00-01:00' + ] + + expected_output = tslib.array_to_datetime(np.array( + ['01-01-2013 00:00:00'], dtype=object)) + + for dt_string in dt_strings: + self.assert_numpy_array_equal( + tslib.array_to_datetime( + np.array([dt_string], dtype=object) + ), + expected_output + ) + + def test_number_looking_strings_not_into_datetime(self): + # #4601 + # These strings don't look like datetimes so they shouldn't be + # attempted to be converted + arr = np.array(['-352.737091', '183.575577'], dtype=object) + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr, errors='ignore'), arr) + + arr = np.array(['1', '2', '3', '4', '5'], dtype=object) + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr, errors='ignore'), arr) + + def test_coercing_dates_outside_of_datetime64_ns_bounds(self): + invalid_dates = [ + date(1000, 1, 1), + datetime(1000, 1, 1), + '1000-01-01', + 'Jan 1, 1000', + np.datetime64('1000-01-01'), + ] + + for invalid_date in invalid_dates: + self.assertRaises(ValueError, + tslib.array_to_datetime, + np.array( + [invalid_date], dtype='object'), + errors='raise', ) + self.assert_numpy_array_equal( + tslib.array_to_datetime( + np.array([invalid_date], dtype='object'), + errors='coerce'), + np.array([tslib.iNaT], dtype='M8[ns]') + ) + + arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr, errors='coerce'), + np_array_datetime64_compat( + [ + tslib.iNaT, + '2000-01-01T00:00:00.000000000-0000' + ], + dtype='M8[ns]' + ) + ) + + def test_coerce_of_invalid_datetimes(self): + arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) + + # Without coercing, the presence of any invalid dates prevents + # any values from being converted + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr, errors='ignore'), arr) + + # With coercing, the invalid dates becomes iNaT + self.assert_numpy_array_equal( + tslib.array_to_datetime(arr, errors='coerce'), + np_array_datetime64_compat( + [ + '2013-01-01T00:00:00.000000000-0000', + tslib.iNaT, + tslib.iNaT + ], + dtype='M8[ns]' + ) + ) + + +class TestPivotAnnual(tm.TestCase): + """ + New pandas of scikits.timeseries pivot_annual + """ + + def test_daily(self): + rng = 
date_range('1/1/2000', '12/31/2004', freq='D') + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts, 'D') + + doy = ts.index.dayofyear + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1 + + for i in range(1, 367): + subset = ts[doy == i] + subset.index = [x.year for x in subset.index] + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + # check leap days + leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)] + day = leaps.index.dayofyear[0] + leaps.index = leaps.index.year + leaps.name = 60 + tm.assert_series_equal(annual[day].dropna(), leaps) + + def test_hourly(self): + rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24), + freq='H') + data_hourly = np.random.randint(100, 350, rng_hourly.size) + ts_hourly = Series(data_hourly, index=rng_hourly) + + grouped = ts_hourly.groupby(ts_hourly.index.year) + hoy = grouped.apply(lambda x: x.reset_index(drop=True)) + hoy = hoy.index.droplevel(0).values + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24 + hoy += 1 + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts_hourly) + + ts_hourly = ts_hourly.astype(float) + for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]: + subset = ts_hourly[hoy == i] + subset.index = [x.year for x in subset.index] + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + leaps = ts_hourly[(ts_hourly.index.month == 2) & ( + ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)] + hour = leaps.index.dayofyear[0] * 24 - 23 + leaps.index = leaps.index.year + leaps.name = 1417 + tm.assert_series_equal(annual[hour].dropna(), leaps) + + def test_weekly(self): + pass + + def test_monthly(self): + rng = date_range('1/1/2000', '12/31/2004', freq='M') + ts = Series(np.random.randn(len(rng)), index=rng) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + annual = pivot_annual(ts, 'M') + + month = ts.index.month + for i in range(1, 13): + subset = ts[month == i] + subset.index = [x.year for x in subset.index] + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + self.assertEqual(result.name, i) + + def test_period_monthly(self): + pass + + def test_period_daily(self): + pass + + def test_period_weekly(self): + pass + + def test_isleapyear_deprecate(self): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertTrue(isleapyear(2000)) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertFalse(isleapyear(2001)) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertTrue(isleapyear(2004)) + + +def test_normalize_date(): + value = date(2012, 9, 7) + + result = normalize_date(value) + assert (result == datetime(2012, 9, 7)) + + value = datetime(2012, 9, 7, 12) + + result = normalize_date(value) + assert (result == datetime(2012, 9, 7)) diff --git a/pandas/tests/indexes/period/__init__.py b/pandas/tests/indexes/period/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py new file 
mode 100644 index 0000000000000..33653c92da719 --- /dev/null +++ b/pandas/tests/indexes/period/test_period.py @@ -0,0 +1,233 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +from pandas.util import testing as tm +from pandas import (PeriodIndex, period_range, notnull, DatetimeIndex, NaT, + Index, Period, Int64Index) + +from ..datetimelike import DatetimeLike + + +class TestPeriodIndex(DatetimeLike, tm.TestCase): + _holder = PeriodIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makePeriodIndex(10)) + self.setup_indices() + + def create_index(self): + return period_range('20130101', periods=5, freq='D') + + def test_construction_base_constructor(self): + # GH 13664 + arr = [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='M')] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.PeriodIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.PeriodIndex(np.array(arr))) + + arr = [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='D')] + tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) + + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.Index(np.array(arr), dtype=object)) + + def test_astype(self): + # GH 13149, GH 13209 + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + + result = idx.astype(object) + expected = Index([Period('2016-05-16', freq='D')] + + [Period(NaT, freq='D')] * 3, dtype='object') + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index([16937] + [-9223372036854775808] * 3, + dtype=np.int64) + tm.assert_index_equal(result, expected) + + idx = period_range('1990', '2009', freq='A') + result = idx.astype('i8') + self.assert_index_equal(result, Index(idx.asi8)) + self.assert_numpy_array_equal(result.values, idx.asi8) + + def test_astype_raises(self): + # GH 13149, GH 13209 + idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') + + self.assertRaises(ValueError, idx.astype, str) + self.assertRaises(ValueError, idx.astype, float) + self.assertRaises(ValueError, idx.astype, 'timedelta64') + self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') + + def test_shift(self): + + # test shift for PeriodIndex + # GH8083 + drange = self.create_index() + result = drange.shift(1) + expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], freq='D') + self.assert_index_equal(result, expected) + + def test_pickle_compat_construction(self): + pass + + def test_get_loc(self): + idx = pd.period_range('2000-01-01', periods=3) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + self.assertEqual( + idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) + self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) + self.assertEqual( + idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) + self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + + idx = pd.period_range('2000-01-01', periods=5)[::2] + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance='1 day'), 1) + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=pd.Timedelta('1D')), 1) + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=np.timedelta64(1, 'D')), 1) + 
self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=timedelta(1)), 1) + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc('2000-01-10', method='nearest', tolerance='foo') + + msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' + with tm.assertRaisesRegexp(ValueError, msg): + idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') + with tm.assertRaises(KeyError): + idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') + + def test_where(self): + i = self.create_index() + result = i.where(notnull(i)) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notnull(i2)) + expected = i2 + tm.assert_index_equal(result, expected) + + def test_where_other(self): + + i = self.create_index() + for arr in [np.nan, pd.NaT]: + result = i.where(notnull(i), other=np.nan) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notnull(i2), i2) + tm.assert_index_equal(result, i2) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), + freq='D') + result = i.where(notnull(i2), i2.values) + tm.assert_index_equal(result, i2) + + def test_get_indexer(self): + idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', + '2000-01-02T01'], freq='H') + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', + tolerance='1 hour'), + np.array([0, -1, 1], dtype=np.intp)) + + msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' + with self.assertRaisesRegexp(ValueError, msg): + idx.get_indexer(target, 'nearest', tolerance='1 minute') + + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', + tolerance='1 day'), + np.array([0, 1, 1], dtype=np.intp)) + + def test_repeat(self): + # GH10183 + idx = pd.period_range('2000-01-01', periods=3, freq='D') + res = idx.repeat(3) + exp = PeriodIndex(idx.values.repeat(3), freq='D') + self.assert_index_equal(res, exp) + self.assertEqual(res.freqstr, 'D') + + def test_period_index_indexer(self): + # GH4125 + idx = pd.period_range('2002-01', '2003-12', freq='M') + df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) + self.assert_frame_equal(df, df.loc[idx]) + self.assert_frame_equal(df, df.loc[list(idx)]) + self.assert_frame_equal(df, df.loc[list(idx)]) + self.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) + self.assert_frame_equal(df, df.loc[list(idx)]) + + def test_fillna_period(self): + # GH 11343 + idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00'], freq='H') + + exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00'], freq='H') + self.assert_index_equal( + idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) + + exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x', + pd.Period('2011-01-01 11:00', freq='H')], dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) + + exp = pd.Index([pd.Period('2011-01-01 
09:00', freq='H'), + pd.Period('2011-01-01', freq='D'), + pd.Period('2011-01-01 11:00', freq='H')], dtype=object) + self.assert_index_equal(idx.fillna(pd.Period('2011-01-01', freq='D')), + exp) + + def test_no_millisecond_field(self): + with self.assertRaises(AttributeError): + DatetimeIndex.millisecond + + with self.assertRaises(AttributeError): + DatetimeIndex([]).millisecond + + def test_difference_freq(self): + # GH14323: difference of Period MUST preserve frequency + # but the ability to union results must be preserved + + index = period_range("20160920", "20160925", freq="D") + + other = period_range("20160921", "20160924", freq="D") + expected = PeriodIndex(["20160920", "20160925"], freq='D') + idx_diff = index.difference(other) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + + other = period_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other) + expected = PeriodIndex(["20160920", "20160921"], freq='D') + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py deleted file mode 100644 index e5a4ced4ced4d..0000000000000 --- a/pandas/tests/indexes/test_datetimelike.py +++ /dev/null @@ -1,467 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np -from datetime import timedelta - -import pandas as pd -from pandas.util import testing as tm -from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, - NaT, Period, PeriodIndex, Series, Timedelta, - TimedeltaIndex, period_range, - timedelta_range, notnull) - - -from .datetimelike import DatetimeLike - - -class TestPeriodIndex(DatetimeLike, tm.TestCase): - _holder = PeriodIndex - _multiprocess_can_split_ = True - - def setUp(self): - self.indices = dict(index=tm.makePeriodIndex(10)) - self.setup_indices() - - def create_index(self): - return period_range('20130101', periods=5, freq='D') - - def test_construction_base_constructor(self): - # GH 13664 - arr = [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')] - tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.PeriodIndex(np.array(arr))) - - arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')] - tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.PeriodIndex(np.array(arr))) - - arr = [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='D')] - tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) - - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.Index(np.array(arr), dtype=object)) - - def test_astype(self): - # GH 13149, GH 13209 - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - - result = idx.astype(object) - expected = Index([Period('2016-05-16', freq='D')] + - [Period(NaT, freq='D')] * 3, dtype='object') - tm.assert_index_equal(result, expected) - - result = idx.astype(int) - expected = Int64Index([16937] + [-9223372036854775808] * 3, - dtype=np.int64) - tm.assert_index_equal(result, expected) - - idx = period_range('1990', '2009', freq='A') - result = idx.astype('i8') - self.assert_index_equal(result, Index(idx.asi8)) - self.assert_numpy_array_equal(result.values, idx.asi8) - - def test_astype_raises(self): - # GH 13149, GH 13209 - idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - - self.assertRaises(ValueError, idx.astype, str) - self.assertRaises(ValueError, 
idx.astype, float) - self.assertRaises(ValueError, idx.astype, 'timedelta64') - self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - - def test_shift(self): - - # test shift for PeriodIndex - # GH8083 - drange = self.create_index() - result = drange.shift(1) - expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) - - def test_pickle_compat_construction(self): - pass - - def test_get_loc(self): - idx = pd.period_range('2000-01-01', periods=3) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual( - idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) - self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) - self.assertEqual( - idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - - idx = pd.period_range('2000-01-01', periods=5)[::2] - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance='1 day'), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=pd.Timedelta('1D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=np.timedelta64(1, 'D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc('2000-01-10', method='nearest', tolerance='foo') - - msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(ValueError, msg): - idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') - with tm.assertRaises(KeyError): - idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') - - def test_where(self): - i = self.create_index() - result = i.where(notnull(i)) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2)) - expected = i2 - tm.assert_index_equal(result, expected) - - def test_where_other(self): - - i = self.create_index() - for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) - expected = i - tm.assert_index_equal(result, expected) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2), i2) - tm.assert_index_equal(result, i2) - - i2 = i.copy() - i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), - freq='D') - result = i.where(notnull(i2), i2.values) - tm.assert_index_equal(result, i2) - - def test_get_indexer(self): - idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') - tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.intp)) - - target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', - '2000-01-02T01'], freq='H') - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', - tolerance='1 hour'), - np.array([0, -1, 1], dtype=np.intp)) - - msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' - with self.assertRaisesRegexp(ValueError, msg): - idx.get_indexer(target, 'nearest', tolerance='1 minute') - - 
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', - tolerance='1 day'), - np.array([0, 1, 1], dtype=np.intp)) - - def test_repeat(self): - # GH10183 - idx = pd.period_range('2000-01-01', periods=3, freq='D') - res = idx.repeat(3) - exp = PeriodIndex(idx.values.repeat(3), freq='D') - self.assert_index_equal(res, exp) - self.assertEqual(res.freqstr, 'D') - - def test_period_index_indexer(self): - # GH4125 - idx = pd.period_range('2002-01', '2003-12', freq='M') - df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) - self.assert_frame_equal(df, df.loc[idx]) - self.assert_frame_equal(df, df.loc[list(idx)]) - self.assert_frame_equal(df, df.loc[list(idx)]) - self.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) - self.assert_frame_equal(df, df.loc[list(idx)]) - - def test_fillna_period(self): - # GH 11343 - idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00'], freq='H') - - exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00'], freq='H') - self.assert_index_equal( - idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) - - exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x', - pd.Period('2011-01-01 11:00', freq='H')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), - pd.Period('2011-01-01', freq='D'), - pd.Period('2011-01-01 11:00', freq='H')], dtype=object) - self.assert_index_equal(idx.fillna(pd.Period('2011-01-01', freq='D')), - exp) - - def test_no_millisecond_field(self): - with self.assertRaises(AttributeError): - DatetimeIndex.millisecond - - with self.assertRaises(AttributeError): - DatetimeIndex([]).millisecond - - def test_difference_freq(self): - # GH14323: difference of Period MUST preserve frequency - # but the ability to union results must be preserved - - index = period_range("20160920", "20160925", freq="D") - - other = period_range("20160921", "20160924", freq="D") - expected = PeriodIndex(["20160920", "20160925"], freq='D') - idx_diff = index.difference(other) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - other = period_range("20160922", "20160925", freq="D") - idx_diff = index.difference(other) - expected = PeriodIndex(["20160920", "20160921"], freq='D') - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - -class TestTimedeltaIndex(DatetimeLike, tm.TestCase): - _holder = TimedeltaIndex - _multiprocess_can_split_ = True - - def setUp(self): - self.indices = dict(index=tm.makeTimedeltaIndex(10)) - self.setup_indices() - - def create_index(self): - return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) - - def test_construction_base_constructor(self): - arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] - tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.TimedeltaIndex(np.array(arr))) - - arr = [np.nan, pd.NaT, pd.Timedelta('1 days')] - tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) - tm.assert_index_equal(pd.Index(np.array(arr)), - pd.TimedeltaIndex(np.array(arr))) - - def test_shift(self): - # test shift for TimedeltaIndex - # err8083 - - drange = self.create_index() - result = drange.shift(1) - expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', - '3 days 01:00:00', - '4 days 01:00:00', '5 days 01:00:00'], - freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D 1s') - expected = 
TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', - '8 days 01:00:03', '9 days 01:00:03', - '10 days 01:00:03'], freq='D') - self.assert_index_equal(result, expected) - - def test_astype(self): - # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - result = idx.astype(object) - expected = Index([Timedelta('1 days 03:46:40')] + [pd.NaT] * 3, - dtype=object) - tm.assert_index_equal(result, expected) - - result = idx.astype(int) - expected = Int64Index([100000000000000] + [-9223372036854775808] * 3, - dtype=np.int64) - tm.assert_index_equal(result, expected) - - rng = timedelta_range('1 days', periods=10) - - result = rng.astype('i8') - self.assert_index_equal(result, Index(rng.asi8)) - self.assert_numpy_array_equal(rng.asi8, result.values) - - def test_astype_timedelta64(self): - # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - result = idx.astype('timedelta64') - expected = Float64Index([1e+14] + [np.NaN] * 3, dtype='float64') - tm.assert_index_equal(result, expected) - - result = idx.astype('timedelta64[ns]') - tm.assert_index_equal(result, idx) - self.assertFalse(result is idx) - - result = idx.astype('timedelta64[ns]', copy=False) - tm.assert_index_equal(result, idx) - self.assertTrue(result is idx) - - def test_astype_raises(self): - # GH 13149, GH 13209 - idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) - - self.assertRaises(ValueError, idx.astype, float) - self.assertRaises(ValueError, idx.astype, str) - self.assertRaises(ValueError, idx.astype, 'datetime64') - self.assertRaises(ValueError, idx.astype, 'datetime64[ns]') - - def test_get_loc(self): - idx = pd.to_timedelta(['0 days', '1 days', '2 days']) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - - self.assertEqual( - idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) - self.assertEqual( - idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) - self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) - - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc(idx[1], method='nearest', tolerance='foo') - - for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc('1 day 1 hour', method), loc) - - def test_get_indexer(self): - idx = pd.to_timedelta(['0 days', '1 days', '2 days']) - tm.assert_numpy_array_equal(idx.get_indexer(idx), - np.array([0, 1, 2], dtype=np.intp)) - - target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), - np.array([-1, 0, 1], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), - np.array([0, 1, 2], dtype=np.intp)) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), - np.array([0, 1, 1], dtype=np.intp)) - - res = idx.get_indexer(target, 'nearest', - tolerance=pd.Timedelta('1 hour')) - tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) - - def test_numeric_compat(self): - - idx = self._holder(np.arange(5, dtype='int64')) - didx = self._holder(np.arange(5, dtype='int64') ** 2) - result = idx * 1 - tm.assert_index_equal(result, idx) - - result = 1 * idx - tm.assert_index_equal(result, idx) - - result = idx / 1 - tm.assert_index_equal(result, idx) - - result = idx // 1 - tm.assert_index_equal(result, idx) - - result = idx * np.array(5, 
dtype='int64') - tm.assert_index_equal(result, - self._holder(np.arange(5, dtype='int64') * 5)) - - result = idx * np.arange(5, dtype='int64') - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='int64')) - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='float64') + 0.1) - tm.assert_index_equal(result, self._holder(np.arange( - 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) - - # invalid - self.assertRaises(TypeError, lambda: idx * idx) - self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) - - def test_pickle_compat_construction(self): - pass - - def test_ufunc_coercions(self): - # normal ops are also tested in tseries/test_timedeltas.py - idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') - - for result in [idx * 2, np.multiply(idx, 2)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'], - freq='4H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '4H') - - for result in [idx / 2, np.divide(idx, 2)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'], - freq='H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'H') - - idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') - for result in [-idx, np.negative(idx)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'], - freq='-2H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '-2H') - - idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'], - freq='H', name='x') - for result in [abs(idx), np.absolute(idx)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'], - freq=None, name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, None) - - def test_fillna_timedelta(self): - # GH 11343 - idx = pd.TimedeltaIndex(['1 day', pd.NaT, '3 day']) - - exp = pd.TimedeltaIndex(['1 day', '2 day', '3 day']) - self.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp) - - exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day']) - idx.fillna(pd.Timedelta('3 hour')) - - exp = pd.Index( - [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - def test_difference_freq(self): - # GH14323: Difference of TimedeltaIndex should not preserve frequency - - index = timedelta_range("0 days", "5 days", freq="D") - - other = timedelta_range("1 days", "4 days", freq="D") - expected = TimedeltaIndex(["0 days", "5 days"], freq=None) - idx_diff = index.difference(other) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) - - other = timedelta_range("2 days", "5 days", freq="D") - idx_diff = index.difference(other) - expected = TimedeltaIndex(["0 days", "1 days"], freq=None) - tm.assert_index_equal(idx_diff, expected) - tm.assert_attr_equal('freq', idx_diff, expected) diff --git a/pandas/tests/indexes/test_timedelta.py b/pandas/tests/indexes/test_timedelta.py deleted file mode 100644 index be01ad03a0660..0000000000000 --- a/pandas/tests/indexes/test_timedelta.py +++ /dev/null @@ -1,43 +0,0 @@ -import numpy as np -from datetime import timedelta - -import pandas as pd -import pandas.util.testing as tm -from pandas import (timedelta_range, 
date_range, Series, Timedelta, - DatetimeIndex) - - -class TestSlicing(tm.TestCase): - - def test_timedelta(self): - # this is valid too - index = date_range('1/1/2000', periods=50, freq='B') - shifted = index + timedelta(1) - back = shifted + timedelta(-1) - self.assertTrue(tm.equalContents(index, back)) - self.assertEqual(shifted.freq, index.freq) - self.assertEqual(shifted.freq, back.freq) - - result = index - timedelta(1) - expected = index + timedelta(-1) - tm.assert_index_equal(result, expected) - - # GH4134, buggy with timedeltas - rng = date_range('2013', '2014') - s = Series(rng) - result1 = rng - pd.offsets.Hour(1) - result2 = DatetimeIndex(s - np.timedelta64(100000000)) - result3 = rng - np.timedelta64(100000000) - result4 = DatetimeIndex(s - pd.offsets.Hour(1)) - tm.assert_index_equal(result1, result4) - tm.assert_index_equal(result2, result3) - - -class TestTimeSeries(tm.TestCase): - _multiprocess_can_split_ = True - - def test_series_box_timedelta(self): - rng = timedelta_range('1 day 1 s', periods=5, freq='h') - s = Series(rng) - tm.assertIsInstance(s[1], Timedelta) - tm.assertIsInstance(s.iat[2], Timedelta) diff --git a/pandas/tests/indexes/timedeltas/__init__.py b/pandas/tests/indexes/timedeltas/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py new file mode 100644 index 0000000000000..88e7b1387feff --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -0,0 +1,121 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import (TimedeltaIndex, timedelta_range, Int64Index, Float64Index, + Index, Timedelta, Series) + +from ..datetimelike import DatetimeLike + + +class TestTimedeltaIndex(DatetimeLike, tm.TestCase): + _holder = TimedeltaIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makeTimedeltaIndex(10)) + self.setup_indices() + + def create_index(self): + return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + + def test_astype(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + + result = idx.astype(object) + expected = Index([Timedelta('1 days 03:46:40')] + [pd.NaT] * 3, + dtype=object) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index([100000000000000] + [-9223372036854775808] * 3, + dtype=np.int64) + tm.assert_index_equal(result, expected) + + rng = timedelta_range('1 days', periods=10) + + result = rng.astype('i8') + self.assert_index_equal(result, Index(rng.asi8)) + self.assert_numpy_array_equal(rng.asi8, result.values) + + def test_astype_timedelta64(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + + result = idx.astype('timedelta64') + expected = Float64Index([1e+14] + [np.NaN] * 3, dtype='float64') + tm.assert_index_equal(result, expected) + + result = idx.astype('timedelta64[ns]') + tm.assert_index_equal(result, idx) + self.assertFalse(result is idx) + + result = idx.astype('timedelta64[ns]', copy=False) + tm.assert_index_equal(result, idx) + self.assertTrue(result is idx) + + def test_astype_raises(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) + + self.assertRaises(ValueError, idx.astype, float) + self.assertRaises(ValueError, idx.astype, str) + self.assertRaises(ValueError, idx.astype, 'datetime64') + self.assertRaises(ValueError, idx.astype, 'datetime64[ns]') + + def 
test_pickle_compat_construction(self): + pass + + def test_shift(self): + # test shift for TimedeltaIndex + # err8083 + + drange = self.create_index() + result = drange.shift(1) + expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', + '4 days 01:00:00', '5 days 01:00:00'], + freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D 1s') + expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', + '8 days 01:00:03', '9 days 01:00:03', + '10 days 01:00:03'], freq='D') + self.assert_index_equal(result, expected) + + def test_numeric_compat(self): + + idx = self._holder(np.arange(5, dtype='int64')) + didx = self._holder(np.arange(5, dtype='int64') ** 2) + result = idx * 1 + tm.assert_index_equal(result, idx) + + result = 1 * idx + tm.assert_index_equal(result, idx) + + result = idx / 1 + tm.assert_index_equal(result, idx) + + result = idx // 1 + tm.assert_index_equal(result, idx) + + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, + self._holder(np.arange(5, dtype='int64') * 5)) + + result = idx * np.arange(5, dtype='int64') + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='int64')) + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='float64') + 0.1) + tm.assert_index_equal(result, self._holder(np.arange( + 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) + + # invalid + self.assertRaises(TypeError, lambda: idx * idx) + self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) + self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py new file mode 100644 index 0000000000000..0810b13eb0f53 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -0,0 +1,88 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas import TimedeltaIndex, timedelta_range, tslib, to_timedelta + +iNaT = tslib.iNaT + + +class TestTimedeltaIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_construction_base_constructor(self): + arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] + tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.TimedeltaIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Timedelta('1 days')] + tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), + pd.TimedeltaIndex(np.array(arr))) + + def test_constructor(self): + expected = TimedeltaIndex(['1 days', '1 days 00:00:05', '2 days', + '2 days 00:00:02', '0 days 00:00:03']) + result = TimedeltaIndex(['1 days', '1 days, 00:00:05', np.timedelta64( + 2, 'D'), timedelta(days=2, seconds=2), pd.offsets.Second(3)]) + tm.assert_index_equal(result, expected) + + # unicode + result = TimedeltaIndex([u'1 days', '1 days, 00:00:05', np.timedelta64( + 2, 'D'), timedelta(days=2, seconds=2), pd.offsets.Second(3)]) + + expected = TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', + '0 days 00:00:02']) + tm.assert_index_equal(TimedeltaIndex(range(3), unit='s'), expected) + expected = TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:05', + '0 days 00:00:09']) + tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit='s'), expected) + expected = TimedeltaIndex( + ['0 days 00:00:00.400', '0 days 00:00:00.450', + '0 days 
00:00:01.200']) + tm.assert_index_equal(TimedeltaIndex([400, 450, 1200], unit='ms'), + expected) + + def test_constructor_coverage(self): + rng = timedelta_range('1 days', periods=10.5) + exp = timedelta_range('1 days', periods=10) + self.assert_index_equal(rng, exp) + + self.assertRaises(ValueError, TimedeltaIndex, start='1 days', + periods='foo', freq='D') + + self.assertRaises(ValueError, TimedeltaIndex, start='1 days', + end='10 days') + + self.assertRaises(ValueError, TimedeltaIndex, '1 days') + + # generator expression + gen = (timedelta(i) for i in range(10)) + result = TimedeltaIndex(gen) + expected = TimedeltaIndex([timedelta(i) for i in range(10)]) + self.assert_index_equal(result, expected) + + # NumPy string array + strings = np.array(['1 days', '2 days', '3 days']) + result = TimedeltaIndex(strings) + expected = to_timedelta([1, 2, 3], unit='d') + self.assert_index_equal(result, expected) + + from_ints = TimedeltaIndex(expected.asi8) + self.assert_index_equal(from_ints, expected) + + # non-conforming freq + self.assertRaises(ValueError, TimedeltaIndex, + ['1 days', '2 days', '4 days'], freq='D') + + self.assertRaises(ValueError, TimedeltaIndex, periods=10, freq='D') + + def test_constructor_name(self): + idx = TimedeltaIndex(start='1 days', periods=1, freq='D', name='TEST') + self.assertEqual(idx.name, 'TEST') + + # GH10025 + idx2 = TimedeltaIndex(idx, name='something else') + self.assertEqual(idx2.name, 'something else') diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py new file mode 100644 index 0000000000000..b4a8bc79921bf --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -0,0 +1,110 @@ +from datetime import timedelta + +import pandas.util.testing as tm +from pandas import TimedeltaIndex, timedelta_range, compat, Index, Timedelta + + +class TestTimedeltaIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_insert(self): + + idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx') + + result = idx.insert(2, timedelta(days=5)) + exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') + self.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, 'inserted') + expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), + Timedelta('2day')], name='idx') + self.assertNotIsInstance(result, TimedeltaIndex) + tm.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + + idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') + + # preserve freq + expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02', + '1day 00:00:03'], + name='idx', freq='s') + expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', + '1day 00:00:03', '1day 00:00:04'], + name='idx', freq='s') + + # reset freq to None + expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01', + '1day 00:00:02', '1day 00:00:03'], + name='idx', freq=None) + expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', + '1day 00:00:03', '1day 00:00:05'], + name='idx', freq=None) + + cases = [(0, Timedelta('1day'), expected_0), + (-3, Timedelta('1day'), expected_0), + (3, Timedelta('1day 00:00:04'), expected_3), + (1, Timedelta('1day 00:00:01'), expected_1_nofreq), + (3, Timedelta('1day 00:00:05'), expected_3_nofreq)] + + for n, d, expected in cases: + result = idx.insert(n, d) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + 
self.assertEqual(result.freq, expected.freq) + + def test_delete(self): + idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx') + + # preserve freq + expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', + name='idx') + expected_4 = timedelta_range(start='1 Days', periods=4, freq='D', + name='idx') + + # reset freq to None + expected_1 = TimedeltaIndex( + ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx') + + cases = {0: expected_0, + -5: expected_0, + -1: expected_4, + 4: expected_4, + 1: expected_1} + for n, expected in compat.iteritems(cases): + result = idx.delete(n) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + with tm.assertRaises((IndexError, ValueError)): + # either, depending on numpy version + result = idx.delete(5) + + def test_delete_slice(self): + idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') + + # preserve freq + expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', + name='idx') + expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', + name='idx') + + # reset freq to None + expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d', + '7 d', '8 d', '9 d', '10d'], + freq=None, name='idx') + + cases = {(0, 1, 2): expected_0_2, + (7, 8, 9): expected_7_9, + (3, 4, 5): expected_3_5} + for n, expected in compat.iteritems(cases): + result = idx.delete(n) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) + + result = idx.delete(slice(n[0], n[-1] + 1)) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(result.freq, expected.freq) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py new file mode 100644 index 0000000000000..406a5bdbf3bcd --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -0,0 +1,1276 @@ +import numpy as np +from datetime import timedelta +from distutils.version import LooseVersion + +import pandas as pd +import pandas.util.testing as tm +from pandas import to_timedelta +from pandas.util.testing import assert_series_equal, assert_frame_equal +from pandas import (Series, Timedelta, DataFrame, Timestamp, TimedeltaIndex, + timedelta_range, date_range, DatetimeIndex, Int64Index, + _np_version_under1p10, Float64Index, Index, tslib) + +from pandas.tests.test_base import Ops + + +class TestTimedeltaIndexOps(Ops): + def setUp(self): + super(TestTimedeltaIndexOps, self).setUp() + mask = lambda x: isinstance(x, TimedeltaIndex) + self.is_valid_objs = [o for o in self.objs if mask(o)] + self.not_valid_objs = [] + + def test_ops_properties(self): + self.check_ops_properties(['days', 'hours', 'minutes', 'seconds', + 'milliseconds']) + self.check_ops_properties(['microseconds', 'nanoseconds']) + + def test_asobject_tolist(self): + idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') + expected_list = [Timedelta('1 days'), Timedelta('2 days'), + Timedelta('3 days'), Timedelta('4 days')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), pd.NaT,
timedelta(days=4)], name='idx') + expected_list = [Timedelta('1 days'), Timedelta('2 days'), pd.NaT, + Timedelta('4 days')] + expected = pd.Index(expected_list, dtype=object, name='idx') + result = idx.asobject + self.assertTrue(isinstance(result, Index)) + self.assertEqual(result.dtype, object) + self.assert_index_equal(result, expected) + self.assertEqual(result.name, expected.name) + self.assertEqual(idx.tolist(), expected_list) + + def test_minmax(self): + + # monotonic + idx1 = TimedeltaIndex(['1 days', '2 days', '3 days']) + self.assertTrue(idx1.is_monotonic) + + # non-monotonic + idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT']) + self.assertFalse(idx2.is_monotonic) + + for idx in [idx1, idx2]: + self.assertEqual(idx.min(), Timedelta('1 days')), + self.assertEqual(idx.max(), Timedelta('3 days')), + self.assertEqual(idx.argmin(), 0) + self.assertEqual(idx.argmax(), 2) + + for op in ['min', 'max']: + # Return NaT + obj = TimedeltaIndex([]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = TimedeltaIndex([pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) + self.assertTrue(pd.isnull(getattr(obj, op)())) + + def test_numpy_minmax(self): + dr = pd.date_range(start='2016-01-15', end='2016-01-20') + td = TimedeltaIndex(np.asarray(dr)) + + self.assertEqual(np.min(td), Timedelta('16815 days')) + self.assertEqual(np.max(td), Timedelta('16820 days')) + + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.min, td, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.max, td, out=0) + + self.assertEqual(np.argmin(td), 0) + self.assertEqual(np.argmax(td), 5) + + if not _np_version_under1p10: + errmsg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, td, out=0) + tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, td, out=0) + + def test_round(self): + td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') + elt = td[1] + + expected_rng = TimedeltaIndex([ + Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 00:00:00'), + Timedelta('16801 days 01:00:00'), + Timedelta('16801 days 02:00:00'), + Timedelta('16801 days 02:00:00'), + ]) + expected_elt = expected_rng[1] + + tm.assert_index_equal(td.round(freq='H'), expected_rng) + self.assertEqual(elt.round(freq='H'), expected_elt) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + td.round(freq='foo') + with tm.assertRaisesRegexp(ValueError, msg): + elt.round(freq='foo') + + msg = " is a non-fixed frequency" + tm.assertRaisesRegexp(ValueError, msg, td.round, freq='M') + tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') + + def test_representation(self): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""" + + exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', " + "freq='D')") + + exp3 = ("TimedeltaIndex(['1 days', '2 days'], " + "dtype='timedelta64[ns]', freq='D')") + + exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], " + "dtype='timedelta64[ns]', freq='D')") + + exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " + "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)") + + with 
pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + for func in ['__repr__', '__unicode__', '__str__']: + result = getattr(idx, func)() + self.assertEqual(result, expected) + + def test_representation_to_series(self): + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """Series([], dtype: timedelta64[ns])""" + + exp2 = """0 1 days +dtype: timedelta64[ns]""" + + exp3 = """0 1 days +1 2 days +dtype: timedelta64[ns]""" + + exp4 = """0 1 days +1 2 days +2 3 days +dtype: timedelta64[ns]""" + + exp5 = """0 1 days 00:00:01 +1 2 days 00:00:00 +2 3 days 00:00:00 +dtype: timedelta64[ns]""" + + with pd.option_context('display.width', 300): + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = repr(pd.Series(idx)) + self.assertEqual(result, expected) + + def test_summary(self): + # GH9116 + idx1 = TimedeltaIndex([], freq='D') + idx2 = TimedeltaIndex(['1 days'], freq='D') + idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') + idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') + idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) + + exp1 = """TimedeltaIndex: 0 entries +Freq: D""" + + exp2 = """TimedeltaIndex: 1 entries, 1 days to 1 days +Freq: D""" + + exp3 = """TimedeltaIndex: 2 entries, 1 days to 2 days +Freq: D""" + + exp4 = """TimedeltaIndex: 3 entries, 1 days to 3 days +Freq: D""" + + exp5 = ("TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days " + "00:00:00") + + for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], + [exp1, exp2, exp3, exp4, exp5]): + result = idx.summary() + self.assertEqual(result, expected) + + def test_add_iadd(self): + + # only test adding/sub offsets as + is now numeric + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = timedelta_range('1 days', '10 days') + result = rng + delta + expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', + freq='D') + tm.assert_index_equal(result, expected) + rng += delta + tm.assert_index_equal(rng, expected) + + # int + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + result = rng + 1 + expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng += 1 + tm.assert_index_equal(rng, expected) + + def test_sub_isub(self): + # only test adding/sub offsets as - is now numeric + + # offset + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + for delta in offsets: + rng = timedelta_range('1 days', '10 days') + result = rng - delta + expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') + tm.assert_index_equal(result, expected) + rng -= delta + tm.assert_index_equal(rng, expected) + + # int + rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) + result = rng - 1 + expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) + tm.assert_index_equal(result, expected) + rng -= 1 + tm.assert_index_equal(rng, expected) + + idx = TimedeltaIndex(['1 day', '2 day']) + msg = "cannot subtract a datelike from a TimedeltaIndex" + with tm.assertRaisesRegexp(TypeError, msg): + idx - Timestamp('2011-01-01') + + result = 
Timestamp('2011-01-01') + idx + expected = DatetimeIndex(['2011-01-02', '2011-01-03']) + tm.assert_index_equal(result, expected) + + def test_ops_compat(self): + + offsets = [pd.offsets.Hour(2), timedelta(hours=2), + np.timedelta64(2, 'h'), Timedelta(hours=2)] + + rng = timedelta_range('1 days', '10 days', name='foo') + + # multiply + for offset in offsets: + self.assertRaises(TypeError, lambda: rng * offset) + + # divide + expected = Int64Index((np.arange(10) + 1) * 12, name='foo') + for offset in offsets: + result = rng / offset + tm.assert_index_equal(result, expected, exact=False) + + # divide with nats + rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + expected = Float64Index([12, np.nan, 24], name='foo') + for offset in offsets: + result = rng / offset + tm.assert_index_equal(result, expected) + + # don't allow division by NaT (maybe this could be allowed in the future) + self.assertRaises(TypeError, lambda: rng / pd.NaT) + + def test_subtraction_ops(self): + + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + td = Timedelta('1 days') + dt = Timestamp('20130101') + + self.assertRaises(TypeError, lambda: tdi - dt) + self.assertRaises(TypeError, lambda: tdi - dti) + self.assertRaises(TypeError, lambda: td - dt) + self.assertRaises(TypeError, lambda: td - dti) + + result = dt - dti + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'], name='bar') + tm.assert_index_equal(result, expected) + + result = dti - dt + expected = TimedeltaIndex(['0 days', '1 days', '2 days'], name='bar') + tm.assert_index_equal(result, expected) + + result = tdi - td + expected = TimedeltaIndex(['0 days', pd.NaT, '1 days'], name='foo') + tm.assert_index_equal(result, expected, check_names=False) + + result = td - tdi + expected = TimedeltaIndex(['0 days', pd.NaT, '-1 days'], name='foo') + tm.assert_index_equal(result, expected, check_names=False) + + result = dti - td + expected = DatetimeIndex( + ['20121231', '20130101', '20130102'], name='bar') + tm.assert_index_equal(result, expected, check_names=False) + + result = dt - tdi + expected = DatetimeIndex(['20121231', pd.NaT, '20121230'], name='foo') + tm.assert_index_equal(result, expected) + + def test_subtraction_ops_with_tz(self): + + # check that dt/dti subtraction ops with tz are validated + dti = date_range('20130101', periods=3) + ts = Timestamp('20130101') + dt = ts.to_pydatetime() + dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') + ts_tz = Timestamp('20130101').tz_localize('US/Eastern') + ts_tz2 = Timestamp('20130101').tz_localize('CET') + dt_tz = ts_tz.to_pydatetime() + td = Timedelta('1 days') + + def _check(result, expected): + self.assertEqual(result, expected) + self.assertIsInstance(result, Timedelta) + + # scalars + result = ts - ts + expected = Timedelta('0 days') + _check(result, expected) + + result = dt_tz - ts_tz + expected = Timedelta('0 days') + _check(result, expected) + + result = ts_tz - dt_tz + expected = Timedelta('0 days') + _check(result, expected) + + # tz mismatches + self.assertRaises(TypeError, lambda: dt_tz - ts) + self.assertRaises(TypeError, lambda: dt_tz - dt) + self.assertRaises(TypeError, lambda: dt_tz - ts_tz2) + self.assertRaises(TypeError, lambda: dt - dt_tz) + self.assertRaises(TypeError, lambda: ts - dt_tz) + self.assertRaises(TypeError, lambda: ts_tz2 - ts) + self.assertRaises(TypeError, lambda: ts_tz2 - dt) + self.assertRaises(TypeError, lambda: ts_tz - ts_tz2) + + # with dti +
self.assertRaises(TypeError, lambda: dti - ts_tz) + self.assertRaises(TypeError, lambda: dti_tz - ts) + self.assertRaises(TypeError, lambda: dti_tz - ts_tz2) + + result = dti_tz - dt_tz + expected = TimedeltaIndex(['0 days', '1 days', '2 days']) + tm.assert_index_equal(result, expected) + + result = dt_tz - dti_tz + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) + tm.assert_index_equal(result, expected) + + result = dti_tz - ts_tz + expected = TimedeltaIndex(['0 days', '1 days', '2 days']) + tm.assert_index_equal(result, expected) + + result = ts_tz - dti_tz + expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) + tm.assert_index_equal(result, expected) + + result = td - td + expected = Timedelta('0 days') + _check(result, expected) + + result = dti_tz - td + expected = DatetimeIndex( + ['20121231', '20130101', '20130102'], tz='US/Eastern') + tm.assert_index_equal(result, expected) + + def test_dti_tdi_numeric_ops(self): + + # These are normally union/diff set-like ops + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + + # TODO(wesm): unused? + # td = Timedelta('1 days') + # dt = Timestamp('20130101') + + result = tdi - tdi + expected = TimedeltaIndex(['0 days', pd.NaT, '0 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = tdi + tdi + expected = TimedeltaIndex(['2 days', pd.NaT, '4 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = dti - tdi # name will be reset + expected = DatetimeIndex(['20121231', pd.NaT, '20130101']) + tm.assert_index_equal(result, expected) + + def test_sub_period(self): + # GH 13078 + # not supported, check TypeError + p = pd.Period('2011-01-01', freq='D') + + for freq in [None, 'H']: + idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) + + with tm.assertRaises(TypeError): + idx - p + + with tm.assertRaises(TypeError): + p - idx + + def test_addition_ops(self): + + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') + dti = date_range('20130101', periods=3, name='bar') + td = Timedelta('1 days') + dt = Timestamp('20130101') + + result = tdi + dt + expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') + tm.assert_index_equal(result, expected) + + result = dt + tdi + expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') + tm.assert_index_equal(result, expected) + + result = td + tdi + expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') + tm.assert_index_equal(result, expected) + + result = tdi + td + expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') + tm.assert_index_equal(result, expected) + + # unequal length + self.assertRaises(ValueError, lambda: tdi + dti[0:1]) + self.assertRaises(ValueError, lambda: tdi[0:1] + dti) + + # random indexes + self.assertRaises(TypeError, lambda: tdi + Int64Index([1, 2, 3])) + + # this is a union! 
+ # self.assertRaises(TypeError, lambda : Int64Index([1,2,3]) + tdi) + + result = tdi + dti # name will be reset + expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) + tm.assert_index_equal(result, expected) + + result = dti + tdi # name will be reset + expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) + tm.assert_index_equal(result, expected) + + result = dt + td + expected = Timestamp('20130102') + self.assertEqual(result, expected) + + result = td + dt + expected = Timestamp('20130102') + self.assertEqual(result, expected) + + def test_comp_nat(self): + left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, + pd.Timedelta('3 days')]) + right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) + + for l, r in [(left, right), (left.asobject, right.asobject)]: + result = l == r + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = l != r + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == r, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(l != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != l, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > l, expected) + + def test_value_counts_unique(self): + # GH 7735 + + idx = timedelta_range('1 days 09:00:00', freq='H', periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) + + exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10) + expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + expected = timedelta_range('1 days 09:00:00', freq='H', periods=10) + tm.assert_index_equal(idx.unique(), expected) + + idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00', + '1 days 09:00:00', '1 days 08:00:00', + '1 days 08:00:00', pd.NaT]) + + exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00']) + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00', + pd.NaT]) + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + + def test_nonunique_contains(self): + # GH 9512 + for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], + ['00:01:00', '00:01:00', '00:02:00'], + ['00:01:00', '00:01:00', '00:00:01'])): + tm.assertIn(idx[0], idx) + + def test_unknown_attribute(self): + # GH 9680 + tdi = pd.timedelta_range(start=0, periods=10, freq='1s') + ts = pd.Series(np.random.normal(size=10), index=tdi) + self.assertNotIn('foo', ts.__dict__.keys()) + self.assertRaises(AttributeError, lambda: ts.foo) + + def test_order(self): + # GH 10295 + idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D', + name='idx') + idx2 = TimedeltaIndex( + ['1 hour', '2 hour', '3 hour'], freq='H', name='idx') + + for idx in [idx1, idx2]: + ordered = idx.sort_values() + self.assert_index_equal(ordered, idx) + self.assertEqual(ordered.freq, idx.freq) + + ordered = 
idx.sort_values(ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + ordered, indexer = idx.sort_values(return_indexer=True) + self.assert_index_equal(ordered, idx) + self.assert_numpy_array_equal(indexer, + np.array([0, 1, 2]), + check_dtype=False) + self.assertEqual(ordered.freq, idx.freq) + + ordered, indexer = idx.sort_values(return_indexer=True, + ascending=False) + self.assert_index_equal(ordered, idx[::-1]) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour', + '2 hour ', '1 hour'], name='idx1') + exp1 = TimedeltaIndex(['1 hour', '1 hour', '2 hour', + '3 hour', '5 hour'], name='idx1') + + idx2 = TimedeltaIndex(['1 day', '3 day', '5 day', + '2 day', '1 day'], name='idx2') + + # TODO(wesm): unused? + # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day', + # '3 day', '5 day'], name='idx2') + + # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute', + # '2 minute', pd.NaT], name='idx3') + # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute', + # '5 minute'], name='idx3') + + for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: + ordered = idx.sort_values() + self.assert_index_equal(ordered, expected) + self.assertIsNone(ordered.freq) + + ordered = idx.sort_values(ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.sort_values(return_indexer=True) + self.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.sort_values(return_indexer=True, + ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 4, 0]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) + self.assertIsNone(ordered.freq) + + def test_getitem(self): + idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + + for idx in [idx1]: + result = idx[0] + self.assertEqual(result, pd.Timedelta('1 day')) + + result = idx[0:5] + expected = pd.timedelta_range('1 day', '5 day', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[0:10:2] + expected = pd.timedelta_range('1 day', '9 day', freq='2D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[-20:-5:3] + expected = pd.timedelta_range('12 day', '24 day', freq='3D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[4::-1] + expected = TimedeltaIndex(['5 day', '4 day', '3 day', + '2 day', '1 day'], + freq='-1D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + def test_drop_duplicates_metadata(self): + # GH 10115 + idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + result = idx.drop_duplicates() + self.assert_index_equal(idx, result) + self.assertEqual(idx.freq, result.freq) + + idx_dup = idx.append(idx) + self.assertIsNone(idx_dup.freq) # freq is reset + result = idx_dup.drop_duplicates() + self.assert_index_equal(idx, result) + self.assertIsNone(result.freq) + + def test_drop_duplicates(self): + # to check Index/Series compat + base = pd.timedelta_range('1 day', '31 day', 
freq='D', name='idx') + idx = base.append(base[:5]) + + res = idx.drop_duplicates() + tm.assert_index_equal(res, base) + res = Series(idx).drop_duplicates() + tm.assert_series_equal(res, Series(base)) + + res = idx.drop_duplicates(keep='last') + exp = base[5:].append(base[:5]) + tm.assert_index_equal(res, exp) + res = Series(idx).drop_duplicates(keep='last') + tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) + + res = idx.drop_duplicates(keep=False) + tm.assert_index_equal(res, base[5:]) + res = Series(idx).drop_duplicates(keep=False) + tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + + def test_take(self): + # GH 10295 + idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + + for idx in [idx1]: + result = idx.take([0]) + self.assertEqual(result, pd.Timedelta('1 day')) + + result = idx.take([-1]) + self.assertEqual(result, pd.Timedelta('31 day')) + + result = idx.take([0, 1, 2]) + expected = pd.timedelta_range('1 day', '3 day', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([0, 2, 4]) + expected = pd.timedelta_range('1 day', '5 day', freq='2D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([7, 4, 1]) + expected = pd.timedelta_range('8 day', '2 day', freq='-3D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([3, 2, 5]) + expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + result = idx.take([-3, 2, 5]) + expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + def test_take_invalid_kwargs(self): + idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, idx.take, + indices, mode='clip') + + def test_infer_freq(self): + # GH 11018 + for freq in ['D', '3D', '-3D', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S' + ]: + idx = pd.timedelta_range('1', freq=freq, periods=10) + result = pd.TimedeltaIndex(idx.asi8, freq='infer') + tm.assert_index_equal(idx, result) + self.assertEqual(result.freq, freq) + + def test_nat_new(self): + + idx = pd.timedelta_range('1', freq='D', periods=5, name='x') + result = idx._nat_new() + exp = pd.TimedeltaIndex([pd.NaT] * 5, name='x') + tm.assert_index_equal(result, exp) + + result = idx._nat_new(box=False) + exp = np.array([tslib.iNaT] * 5, dtype=np.int64) + tm.assert_numpy_array_equal(result, exp) + + def test_shift(self): + # GH 9903 + idx = pd.TimedeltaIndex([], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + tm.assert_index_equal(idx.shift(3, freq='H'), idx) + + idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') + tm.assert_index_equal(idx.shift(0, freq='H'), idx) + exp = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'], name='xxx') + tm.assert_index_equal(idx.shift(3, freq='H'), exp) + exp = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'], name='xxx') 
+ tm.assert_index_equal(idx.shift(-3, freq='H'), exp) + + tm.assert_index_equal(idx.shift(0, freq='T'), idx) + exp = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'], + name='xxx') + tm.assert_index_equal(idx.shift(3, freq='T'), exp) + exp = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'], + name='xxx') + tm.assert_index_equal(idx.shift(-3, freq='T'), exp) + + def test_repeat(self): + index = pd.timedelta_range('1 days', periods=2, freq='D') + exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days']) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + index = TimedeltaIndex(['1 days', 'NaT', '3 days']) + exp = TimedeltaIndex(['1 days', '1 days', '1 days', + 'NaT', 'NaT', 'NaT', + '3 days', '3 days', '3 days']) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + self.assertIsNone(res.freq) + + def test_nat(self): + self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT) + self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT) + + idx = pd.TimedeltaIndex(['1 days', '2 days']) + self.assertTrue(idx._can_hold_na) + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + self.assertFalse(idx.hasnans) + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([], dtype=np.intp)) + + idx = pd.TimedeltaIndex(['1 days', 'NaT']) + self.assertTrue(idx._can_hold_na) + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + self.assertTrue(idx.hasnans) + tm.assert_numpy_array_equal(idx._nan_idxs, + np.array([1], dtype=np.intp)) + + def test_equals(self): + # GH 13107 + idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT']) + self.assertTrue(idx.equals(idx)) + self.assertTrue(idx.equals(idx.copy())) + self.assertTrue(idx.equals(idx.asobject)) + self.assertTrue(idx.asobject.equals(idx)) + self.assertTrue(idx.asobject.equals(idx.asobject)) + self.assertFalse(idx.equals(list(idx))) + self.assertFalse(idx.equals(pd.Series(idx))) + + idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT']) + self.assertFalse(idx.equals(idx2)) + self.assertFalse(idx.equals(idx2.copy())) + self.assertFalse(idx.equals(idx2.asobject)) + self.assertFalse(idx.asobject.equals(idx2)) + self.assertFalse(idx.asobject.equals(idx2.asobject)) + self.assertFalse(idx.equals(list(idx2))) + self.assertFalse(idx.equals(pd.Series(idx2))) + + +class TestTimedeltas(tm.TestCase): + _multiprocess_can_split_ = True + + def test_ops(self): + + td = Timedelta(10, unit='d') + self.assertEqual(-td, Timedelta(-10, unit='d')) + self.assertEqual(+td, Timedelta(10, unit='d')) + self.assertEqual(td - td, Timedelta(0, unit='ns')) + self.assertTrue((td - pd.NaT) is pd.NaT) + self.assertEqual(td + td, Timedelta(20, unit='d')) + self.assertTrue((td + pd.NaT) is pd.NaT) + self.assertEqual(td * 2, Timedelta(20, unit='d')) + self.assertTrue((td * pd.NaT) is pd.NaT) + self.assertEqual(td / 2, Timedelta(5, unit='d')) + self.assertEqual(abs(td), td) + self.assertEqual(abs(-td), td) + self.assertEqual(td / td, 1) + self.assertTrue((td / pd.NaT) is np.nan) + + # invert + self.assertEqual(-td, Timedelta('-10d')) + self.assertEqual(td * -1, Timedelta('-10d')) + self.assertEqual(-1 * td, Timedelta('-10d')) + self.assertEqual(abs(-td), Timedelta('10d')) + + # invalid + self.assertRaises(TypeError, lambda: Timedelta(11, unit='d') // 2) + + # invalid multiply with another timedelta + self.assertRaises(TypeError, lambda: td * td) + + # can't operate with integers + self.assertRaises(TypeError, lambda: td + 2) + self.assertRaises(TypeError, 
lambda: td - 2) + + def test_ops_offsets(self): + td = Timedelta(10, unit='d') + self.assertEqual(Timedelta(241, unit='h'), td + pd.offsets.Hour(1)) + self.assertEqual(Timedelta(241, unit='h'), pd.offsets.Hour(1) + td) + self.assertEqual(240, td / pd.offsets.Hour(1)) + self.assertEqual(1 / 240.0, pd.offsets.Hour(1) / td) + self.assertEqual(Timedelta(239, unit='h'), td - pd.offsets.Hour(1)) + self.assertEqual(Timedelta(-239, unit='h'), pd.offsets.Hour(1) - td) + + def test_ops_ndarray(self): + td = Timedelta('1 day') + + # timedelta, timedelta + other = pd.to_timedelta(['1 day']).values + expected = pd.to_timedelta(['2 days']).values + self.assert_numpy_array_equal(td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other + td, expected) + self.assertRaises(TypeError, lambda: td + np.array([1])) + self.assertRaises(TypeError, lambda: np.array([1]) + td) + + expected = pd.to_timedelta(['0 days']).values + self.assert_numpy_array_equal(td - other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(-other + td, expected) + self.assertRaises(TypeError, lambda: td - np.array([1])) + self.assertRaises(TypeError, lambda: np.array([1]) - td) + + expected = pd.to_timedelta(['2 days']).values + self.assert_numpy_array_equal(td * np.array([2]), expected) + self.assert_numpy_array_equal(np.array([2]) * td, expected) + self.assertRaises(TypeError, lambda: td * other) + self.assertRaises(TypeError, lambda: other * td) + + self.assert_numpy_array_equal(td / other, + np.array([1], dtype=np.float64)) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other / td, + np.array([1], dtype=np.float64)) + + # timedelta, datetime + other = pd.to_datetime(['2000-01-01']).values + expected = pd.to_datetime(['2000-01-02']).values + self.assert_numpy_array_equal(td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other + td, expected) + + expected = pd.to_datetime(['1999-12-31']).values + self.assert_numpy_array_equal(-td + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other - td, expected) + + def test_ops_series(self): + # regression test for GH8813 + td = Timedelta('1 day') + other = pd.Series([1, 2]) + expected = pd.Series(pd.to_timedelta(['1 day', '2 days'])) + tm.assert_series_equal(expected, td * other) + tm.assert_series_equal(expected, other * td) + + def test_ops_series_object(self): + # GH 13043 + s = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'), + pd.Timestamp('2015-01-01', tz='Asia/Tokyo')], + name='xxx') + self.assertEqual(s.dtype, object) + + exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'), + pd.Timestamp('2015-01-02', tz='Asia/Tokyo')], + name='xxx') + tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) + tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) + + # object series & object series + s2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'), + pd.Timestamp('2015-01-05', tz='Asia/Tokyo')], + name='xxx') + self.assertEqual(s2.dtype, object) + exp = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')], + name='xxx') + tm.assert_series_equal(s2 - s, exp) + tm.assert_series_equal(s - s2, -exp) + + s = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')], + name='xxx', dtype=object) + self.assertEqual(s.dtype, object) + + exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')], + name='xxx') + tm.assert_series_equal(s + 
pd.Timedelta('00:30:00'), exp) + tm.assert_series_equal(pd.Timedelta('00:30:00') + s, exp) + + def test_ops_notimplemented(self): + class Other: + pass + + other = Other() + + td = Timedelta('1 day') + self.assertTrue(td.__add__(other) is NotImplemented) + self.assertTrue(td.__sub__(other) is NotImplemented) + self.assertTrue(td.__truediv__(other) is NotImplemented) + self.assertTrue(td.__mul__(other) is NotImplemented) + self.assertTrue(td.__floordiv__(td) is NotImplemented) + + def test_ops_error_str(self): + # GH 13624 + tdi = TimedeltaIndex(['1 day', '2 days']) + + for l, r in [(tdi, 'a'), ('a', tdi)]: + with tm.assertRaises(TypeError): + l + r + + with tm.assertRaises(TypeError): + l > r + + with tm.assertRaises(TypeError): + l == r + + with tm.assertRaises(TypeError): + l != r + + def test_timedelta_ops(self): + # GH4984 + # make sure ops return Timedelta + s = Series([Timestamp('20130101') + timedelta(seconds=i * i) + for i in range(10)]) + td = s.diff() + + result = td.mean() + expected = to_timedelta(timedelta(seconds=9)) + self.assertEqual(result, expected) + + result = td.to_frame().mean() + self.assertEqual(result[0], expected) + + result = td.quantile(.1) + expected = Timedelta(np.timedelta64(2600, 'ms')) + self.assertEqual(result, expected) + + result = td.median() + expected = to_timedelta('00:00:09') + self.assertEqual(result, expected) + + result = td.to_frame().median() + self.assertEqual(result[0], expected) + + # GH 6462 + # consistency in returned values for sum + result = td.sum() + expected = to_timedelta('00:01:21') + self.assertEqual(result, expected) + + result = td.to_frame().sum() + self.assertEqual(result[0], expected) + + # std + result = td.std() + expected = to_timedelta(Series(td.dropna().values).std()) + self.assertEqual(result, expected) + + result = td.to_frame().std() + self.assertEqual(result[0], expected) + + # invalid ops + for op in ['skew', 'kurt', 'sem', 'prod']: + self.assertRaises(TypeError, getattr(td, op)) + + # GH 10040 + # make sure NaT is properly handled by median() + s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')]) + self.assertEqual(s.diff().median(), timedelta(days=4)) + + s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'), + Timestamp('2015-02-15')]) + self.assertEqual(s.diff().median(), timedelta(days=6)) + + def test_timedelta_ops_scalar(self): + # GH 6808 + base = pd.to_datetime('20130101 09:01:12.123456') + expected_add = pd.to_datetime('20130101 09:01:22.123456') + expected_sub = pd.to_datetime('20130101 09:01:02.123456') + + for offset in [pd.to_timedelta(10, unit='s'), timedelta(seconds=10), + np.timedelta64(10, 's'), + np.timedelta64(10000000000, 'ns'), + pd.offsets.Second(10)]: + result = base + offset + self.assertEqual(result, expected_add) + + result = base - offset + self.assertEqual(result, expected_sub) + + base = pd.to_datetime('20130102 09:01:12.123456') + expected_add = pd.to_datetime('20130103 09:01:22.123456') + expected_sub = pd.to_datetime('20130101 09:01:02.123456') + + for offset in [pd.to_timedelta('1 day, 00:00:10'), + pd.to_timedelta('1 days, 00:00:10'), + timedelta(days=1, seconds=10), + np.timedelta64(1, 'D') + np.timedelta64(10, 's'), + pd.offsets.Day() + pd.offsets.Second(10)]: + result = base + offset + self.assertEqual(result, expected_add) + + result = base - offset + self.assertEqual(result, expected_sub) + + def test_timedelta_ops_with_missing_values(self): + # setup + s1 = pd.to_timedelta(Series(['00:00:01'])) + s2 = pd.to_timedelta(Series(['00:00:02'])) + sn = 
pd.to_timedelta(Series([pd.NaT])) + df1 = DataFrame(['00:00:01']).apply(pd.to_timedelta) + df2 = DataFrame(['00:00:02']).apply(pd.to_timedelta) + dfn = DataFrame([pd.NaT]).apply(pd.to_timedelta) + scalar1 = pd.to_timedelta('00:00:01') + scalar2 = pd.to_timedelta('00:00:02') + timedelta_NaT = pd.to_timedelta('NaT') + NA = np.nan + + actual = scalar1 + scalar1 + self.assertEqual(actual, scalar2) + actual = scalar2 - scalar1 + self.assertEqual(actual, scalar1) + + actual = s1 + s1 + assert_series_equal(actual, s2) + actual = s2 - s1 + assert_series_equal(actual, s1) + + actual = s1 + scalar1 + assert_series_equal(actual, s2) + actual = scalar1 + s1 + assert_series_equal(actual, s2) + actual = s2 - scalar1 + assert_series_equal(actual, s1) + actual = -scalar1 + s2 + assert_series_equal(actual, s1) + + actual = s1 + timedelta_NaT + assert_series_equal(actual, sn) + actual = timedelta_NaT + s1 + assert_series_equal(actual, sn) + actual = s1 - timedelta_NaT + assert_series_equal(actual, sn) + actual = -timedelta_NaT + s1 + assert_series_equal(actual, sn) + + actual = s1 + NA + assert_series_equal(actual, sn) + actual = NA + s1 + assert_series_equal(actual, sn) + actual = s1 - NA + assert_series_equal(actual, sn) + actual = -NA + s1 + assert_series_equal(actual, sn) + + actual = s1 + pd.NaT + assert_series_equal(actual, sn) + actual = s2 - pd.NaT + assert_series_equal(actual, sn) + + actual = s1 + df1 + assert_frame_equal(actual, df2) + actual = s2 - df1 + assert_frame_equal(actual, df1) + actual = df1 + s1 + assert_frame_equal(actual, df2) + actual = df2 - s1 + assert_frame_equal(actual, df1) + + actual = df1 + df1 + assert_frame_equal(actual, df2) + actual = df2 - df1 + assert_frame_equal(actual, df1) + + actual = df1 + scalar1 + assert_frame_equal(actual, df2) + actual = df2 - scalar1 + assert_frame_equal(actual, df1) + + actual = df1 + timedelta_NaT + assert_frame_equal(actual, dfn) + actual = df1 - timedelta_NaT + assert_frame_equal(actual, dfn) + + actual = df1 + NA + assert_frame_equal(actual, dfn) + actual = df1 - NA + assert_frame_equal(actual, dfn) + + actual = df1 + pd.NaT # NaT is datetime, not timedelta + assert_frame_equal(actual, dfn) + actual = df1 - pd.NaT + assert_frame_equal(actual, dfn) + + def test_compare_timedelta_series(self): + # regression test for GH5963 + s = pd.Series([timedelta(days=1), timedelta(days=2)]) + actual = s > timedelta(days=1) + expected = pd.Series([False, True]) + tm.assert_series_equal(actual, expected) + + def test_compare_timedelta_ndarray(self): + # GH11835 + periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')] + arr = np.array(periods) + result = arr[0] > arr + expected = np.array([False, False]) + self.assert_numpy_array_equal(result, expected) + + +class TestSlicing(tm.TestCase): + + def test_tdi_ops_attributes(self): + rng = timedelta_range('2 days', periods=5, freq='2D', name='x') + + result = rng + 1 + exp = timedelta_range('4 days', periods=5, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + result = rng - 2 + exp = timedelta_range('-2 days', periods=5, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + result = rng * 2 + exp = timedelta_range('4 days', periods=5, freq='4D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '4D') + + result = rng / 2 + exp = timedelta_range('1 days', periods=5, freq='D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, 'D') + + result 
= -rng + exp = timedelta_range('-2 days', periods=5, freq='-2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '-2D') + + rng = pd.timedelta_range('-2 days', periods=5, freq='D', name='x') + + result = abs(rng) + exp = TimedeltaIndex(['2 days', '1 days', '0 days', '1 days', + '2 days'], name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, None) + + def test_add_overflow(self): + # see gh-14068 + msg = "too (big|large) to convert" + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta(106580, 'D') + Timestamp('2000') + with tm.assertRaisesRegexp(OverflowError, msg): + Timestamp('2000') + to_timedelta(106580, 'D') + + _NaT = int(pd.NaT) + 1 + msg = "Overflow in int64 addition" + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta([106580], 'D') + Timestamp('2000') + with tm.assertRaisesRegexp(OverflowError, msg): + Timestamp('2000') + to_timedelta([106580], 'D') + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta([_NaT]) - Timedelta('1 days') + with tm.assertRaisesRegexp(OverflowError, msg): + to_timedelta(['5 days', _NaT]) - Timedelta('1 days') + with tm.assertRaisesRegexp(OverflowError, msg): + (to_timedelta([_NaT, '5 days', '1 hours']) - + to_timedelta(['7 seconds', _NaT, '4 hours'])) + + # These should not overflow! + exp = TimedeltaIndex([pd.NaT]) + result = to_timedelta([pd.NaT]) - Timedelta('1 days') + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex(['4 days', pd.NaT]) + result = to_timedelta(['5 days', pd.NaT]) - Timedelta('1 days') + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([pd.NaT, pd.NaT, '5 hours']) + result = (to_timedelta([pd.NaT, '5 days', '1 hours']) + + to_timedelta(['7 seconds', pd.NaT, '4 hours'])) + tm.assert_index_equal(result, exp) diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py new file mode 100644 index 0000000000000..0d46ee4172211 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -0,0 +1,81 @@ +import numpy as np +import pandas.util.testing as tm + +import pandas as pd +from pandas import Series, timedelta_range, Timedelta +from pandas.util.testing import assert_series_equal + + +class TestSlicing(tm.TestCase): + + def test_partial_slice(self): + rng = timedelta_range('1 day 10:11:12', freq='h', periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s['5 day':'6 day'] + expected = s.iloc[86:134] + assert_series_equal(result, expected) + + result = s['5 day':] + expected = s.iloc[86:] + assert_series_equal(result, expected) + + result = s[:'6 day'] + expected = s.iloc[:134] + assert_series_equal(result, expected) + + result = s['6 days, 23:11:12'] + self.assertEqual(result, s.iloc[133]) + + self.assertRaises(KeyError, s.__getitem__, '50 days') + + def test_partial_slice_high_reso(self): + + # higher reso + rng = timedelta_range('1 day 10:11:12', freq='us', periods=2000) + s = Series(np.arange(len(rng)), index=rng) + + result = s['1 day 10:11:12':] + expected = s.iloc[0:] + assert_series_equal(result, expected) + + result = s['1 day 10:11:12.001':] + expected = s.iloc[1000:] + assert_series_equal(result, expected) + + result = s['1 days, 10:11:12.001001'] + self.assertEqual(result, s.iloc[1001]) + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + assert_series_equal(ts[l_slc], 
ts.iloc[i_slc]) + assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + + assert_slices_equivalent(SLC[Timedelta(hours=7)::-1], SLC[7::-1]) + assert_slices_equivalent(SLC['7 hours'::-1], SLC[7::-1]) + + assert_slices_equivalent(SLC[:Timedelta(hours=7):-1], SLC[:6:-1]) + assert_slices_equivalent(SLC[:'7 hours':-1], SLC[:6:-1]) + + assert_slices_equivalent(SLC['15 hours':'7 hours':-1], SLC[15:6:-1]) + assert_slices_equivalent(SLC[Timedelta(hours=15):Timedelta(hours=7):- + 1], SLC[15:6:-1]) + assert_slices_equivalent(SLC['15 hours':Timedelta(hours=7):-1], + SLC[15:6:-1]) + assert_slices_equivalent(SLC[Timedelta(hours=15):'7 hours':-1], + SLC[15:6:-1]) + + assert_slices_equivalent(SLC['7 hours':'15 hours':-1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) + self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', + lambda: ts.loc[::0]) diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py new file mode 100644 index 0000000000000..9000fb3beb279 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -0,0 +1,76 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas import TimedeltaIndex, timedelta_range, Int64Index + + +class TestTimedeltaIndex(tm.TestCase): + _multiprocess_can_split_ = True + + def test_union(self): + + i1 = timedelta_range('1day', periods=5) + i2 = timedelta_range('3day', periods=5) + result = i1.union(i2) + expected = timedelta_range('1day', periods=7) + self.assert_index_equal(result, expected) + + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = TimedeltaIndex(start='1 day', periods=10, freq='D') + i1.union(i2) # Works + i2.union(i1) # Fails with "AttributeError: can't set attribute" + + def test_union_coverage(self): + + idx = TimedeltaIndex(['3d', '1d', '2d']) + ordered = TimedeltaIndex(idx.sort_values(), freq='infer') + result = ordered.union(idx) + self.assert_index_equal(result, ordered) + + result = ordered[:0].union(ordered) + self.assert_index_equal(result, ordered) + self.assertEqual(result.freq, ordered.freq) + + def test_union_bug_1730(self): + + rng_a = timedelta_range('1 day', periods=4, freq='3H') + rng_b = timedelta_range('1 day', periods=4, freq='4H') + + result = rng_a.union(rng_b) + exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) + self.assert_index_equal(result, exp) + + def test_union_bug_1745(self): + + left = TimedeltaIndex(['1 day 15:19:49.695000']) + right = TimedeltaIndex(['2 day 13:04:21.322000', + '1 day 15:27:24.873000', + '1 day 15:31:05.350000']) + + result = left.union(right) + exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) + self.assert_index_equal(result, exp) + + def test_union_bug_4564(self): + + left = timedelta_range("1 day", "30d") + right = left + pd.offsets.Minute(15) + + result = left.union(right) + exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) + self.assert_index_equal(result, exp) + + def test_intersection_bug_1708(self): + index_1 = timedelta_range('1 day', periods=4, freq='h') + index_2 = index_1 + pd.offsets.Hour(5) + + result = index_1 & index_2 + self.assertEqual(len(result), 0) + + index_1 = timedelta_range('1 day', periods=4, freq='h') + index_2 = index_1 + 
pd.offsets.Hour(1) + + result = index_1 & index_2 + expected = timedelta_range('1 day 01:00:00', periods=3, freq='h') + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py new file mode 100644 index 0000000000000..4c8571e4f08f9 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -0,0 +1,592 @@ +import numpy as np +from datetime import timedelta + +import pandas as pd +import pandas.util.testing as tm +from pandas import (timedelta_range, date_range, Series, Timedelta, + DatetimeIndex, TimedeltaIndex, Index, DataFrame, + Int64Index, _np_version_under1p8) +from pandas.util.testing import (assert_almost_equal, assert_series_equal, + assert_index_equal) + +from ..datetimelike import DatetimeLike + +randn = np.random.randn + + +class TestTimedeltaIndex(DatetimeLike, tm.TestCase): + _holder = TimedeltaIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makeTimedeltaIndex(10)) + self.setup_indices() + + def create_index(self): + return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + + def test_shift(self): + # test shift for TimedeltaIndex + # err8083 + + drange = self.create_index() + result = drange.shift(1) + expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', + '4 days 01:00:00', '5 days 01:00:00'], + freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D 1s') + expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', + '8 days 01:00:03', '9 days 01:00:03', + '10 days 01:00:03'], freq='D') + self.assert_index_equal(result, expected) + + def test_get_loc(self): + idx = pd.to_timedelta(['0 days', '1 days', '2 days']) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1) + self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + + self.assertEqual( + idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) + self.assertEqual( + idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) + self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) + + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc(idx[1], method='nearest', tolerance='foo') + + for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: + self.assertEqual(idx.get_loc('1 day 1 hour', method), loc) + + def test_get_loc_nat(self): + tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00']) + + self.assertEqual(tidx.get_loc(pd.NaT), 1) + self.assertEqual(tidx.get_loc(None), 1) + self.assertEqual(tidx.get_loc(float('nan')), 1) + self.assertEqual(tidx.get_loc(np.nan), 1) + + def test_get_indexer(self): + idx = pd.to_timedelta(['0 days', '1 days', '2 days']) + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 1, 1], dtype=np.intp)) + + res = idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 hour')) + tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) + + def test_numeric_compat(self): + 
+ idx = self._holder(np.arange(5, dtype='int64')) + didx = self._holder(np.arange(5, dtype='int64') ** 2) + result = idx * 1 + tm.assert_index_equal(result, idx) + + result = 1 * idx + tm.assert_index_equal(result, idx) + + result = idx / 1 + tm.assert_index_equal(result, idx) + + result = idx // 1 + tm.assert_index_equal(result, idx) + + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, + self._holder(np.arange(5, dtype='int64') * 5)) + + result = idx * np.arange(5, dtype='int64') + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='int64')) + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='float64') + 0.1) + tm.assert_index_equal(result, self._holder(np.arange( + 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) + + # invalid + self.assertRaises(TypeError, lambda: idx * idx) + self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) + self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) + + def test_pickle_compat_construction(self): + pass + + def test_ufunc_coercions(self): + # normal ops are also tested in tseries/test_timedeltas.py + idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], + freq='2H', name='x') + + for result in [idx * 2, np.multiply(idx, 2)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'], + freq='4H', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '4H') + + for result in [idx / 2, np.divide(idx, 2)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'], + freq='H', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, 'H') + + idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], + freq='2H', name='x') + for result in [-idx, np.negative(idx)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'], + freq='-2H', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '-2H') + + idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'], + freq='H', name='x') + for result in [abs(idx), np.absolute(idx)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'], + freq=None, name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, None) + + def test_fillna_timedelta(self): + # GH 11343 + idx = pd.TimedeltaIndex(['1 day', pd.NaT, '3 day']) + + exp = pd.TimedeltaIndex(['1 day', '2 day', '3 day']) + self.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp) + + exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day']) + self.assert_index_equal(idx.fillna(pd.Timedelta('3 hour')), exp) + + exp = pd.Index( + [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) + + def test_difference_freq(self): + # GH14323: Difference of TimedeltaIndex should not preserve frequency + + index = timedelta_range("0 days", "5 days", freq="D") + + other = timedelta_range("1 days", "4 days", freq="D") + expected = TimedeltaIndex(["0 days", "5 days"], freq=None) + idx_diff = index.difference(other) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + + other = timedelta_range("2 days", "5 days", freq="D") + idx_diff = index.difference(other) + expected = TimedeltaIndex(["0 days", "1 days"], freq=None) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + + def 
test_take(self): + + tds = ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'] + idx = TimedeltaIndex(start='1d', end='2d', freq='H', name='idx') + expected = TimedeltaIndex(tds, freq=None, name='idx') + + taken1 = idx.take([2, 4, 10]) + taken2 = idx[[2, 4, 10]] + + for taken in [taken1, taken2]: + self.assert_index_equal(taken, expected) + tm.assertIsInstance(taken, TimedeltaIndex) + self.assertIsNone(taken.freq) + self.assertEqual(taken.name, expected.name) + + def test_take_fill_value(self): + # GH 12631 + idx = pd.TimedeltaIndex(['1 days', '2 days', '3 days'], + name='xxx') + result = idx.take(np.array([1, 0, -1])) + expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], + name='xxx') + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'], + name='xxx') + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], + name='xxx') + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with tm.assertRaises(IndexError): + idx.take(np.array([1, -5])) + + def test_isin(self): + + index = tm.makeTimedeltaIndex(4) + result = index.isin(index) + self.assertTrue(result.all()) + + result = index.isin(list(index)) + self.assertTrue(result.all()) + + assert_almost_equal(index.isin([index[2], 5]), + np.array([False, False, True, False])) + + def test_factorize(self): + idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day', + '3 day']) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = TimedeltaIndex(['1 day', '2 day', '3 day']) + + arr, idx = idx1.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + self.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + self.assert_numpy_array_equal(arr, exp_arr) + self.assert_index_equal(idx, exp_idx) + + # freq must be preserved + idx3 = timedelta_range('1 day', periods=4, freq='s') + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + arr, idx = idx3.factorize() + self.assert_numpy_array_equal(arr, exp_arr) + self.assert_index_equal(idx, idx3) + + def test_join_self(self): + + index = timedelta_range('1 day', periods=10) + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = index.join(index, how=kind) + tm.assert_index_equal(index, joined) + + def test_slice_keeps_name(self): + + # GH4226 + dr = pd.timedelta_range('1d', '5d', freq='H', name='timebucket') + self.assertEqual(dr[1:].name, dr.name) + + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe(10, 10, + data_gen_f=lambda *args, **kwargs: randn(), + r_idx_type='i', c_idx_type='td') + str(df) + + cols = df.columns.join(df.index, how='outer') + joined = cols.join(df.columns) + self.assertEqual(cols.dtype, np.dtype('O')) + self.assertEqual(cols.dtype, joined.dtype) + tm.assert_index_equal(cols, joined) + + def test_sort_values(self): + + idx = TimedeltaIndex(['4d', '1d', '2d']) + + ordered = idx.sort_values() + self.assertTrue(ordered.is_monotonic) + + ordered = idx.sort_values(ascending=False) + self.assertTrue(ordered[::-1].is_monotonic) + + ordered, dexer = 
idx.sort_values(return_indexer=True) + self.assertTrue(ordered.is_monotonic) + self.assert_numpy_array_equal(dexer, + np.array([1, 2, 0]), + check_dtype=False) + + ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) + self.assertTrue(ordered[::-1].is_monotonic) + self.assert_numpy_array_equal(dexer, + np.array([0, 2, 1]), + check_dtype=False) + + def test_get_duplicates(self): + idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day', + '4day']) + + result = idx.get_duplicates() + ex = TimedeltaIndex(['2 day', '3day']) + self.assert_index_equal(result, ex) + + def test_argmin_argmax(self): + idx = TimedeltaIndex(['1 day 00:00:05', '1 day 00:00:01', + '1 day 00:00:02']) + self.assertEqual(idx.argmin(), 1) + self.assertEqual(idx.argmax(), 0) + + def test_misc_coverage(self): + + rng = timedelta_range('1 day', periods=5) + result = rng.groupby(rng.days) + tm.assertIsInstance(list(result.values())[0][0], Timedelta) + + idx = TimedeltaIndex(['3d', '1d', '2d']) + self.assertFalse(idx.equals(list(idx))) + + non_td = Index(list('abc')) + self.assertFalse(idx.equals(list(non_td))) + + def test_map(self): + + rng = timedelta_range('1 day', periods=10) + + f = lambda x: x.days + result = rng.map(f) + exp = Int64Index([f(x) for x in rng]) + tm.assert_index_equal(result, exp) + + def test_comparisons_nat(self): + + tdidx1 = pd.TimedeltaIndex(['1 day', pd.NaT, '1 day 00:00:01', pd.NaT, + '1 day 00:00:01', '5 day 00:00:03']) + tdidx2 = pd.TimedeltaIndex(['2 day', '2 day', pd.NaT, pd.NaT, + '1 day 00:00:02', '5 days 00:00:03']) + tdarr = np.array([np.timedelta64(2, 'D'), + np.timedelta64(2, 'D'), np.timedelta64('nat'), + np.timedelta64('nat'), + np.timedelta64(1, 'D') + np.timedelta64(2, 's'), + np.timedelta64(5, 'D') + np.timedelta64(3, 's')]) + + if _np_version_under1p8: + # cannot test array because np.datetime64('nat') returns today's date + cases = [(tdidx1, tdidx2)] + else: + cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] + + # Check pd.NaT is handled the same as np.nan + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + self.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + self.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + self.assert_numpy_array_equal(result, expected) + + def test_comparisons_coverage(self): + rng = timedelta_range('1 days', periods=10) + + result = rng < rng[3] + exp = np.array([True, True, True] + [False] * 7) + self.assert_numpy_array_equal(result, exp) + + # raise TypeError for now + self.assertRaises(TypeError, rng.__lt__, rng[3].value) + + result = rng == list(rng) + exp = rng == rng + self.assert_numpy_array_equal(result, exp) + + def test_total_seconds(self): + # GH 10939 + # test index + rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, + freq='s') + expt = [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456. / 1e9, + 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456. 
/ 1e9] + tm.assert_almost_equal(rng.total_seconds(), np.array(expt)) + + # test Series + s = Series(rng) + s_expt = Series(expt, index=[0, 1]) + tm.assert_series_equal(s.dt.total_seconds(), s_expt) + + # with nat + s[1] = np.nan + s_expt = Series([1 * 86400 + 10 * 3600 + 11 * 60 + + 12 + 100123456. / 1e9, np.nan], index=[0, 1]) + tm.assert_series_equal(s.dt.total_seconds(), s_expt) + + # with both nat + s = Series([np.nan, np.nan], dtype='timedelta64[ns]') + tm.assert_series_equal(s.dt.total_seconds(), + Series([np.nan, np.nan], index=[0, 1])) + + def test_pass_TimedeltaIndex_to_index(self): + + rng = timedelta_range('1 days', '10 days') + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pytimedelta(), dtype=object) + + self.assert_numpy_array_equal(idx.values, expected.values) + + def test_pickle(self): + + rng = timedelta_range('1 days', periods=10) + rng_p = self.round_trip_pickle(rng) + tm.assert_index_equal(rng, rng_p) + + def test_hash_error(self): + index = timedelta_range('1 days', periods=10) + with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % + type(index).__name__): + hash(index) + + def test_append_join_nondatetimeindex(self): + rng = timedelta_range('1 days', periods=10) + idx = Index(['a', 'b', 'c', 'd']) + + result = rng.append(idx) + tm.assertIsInstance(result[0], Timedelta) + + # it works + rng.join(idx, how='outer') + + def test_append_numpy_bug_1681(self): + + td = timedelta_range('1 days', '10 days', freq='2D') + a = DataFrame() + c = DataFrame({'A': 'foo', 'B': td}, index=td) + str(c) + + result = a.append(c) + self.assertTrue((result['B'] == td).all()) + + def test_fields(self): + rng = timedelta_range('1 days, 10:11:12.100123456', periods=2, + freq='s') + self.assert_numpy_array_equal(rng.days, np.array( + [1, 1], dtype='int64')) + self.assert_numpy_array_equal( + rng.seconds, + np.array([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], + dtype='int64')) + self.assert_numpy_array_equal(rng.microseconds, np.array( + [100 * 1000 + 123, 100 * 1000 + 123], dtype='int64')) + self.assert_numpy_array_equal(rng.nanoseconds, np.array( + [456, 456], dtype='int64')) + + self.assertRaises(AttributeError, lambda: rng.hours) + self.assertRaises(AttributeError, lambda: rng.minutes) + self.assertRaises(AttributeError, lambda: rng.milliseconds) + + # with nat + s = Series(rng) + s[1] = np.nan + + tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=[0, 1])) + tm.assert_series_equal(s.dt.seconds, Series( + [10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1])) + + def test_freq_conversion(self): + + # doc example + + # series + td = Series(date_range('20130101', periods=4)) - \ + Series(date_range('20121201', periods=4)) + td[2] += timedelta(minutes=5, seconds=3) + td[3] = np.nan + + result = td / np.timedelta64(1, 'D') + expected = Series([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan + ]) + assert_series_equal(result, expected) + + result = td.astype('timedelta64[D]') + expected = Series([31, 31, 31, np.nan]) + assert_series_equal(result, expected) + + result = td / np.timedelta64(1, 's') + expected = Series([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, + np.nan]) + assert_series_equal(result, expected) + + result = td.astype('timedelta64[s]') + assert_series_equal(result, expected) + + # tdi + td = TimedeltaIndex(td) + + result = td / np.timedelta64(1, 'D') + expected = Index([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan]) + assert_index_equal(result, expected) + + result = td.astype('timedelta64[D]') + expected = Index([31, 31, 31, 
np.nan]) + assert_index_equal(result, expected) + + result = td / np.timedelta64(1, 's') + expected = Index([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, + np.nan]) + assert_index_equal(result, expected) + + result = td.astype('timedelta64[s]') + assert_index_equal(result, expected) + + +class TestSlicing(tm.TestCase): + + def test_timedelta(self): + # this is valid too + index = date_range('1/1/2000', periods=50, freq='B') + shifted = index + timedelta(1) + back = shifted + timedelta(-1) + self.assertTrue(tm.equalContents(index, back)) + self.assertEqual(shifted.freq, index.freq) + self.assertEqual(shifted.freq, back.freq) + + result = index - timedelta(1) + expected = index + timedelta(-1) + tm.assert_index_equal(result, expected) + + # GH4134, buggy with timedeltas + rng = date_range('2013', '2014') + s = Series(rng) + result1 = rng - pd.offsets.Hour(1) + result2 = DatetimeIndex(s - np.timedelta64(100000000)) + result3 = rng - np.timedelta64(100000000) + result4 = DatetimeIndex(s - pd.offsets.Hour(1)) + tm.assert_index_equal(result1, result4) + tm.assert_index_equal(result2, result3) + + +class TestTimeSeries(tm.TestCase): + _multiprocess_can_split_ = True + + def test_series_box_timedelta(self): + rng = timedelta_range('1 day 1 s', periods=5, freq='h') + s = Series(rng) + tm.assertIsInstance(s[1], Timedelta) + tm.assertIsInstance(s.iat[2], Timedelta) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py new file mode 100644 index 0000000000000..8bd56b5885bba --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -0,0 +1,51 @@ +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas.tseries.offsets import Day, Second +from pandas import to_timedelta, timedelta_range +from pandas.util.testing import assert_frame_equal + + +class TestTimedeltas(tm.TestCase): + _multiprocess_can_split_ = True + + def test_timedelta_range(self): + + expected = to_timedelta(np.arange(5), unit='D') + result = timedelta_range('0 days', periods=5, freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(11), unit='D') + result = timedelta_range('0 days', '10 days', freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day() + result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02', + freq='D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2) + result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D') + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(50), unit='T') * 30 + result = timedelta_range('0 days', freq='30T', periods=50) + tm.assert_index_equal(result, expected) + + # GH 11776 + arr = np.arange(10).reshape(2, 5) + df = pd.DataFrame(np.arange(10).reshape(2, 5)) + for arg in (arr, df): + with tm.assertRaisesRegexp(TypeError, "1-d array"): + to_timedelta(arg) + for errors in ['ignore', 'raise', 'coerce']: + with tm.assertRaisesRegexp(TypeError, "1-d array"): + to_timedelta(arg, errors=errors) + + # issue10583 + df = pd.DataFrame(np.random.normal(size=(10, 4))) + df.index = pd.timedelta_range(start='0s', periods=10, freq='s') + expected = df.loc[pd.Timedelta('0s'):, :] + result = df.loc['0s':, :] + assert_frame_equal(expected, result) diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py new file mode 100644 index 
0000000000000..2442051547312 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -0,0 +1,201 @@ +from datetime import time, timedelta +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas.util.testing import assert_series_equal +from pandas import (Series, Timedelta, to_timedelta, tslib, isnull, + TimedeltaIndex) + + +class TestTimedeltas(tm.TestCase): + _multiprocess_can_split_ = True + + def test_to_timedelta(self): + def conv(v): + return v.astype('m8[ns]') + + d1 = np.timedelta64(1, 'D') + + self.assertEqual(to_timedelta('1 days 06:05:01.00003', box=False), + conv(d1 + np.timedelta64(6 * 3600 + + 5 * 60 + 1, 's') + + np.timedelta64(30, 'us'))) + self.assertEqual(to_timedelta('15.5us', box=False), + conv(np.timedelta64(15500, 'ns'))) + + # empty string + result = to_timedelta('', box=False) + self.assertEqual(result.astype('int64'), tslib.iNaT) + + result = to_timedelta(['', '']) + self.assertTrue(isnull(result).all()) + + # pass thru + result = to_timedelta(np.array([np.timedelta64(1, 's')])) + expected = pd.Index(np.array([np.timedelta64(1, 's')])) + tm.assert_index_equal(result, expected) + + # ints + result = np.timedelta64(0, 'ns') + expected = to_timedelta(0, box=False) + self.assertEqual(result, expected) + + # Series + expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) + result = to_timedelta(Series(['1d', '1days 00:00:01'])) + tm.assert_series_equal(result, expected) + + # with units + result = TimedeltaIndex([np.timedelta64(0, 'ns'), np.timedelta64( + 10, 's').astype('m8[ns]')]) + expected = to_timedelta([0, 10], unit='s') + tm.assert_index_equal(result, expected) + + # single element conversion + v = timedelta(seconds=1) + result = to_timedelta(v, box=False) + expected = np.timedelta64(timedelta(seconds=1)) + self.assertEqual(result, expected) + + v = np.timedelta64(timedelta(seconds=1)) + result = to_timedelta(v, box=False) + expected = np.timedelta64(timedelta(seconds=1)) + self.assertEqual(result, expected) + + # arrays of various dtypes + arr = np.array([1] * 5, dtype='int64') + result = to_timedelta(arr, unit='s') + expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='int64') + result = to_timedelta(arr, unit='m') + expected = TimedeltaIndex([np.timedelta64(1, 'm')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='int64') + result = to_timedelta(arr, unit='h') + expected = TimedeltaIndex([np.timedelta64(1, 'h')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='timedelta64[s]') + result = to_timedelta(arr) + expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype='timedelta64[D]') + result = to_timedelta(arr) + expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5) + tm.assert_index_equal(result, expected) + + # Test with lists as input when box=false + expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]') + result = to_timedelta(range(3), unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + result = to_timedelta(np.arange(3), unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + result = to_timedelta([0, 1, 2], unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + # Tests with fractional seconds as input: + expected = np.array( + [0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]') + result = 
to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False) + tm.assert_numpy_array_equal(expected, result) + + def test_to_timedelta_invalid(self): + + # bad value for errors parameter + msg = "errors must be one of" + tm.assertRaisesRegexp(ValueError, msg, to_timedelta, + ['foo'], errors='never') + + # these will error + self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) + self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo')) + + # time not supported ATM + self.assertRaises(ValueError, lambda: to_timedelta(time(second=1))) + self.assertTrue(to_timedelta( + time(second=1), errors='coerce') is pd.NaT) + + self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar'])) + tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]), + to_timedelta(['foo', 'bar'], errors='coerce')) + + tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']), + to_timedelta(['1 day', 'bar', '1 min'], + errors='coerce')) + + # gh-13613: these should not error because errors='ignore' + invalid_data = 'apple' + self.assertEqual(invalid_data, to_timedelta( + invalid_data, errors='ignore')) + + invalid_data = ['apple', '1 days'] + tm.assert_numpy_array_equal( + np.array(invalid_data, dtype=object), + to_timedelta(invalid_data, errors='ignore')) + + invalid_data = pd.Index(['apple', '1 days']) + tm.assert_index_equal(invalid_data, to_timedelta( + invalid_data, errors='ignore')) + + invalid_data = Series(['apple', '1 days']) + tm.assert_series_equal(invalid_data, to_timedelta( + invalid_data, errors='ignore')) + + def test_to_timedelta_via_apply(self): + # GH 5458 + expected = Series([np.timedelta64(1, 's')]) + result = Series(['00:00:01']).apply(to_timedelta) + tm.assert_series_equal(result, expected) + + result = Series([to_timedelta('00:00:01')]) + tm.assert_series_equal(result, expected) + + def test_to_timedelta_on_missing_values(self): + # GH5438 + timedelta_NaT = np.timedelta64('NaT') + + actual = pd.to_timedelta(Series(['00:00:01', np.nan])) + expected = Series([np.timedelta64(1000000000, 'ns'), + timedelta_NaT], dtype=' r + + self.assertFalse(l == r) + self.assertTrue(l != r) diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index f686f1aa6dc47..e90b794c20ee4 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -1635,3 +1635,48 @@ def test_woy_boundary(self): for args in [(2000, 1, 1), (2000, 1, 2), ( 2005, 1, 1), (2005, 1, 2)]]) self.assertTrue((result == [52, 52, 53, 53]).all()) + + +class TestTsUtil(tm.TestCase): + + def test_min_valid(self): + # Ensure that Timestamp.min is a valid Timestamp + Timestamp(Timestamp.min) + + def test_max_valid(self): + # Ensure that Timestamp.max is a valid Timestamp + Timestamp(Timestamp.max) + + def test_to_datetime_bijective(self): + # Ensure that converting to datetime and back only loses precision + # by going from nanoseconds to microseconds. 
+ exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + self.assertEqual( + Timestamp(Timestamp.max.to_pydatetime()).value / 1000, + Timestamp.max.value / 1000) + + exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + self.assertEqual( + Timestamp(Timestamp.min.to_pydatetime()).value / 1000, + Timestamp.min.value / 1000) + + +class TestTslib(tm.TestCase): + + def test_round(self): + stamp = Timestamp('2000-01-05 05:09:15.13') + + def _check_round(freq, expected): + result = stamp.round(freq=freq) + self.assertEqual(result, expected) + + for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')), + ('H', Timestamp('2000-01-05 05:00:00')), + ('S', Timestamp('2000-01-05 05:09:15'))]: + _check_round(freq, expected) + + msg = pd.tseries.frequencies._INVALID_FREQ_ERROR + with self.assertRaisesRegexp(ValueError, msg): + stamp.round('foo') diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 2ff06517f175a..ac56182da0f9a 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -2,10 +2,8 @@ from datetime import timedelta import numpy as np import pandas as pd -from pandas import (Series, Index, Int64Index, Timestamp, Period, - DatetimeIndex, PeriodIndex, TimedeltaIndex, - Timedelta, timedelta_range, date_range, Float64Index, - _np_version_under1p10) +from pandas import (Series, Index, Period, DatetimeIndex, PeriodIndex, + Timedelta, _np_version_under1p10) import pandas.tslib as tslib import pandas.tseries.period as period @@ -14,845 +12,6 @@ from pandas.tests.test_base import Ops -class TestTimedeltaIndexOps(Ops): - def setUp(self): - super(TestTimedeltaIndexOps, self).setUp() - mask = lambda x: isinstance(x, TimedeltaIndex) - self.is_valid_objs = [o for o in self.objs if mask(o)] - self.not_valid_objs = [] - - def test_ops_properties(self): - self.check_ops_properties(['days', 'hours', 'minutes', 'seconds', - 'milliseconds']) - self.check_ops_properties(['microseconds', 'nanoseconds']) - - def test_asobject_tolist(self): - idx = timedelta_range(start='1 days', periods=4, freq='D', name='idx') - expected_list = [Timedelta('1 days'), Timedelta('2 days'), - Timedelta('3 days'), Timedelta('4 days')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), pd.NaT, - timedelta(days=4)], name='idx') - expected_list = [Timedelta('1 days'), Timedelta('2 days'), pd.NaT, - Timedelta('4 days')] - expected = pd.Index(expected_list, dtype=object, name='idx') - result = idx.asobject - self.assertTrue(isinstance(result, Index)) - self.assertEqual(result.dtype, object) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(idx.tolist(), expected_list) - - def test_minmax(self): - - # monotonic - idx1 = TimedeltaIndex(['1 days', '2 days', '3 days']) - self.assertTrue(idx1.is_monotonic) - - # non-monotonic - idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT']) - self.assertFalse(idx2.is_monotonic) - - for idx in [idx1, idx2]: - self.assertEqual(idx.min(), Timedelta('1 days')), - 
self.assertEqual(idx.max(), Timedelta('3 days')), - self.assertEqual(idx.argmin(), 0) - self.assertEqual(idx.argmax(), 2) - - for op in ['min', 'max']: - # Return NaT - obj = TimedeltaIndex([]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = TimedeltaIndex([pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) - self.assertTrue(pd.isnull(getattr(obj, op)())) - - def test_numpy_minmax(self): - dr = pd.date_range(start='2016-01-15', end='2016-01-20') - td = TimedeltaIndex(np.asarray(dr)) - - self.assertEqual(np.min(td), Timedelta('16815 days')) - self.assertEqual(np.max(td), Timedelta('16820 days')) - - errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.min, td, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.max, td, out=0) - - self.assertEqual(np.argmin(td), 0) - self.assertEqual(np.argmax(td), 5) - - if not _np_version_under1p10: - errmsg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, errmsg, np.argmin, td, out=0) - tm.assertRaisesRegexp(ValueError, errmsg, np.argmax, td, out=0) - - def test_round(self): - td = pd.timedelta_range(start='16801 days', periods=5, freq='30Min') - elt = td[1] - - expected_rng = TimedeltaIndex([ - Timedelta('16801 days 00:00:00'), - Timedelta('16801 days 00:00:00'), - Timedelta('16801 days 01:00:00'), - Timedelta('16801 days 02:00:00'), - Timedelta('16801 days 02:00:00'), - ]) - expected_elt = expected_rng[1] - - tm.assert_index_equal(td.round(freq='H'), expected_rng) - self.assertEqual(elt.round(freq='H'), expected_elt) - - msg = pd.tseries.frequencies._INVALID_FREQ_ERROR - with self.assertRaisesRegexp(ValueError, msg): - td.round(freq='foo') - with tm.assertRaisesRegexp(ValueError, msg): - elt.round(freq='foo') - - msg = " is a non-fixed frequency" - tm.assertRaisesRegexp(ValueError, msg, td.round, freq='M') - tm.assertRaisesRegexp(ValueError, msg, elt.round, freq='M') - - def test_representation(self): - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')""" - - exp2 = ("TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', " - "freq='D')") - - exp3 = ("TimedeltaIndex(['1 days', '2 days'], " - "dtype='timedelta64[ns]', freq='D')") - - exp4 = ("TimedeltaIndex(['1 days', '2 days', '3 days'], " - "dtype='timedelta64[ns]', freq='D')") - - exp5 = ("TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " - "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)") - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - for func in ['__repr__', '__unicode__', '__str__']: - result = getattr(idx, func)() - self.assertEqual(result, expected) - - def test_representation_to_series(self): - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """Series([], dtype: timedelta64[ns])""" - - exp2 = """0 1 days -dtype: timedelta64[ns]""" - - exp3 = """0 1 days -1 2 days -dtype: timedelta64[ns]""" - - exp4 = """0 1 days -1 2 days -2 3 days 
-dtype: timedelta64[ns]""" - - exp5 = """0 1 days 00:00:01 -1 2 days 00:00:00 -2 3 days 00:00:00 -dtype: timedelta64[ns]""" - - with pd.option_context('display.width', 300): - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - result = repr(pd.Series(idx)) - self.assertEqual(result, expected) - - def test_summary(self): - # GH9116 - idx1 = TimedeltaIndex([], freq='D') - idx2 = TimedeltaIndex(['1 days'], freq='D') - idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D') - idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D') - idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']) - - exp1 = """TimedeltaIndex: 0 entries -Freq: D""" - - exp2 = """TimedeltaIndex: 1 entries, 1 days to 1 days -Freq: D""" - - exp3 = """TimedeltaIndex: 2 entries, 1 days to 2 days -Freq: D""" - - exp4 = """TimedeltaIndex: 3 entries, 1 days to 3 days -Freq: D""" - - exp5 = ("TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days " - "00:00:00") - - for idx, expected in zip([idx1, idx2, idx3, idx4, idx5], - [exp1, exp2, exp3, exp4, exp5]): - result = idx.summary() - self.assertEqual(result, expected) - - def test_add_iadd(self): - - # only test adding/sub offsets as + is now numeric - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = timedelta_range('1 days', '10 days') - result = rng + delta - expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', - freq='D') - tm.assert_index_equal(result, expected) - rng += delta - tm.assert_index_equal(rng, expected) - - # int - rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) - result = rng + 1 - expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) - tm.assert_index_equal(result, expected) - rng += 1 - tm.assert_index_equal(rng, expected) - - def test_sub_isub(self): - # only test adding/sub offsets as - is now numeric - - # offset - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - for delta in offsets: - rng = timedelta_range('1 days', '10 days') - result = rng - delta - expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') - tm.assert_index_equal(result, expected) - rng -= delta - tm.assert_index_equal(rng, expected) - - # int - rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) - result = rng - 1 - expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) - tm.assert_index_equal(result, expected) - rng -= 1 - tm.assert_index_equal(rng, expected) - - idx = TimedeltaIndex(['1 day', '2 day']) - msg = "cannot subtract a datelike from a TimedeltaIndex" - with tm.assertRaisesRegexp(TypeError, msg): - idx - Timestamp('2011-01-01') - - result = Timestamp('2011-01-01') + idx - expected = DatetimeIndex(['2011-01-02', '2011-01-03']) - tm.assert_index_equal(result, expected) - - def test_ops_compat(self): - - offsets = [pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)] - - rng = timedelta_range('1 days', '10 days', name='foo') - - # multiply - for offset in offsets: - self.assertRaises(TypeError, lambda: rng * offset) - - # divide - expected = Int64Index((np.arange(10) + 1) * 12, name='foo') - for offset in offsets: - result = rng / offset - tm.assert_index_equal(result, expected, exact=False) - - # divide with nats - rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - expected = Float64Index([12, np.nan, 24], name='foo') - for offset in offsets: - result = rng / 
offset - tm.assert_index_equal(result, expected) - - # don't allow division by NaT (make could in the future) - self.assertRaises(TypeError, lambda: rng / pd.NaT) - - def test_subtraction_ops(self): - - # with datetimes/timedelta and tdi/dti - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - td = Timedelta('1 days') - dt = Timestamp('20130101') - - self.assertRaises(TypeError, lambda: tdi - dt) - self.assertRaises(TypeError, lambda: tdi - dti) - self.assertRaises(TypeError, lambda: td - dt) - self.assertRaises(TypeError, lambda: td - dti) - - result = dt - dti - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days'], name='bar') - tm.assert_index_equal(result, expected) - - result = dti - dt - expected = TimedeltaIndex(['0 days', '1 days', '2 days'], name='bar') - tm.assert_index_equal(result, expected) - - result = tdi - td - expected = TimedeltaIndex(['0 days', pd.NaT, '1 days'], name='foo') - tm.assert_index_equal(result, expected, check_names=False) - - result = td - tdi - expected = TimedeltaIndex(['0 days', pd.NaT, '-1 days'], name='foo') - tm.assert_index_equal(result, expected, check_names=False) - - result = dti - td - expected = DatetimeIndex( - ['20121231', '20130101', '20130102'], name='bar') - tm.assert_index_equal(result, expected, check_names=False) - - result = dt - tdi - expected = DatetimeIndex(['20121231', pd.NaT, '20121230'], name='foo') - tm.assert_index_equal(result, expected) - - def test_subtraction_ops_with_tz(self): - - # check that dt/dti subtraction ops with tz are validated - dti = date_range('20130101', periods=3) - ts = Timestamp('20130101') - dt = ts.to_pydatetime() - dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') - ts_tz = Timestamp('20130101').tz_localize('US/Eastern') - ts_tz2 = Timestamp('20130101').tz_localize('CET') - dt_tz = ts_tz.to_pydatetime() - td = Timedelta('1 days') - - def _check(result, expected): - self.assertEqual(result, expected) - self.assertIsInstance(result, Timedelta) - - # scalars - result = ts - ts - expected = Timedelta('0 days') - _check(result, expected) - - result = dt_tz - ts_tz - expected = Timedelta('0 days') - _check(result, expected) - - result = ts_tz - dt_tz - expected = Timedelta('0 days') - _check(result, expected) - - # tz mismatches - self.assertRaises(TypeError, lambda: dt_tz - ts) - self.assertRaises(TypeError, lambda: dt_tz - dt) - self.assertRaises(TypeError, lambda: dt_tz - ts_tz2) - self.assertRaises(TypeError, lambda: dt - dt_tz) - self.assertRaises(TypeError, lambda: ts - dt_tz) - self.assertRaises(TypeError, lambda: ts_tz2 - ts) - self.assertRaises(TypeError, lambda: ts_tz2 - dt) - self.assertRaises(TypeError, lambda: ts_tz - ts_tz2) - - # with dti - self.assertRaises(TypeError, lambda: dti - ts_tz) - self.assertRaises(TypeError, lambda: dti_tz - ts) - self.assertRaises(TypeError, lambda: dti_tz - ts_tz2) - - result = dti_tz - dt_tz - expected = TimedeltaIndex(['0 days', '1 days', '2 days']) - tm.assert_index_equal(result, expected) - - result = dt_tz - dti_tz - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) - tm.assert_index_equal(result, expected) - - result = dti_tz - ts_tz - expected = TimedeltaIndex(['0 days', '1 days', '2 days']) - tm.assert_index_equal(result, expected) - - result = ts_tz - dti_tz - expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) - tm.assert_index_equal(result, expected) - - result = td - td - expected = Timedelta('0 days') - _check(result, expected) - - result = 
dti_tz - td - expected = DatetimeIndex( - ['20121231', '20130101', '20130102'], tz='US/Eastern') - tm.assert_index_equal(result, expected) - - def test_dti_tdi_numeric_ops(self): - - # These are normally union/diff set-like ops - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - - # TODO(wesm): unused? - # td = Timedelta('1 days') - # dt = Timestamp('20130101') - - result = tdi - tdi - expected = TimedeltaIndex(['0 days', pd.NaT, '0 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = tdi + tdi - expected = TimedeltaIndex(['2 days', pd.NaT, '4 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = dti - tdi # name will be reset - expected = DatetimeIndex(['20121231', pd.NaT, '20130101']) - tm.assert_index_equal(result, expected) - - def test_sub_period(self): - # GH 13078 - # not supported, check TypeError - p = pd.Period('2011-01-01', freq='D') - - for freq in [None, 'H']: - idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq) - - with tm.assertRaises(TypeError): - idx - p - - with tm.assertRaises(TypeError): - p - idx - - def test_addition_ops(self): - - # with datetimes/timedelta and tdi/dti - tdi = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') - dti = date_range('20130101', periods=3, name='bar') - td = Timedelta('1 days') - dt = Timestamp('20130101') - - result = tdi + dt - expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') - tm.assert_index_equal(result, expected) - - result = dt + tdi - expected = DatetimeIndex(['20130102', pd.NaT, '20130103'], name='foo') - tm.assert_index_equal(result, expected) - - result = td + tdi - expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') - tm.assert_index_equal(result, expected) - - result = tdi + td - expected = TimedeltaIndex(['2 days', pd.NaT, '3 days'], name='foo') - tm.assert_index_equal(result, expected) - - # unequal length - self.assertRaises(ValueError, lambda: tdi + dti[0:1]) - self.assertRaises(ValueError, lambda: tdi[0:1] + dti) - - # random indexes - self.assertRaises(TypeError, lambda: tdi + Int64Index([1, 2, 3])) - - # this is a union! 
- # self.assertRaises(TypeError, lambda : Int64Index([1,2,3]) + tdi) - - result = tdi + dti # name will be reset - expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) - tm.assert_index_equal(result, expected) - - result = dti + tdi # name will be reset - expected = DatetimeIndex(['20130102', pd.NaT, '20130105']) - tm.assert_index_equal(result, expected) - - result = dt + td - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - result = td + dt - expected = Timestamp('20130102') - self.assertEqual(result, expected) - - def test_comp_nat(self): - left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')]) - right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) - - for l, r in [(left, right), (left.asobject, right.asobject)]: - result = l == r - expected = np.array([False, False, True]) - tm.assert_numpy_array_equal(result, expected) - - result = l != r - expected = np.array([True, True, False]) - tm.assert_numpy_array_equal(result, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l == pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT == r, expected) - - expected = np.array([True, True, True]) - tm.assert_numpy_array_equal(l != pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT != l, expected) - - expected = np.array([False, False, False]) - tm.assert_numpy_array_equal(l < pd.NaT, expected) - tm.assert_numpy_array_equal(pd.NaT > l, expected) - - def test_value_counts_unique(self): - # GH 7735 - - idx = timedelta_range('1 days 09:00:00', freq='H', periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) - - exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10) - expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - expected = timedelta_range('1 days 09:00:00', freq='H', periods=10) - tm.assert_index_equal(idx.unique(), expected) - - idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00', - '1 days 09:00:00', '1 days 08:00:00', - '1 days 08:00:00', pd.NaT]) - - exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00']) - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00', - pd.NaT]) - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - - def test_nonunique_contains(self): - # GH 9512 - for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1], - ['00:01:00', '00:01:00', '00:02:00'], - ['00:01:00', '00:01:00', '00:00:01'])): - tm.assertIn(idx[0], idx) - - def test_unknown_attribute(self): - # GH 9680 - tdi = pd.timedelta_range(start=0, periods=10, freq='1s') - ts = pd.Series(np.random.normal(size=10), index=tdi) - self.assertNotIn('foo', ts.__dict__.keys()) - self.assertRaises(AttributeError, lambda: ts.foo) - - def test_order(self): - # GH 10295 - idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D', - name='idx') - idx2 = TimedeltaIndex( - ['1 hour', '2 hour', '3 hour'], freq='H', name='idx') - - for idx in [idx1, idx2]: - ordered = idx.sort_values() - self.assert_index_equal(ordered, idx) - self.assertEqual(ordered.freq, idx.freq) - - ordered = 
idx.sort_values(ascending=False) - expected = idx[::-1] - self.assert_index_equal(ordered, expected) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) - - ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, idx) - self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2]), - check_dtype=False) - self.assertEqual(ordered.freq, idx.freq) - - ordered, indexer = idx.sort_values(return_indexer=True, - ascending=False) - self.assert_index_equal(ordered, idx[::-1]) - self.assertEqual(ordered.freq, expected.freq) - self.assertEqual(ordered.freq.n, -1) - - idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour', - '2 hour ', '1 hour'], name='idx1') - exp1 = TimedeltaIndex(['1 hour', '1 hour', '2 hour', - '3 hour', '5 hour'], name='idx1') - - idx2 = TimedeltaIndex(['1 day', '3 day', '5 day', - '2 day', '1 day'], name='idx2') - - # TODO(wesm): unused? - # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day', - # '3 day', '5 day'], name='idx2') - - # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute', - # '2 minute', pd.NaT], name='idx3') - # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute', - # '5 minute'], name='idx3') - - for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: - ordered = idx.sort_values() - self.assert_index_equal(ordered, expected) - self.assertIsNone(ordered.freq) - - ordered = idx.sort_values(ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - self.assertIsNone(ordered.freq) - - ordered, indexer = idx.sort_values(return_indexer=True) - self.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - ordered, indexer = idx.sort_values(return_indexer=True, - ascending=False) - self.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 4, 0]) - self.assert_numpy_array_equal(indexer, exp, check_dtype=False) - self.assertIsNone(ordered.freq) - - def test_getitem(self): - idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - - for idx in [idx1]: - result = idx[0] - self.assertEqual(result, pd.Timedelta('1 day')) - - result = idx[0:5] - expected = pd.timedelta_range('1 day', '5 day', freq='D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[0:10:2] - expected = pd.timedelta_range('1 day', '9 day', freq='2D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[-20:-5:3] - expected = pd.timedelta_range('12 day', '24 day', freq='3D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx[4::-1] - expected = TimedeltaIndex(['5 day', '4 day', '3 day', - '2 day', '1 day'], - freq='-1D', name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - def test_drop_duplicates_metadata(self): - # GH 10115 - idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - result = idx.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertEqual(idx.freq, result.freq) - - idx_dup = idx.append(idx) - self.assertIsNone(idx_dup.freq) # freq is reset - result = idx_dup.drop_duplicates() - self.assert_index_equal(idx, result) - self.assertIsNone(result.freq) - - def test_drop_duplicates(self): - # to check Index/Series compat - base = pd.timedelta_range('1 day', '31 day', 
freq='D', name='idx') - idx = base.append(base[:5]) - - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) - - res = idx.drop_duplicates(keep='last') - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep='last') - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) - - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) - - def test_take(self): - # GH 10295 - idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - - for idx in [idx1]: - result = idx.take([0]) - self.assertEqual(result, pd.Timedelta('1 day')) - - result = idx.take([-1]) - self.assertEqual(result, pd.Timedelta('31 day')) - - result = idx.take([0, 1, 2]) - expected = pd.timedelta_range('1 day', '3 day', freq='D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([0, 2, 4]) - expected = pd.timedelta_range('1 day', '5 day', freq='2D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([7, 4, 1]) - expected = pd.timedelta_range('8 day', '2 day', freq='-3D', - name='idx') - self.assert_index_equal(result, expected) - self.assertEqual(result.freq, expected.freq) - - result = idx.take([3, 2, 5]) - expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - result = idx.take([-3, 2, 5]) - expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx') - self.assert_index_equal(result, expected) - self.assertIsNone(result.freq) - - def test_take_invalid_kwargs(self): - idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') - indices = [1, 6, 5, 9, 10, 13, 15, 3] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, idx.take, - indices, mode='clip') - - def test_infer_freq(self): - # GH 11018 - for freq in ['D', '3D', '-3D', 'H', '2H', '-2H', 'T', '2T', 'S', '-3S' - ]: - idx = pd.timedelta_range('1', freq=freq, periods=10) - result = pd.TimedeltaIndex(idx.asi8, freq='infer') - tm.assert_index_equal(idx, result) - self.assertEqual(result.freq, freq) - - def test_nat_new(self): - - idx = pd.timedelta_range('1', freq='D', periods=5, name='x') - result = idx._nat_new() - exp = pd.TimedeltaIndex([pd.NaT] * 5, name='x') - tm.assert_index_equal(result, exp) - - result = idx._nat_new(box=False) - exp = np.array([tslib.iNaT] * 5, dtype=np.int64) - tm.assert_numpy_array_equal(result, exp) - - def test_shift(self): - # GH 9903 - idx = pd.TimedeltaIndex([], name='xxx') - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - tm.assert_index_equal(idx.shift(3, freq='H'), idx) - - idx = pd.TimedeltaIndex(['5 hours', '6 hours', '9 hours'], name='xxx') - tm.assert_index_equal(idx.shift(0, freq='H'), idx) - exp = pd.TimedeltaIndex(['8 hours', '9 hours', '12 hours'], name='xxx') - tm.assert_index_equal(idx.shift(3, freq='H'), exp) - exp = pd.TimedeltaIndex(['2 hours', '3 hours', '6 hours'], name='xxx') 
- tm.assert_index_equal(idx.shift(-3, freq='H'), exp) - - tm.assert_index_equal(idx.shift(0, freq='T'), idx) - exp = pd.TimedeltaIndex(['05:03:00', '06:03:00', '9:03:00'], - name='xxx') - tm.assert_index_equal(idx.shift(3, freq='T'), exp) - exp = pd.TimedeltaIndex(['04:57:00', '05:57:00', '8:57:00'], - name='xxx') - tm.assert_index_equal(idx.shift(-3, freq='T'), exp) - - def test_repeat(self): - index = pd.timedelta_range('1 days', periods=2, freq='D') - exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days']) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - index = TimedeltaIndex(['1 days', 'NaT', '3 days']) - exp = TimedeltaIndex(['1 days', '1 days', '1 days', - 'NaT', 'NaT', 'NaT', - '3 days', '3 days', '3 days']) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - self.assertIsNone(res.freq) - - def test_nat(self): - self.assertIs(pd.TimedeltaIndex._na_value, pd.NaT) - self.assertIs(pd.TimedeltaIndex([])._na_value, pd.NaT) - - idx = pd.TimedeltaIndex(['1 days', '2 days']) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - self.assertFalse(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([], dtype=np.intp)) - - idx = pd.TimedeltaIndex(['1 days', 'NaT']) - self.assertTrue(idx._can_hold_na) - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - self.assertTrue(idx.hasnans) - tm.assert_numpy_array_equal(idx._nan_idxs, - np.array([1], dtype=np.intp)) - - def test_equals(self): - # GH 13107 - idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT']) - self.assertTrue(idx.equals(idx)) - self.assertTrue(idx.equals(idx.copy())) - self.assertTrue(idx.equals(idx.asobject)) - self.assertTrue(idx.asobject.equals(idx)) - self.assertTrue(idx.asobject.equals(idx.asobject)) - self.assertFalse(idx.equals(list(idx))) - self.assertFalse(idx.equals(pd.Series(idx))) - - idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT']) - self.assertFalse(idx.equals(idx2)) - self.assertFalse(idx.equals(idx2.copy())) - self.assertFalse(idx.equals(idx2.asobject)) - self.assertFalse(idx.asobject.equals(idx2)) - self.assertFalse(idx.asobject.equals(idx2.asobject)) - self.assertFalse(idx.equals(list(idx2))) - self.assertFalse(idx.equals(pd.Series(idx2))) - - class TestPeriodIndexOps(Ops): def setUp(self): super(TestPeriodIndexOps, self).setUp() diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py deleted file mode 100644 index 209e6e40d5cf0..0000000000000 --- a/pandas/tseries/tests/test_daterange.py +++ /dev/null @@ -1,818 +0,0 @@ -from datetime import datetime -from pandas.compat import range -import numpy as np - -from pandas.core.index import Index -from pandas.tseries.index import DatetimeIndex - -from pandas import Timestamp -from pandas.tseries.offsets import (BDay, BMonthEnd, CDay, MonthEnd, - generate_range, DateOffset, Minute) -from pandas.tseries.index import cdate_range, bdate_range, date_range - -from pandas.core import common as com -from pandas.util.testing import assertRaisesRegexp -import pandas.util.testing as tm - - -def eq_gen_range(kwargs, expected): - rng = generate_range(**kwargs) - assert (np.array_equal(list(rng), expected)) - - -START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) - - -class TestGenRangeGeneration(tm.TestCase): - - def test_generate(self): - rng1 = list(generate_range(START, END, offset=BDay())) - rng2 = list(generate_range(START, END, 
time_rule='B')) - self.assertEqual(rng1, rng2) - - def test_generate_cday(self): - rng1 = list(generate_range(START, END, offset=CDay())) - rng2 = list(generate_range(START, END, time_rule='C')) - self.assertEqual(rng1, rng2) - - def test_1(self): - eq_gen_range(dict(start=datetime(2009, 3, 25), periods=2), - [datetime(2009, 3, 25), datetime(2009, 3, 26)]) - - def test_2(self): - eq_gen_range(dict(start=datetime(2008, 1, 1), - end=datetime(2008, 1, 3)), - [datetime(2008, 1, 1), - datetime(2008, 1, 2), - datetime(2008, 1, 3)]) - - def test_3(self): - eq_gen_range(dict(start=datetime(2008, 1, 5), - end=datetime(2008, 1, 6)), - []) - - def test_precision_finer_than_offset(self): - # GH 9907 - result1 = DatetimeIndex(start='2015-04-15 00:00:03', - end='2016-04-22 00:00:00', freq='Q') - result2 = DatetimeIndex(start='2015-04-15 00:00:03', - end='2015-06-22 00:00:04', freq='W') - expected1_list = ['2015-06-30 00:00:03', '2015-09-30 00:00:03', - '2015-12-31 00:00:03', '2016-03-31 00:00:03'] - expected2_list = ['2015-04-19 00:00:03', '2015-04-26 00:00:03', - '2015-05-03 00:00:03', '2015-05-10 00:00:03', - '2015-05-17 00:00:03', '2015-05-24 00:00:03', - '2015-05-31 00:00:03', '2015-06-07 00:00:03', - '2015-06-14 00:00:03', '2015-06-21 00:00:03'] - expected1 = DatetimeIndex(expected1_list, dtype='datetime64[ns]', - freq='Q-DEC', tz=None) - expected2 = DatetimeIndex(expected2_list, dtype='datetime64[ns]', - freq='W-SUN', tz=None) - self.assert_index_equal(result1, expected1) - self.assert_index_equal(result2, expected2) - - -class TestDateRange(tm.TestCase): - def setUp(self): - self.rng = bdate_range(START, END) - - def test_constructor(self): - bdate_range(START, END, freq=BDay()) - bdate_range(START, periods=20, freq=BDay()) - bdate_range(end=START, periods=20, freq=BDay()) - self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') - self.assertRaises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') - - def test_naive_aware_conflicts(self): - naive = bdate_range(START, END, freq=BDay(), tz=None) - aware = bdate_range(START, END, freq=BDay(), - tz="Asia/Hong_Kong") - assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", naive.join, aware) - assertRaisesRegexp(TypeError, "tz-naive.*tz-aware", aware.join, naive) - - def test_cached_range(self): - DatetimeIndex._cached_range(START, END, offset=BDay()) - DatetimeIndex._cached_range(START, periods=20, offset=BDay()) - DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) - - assertRaisesRegexp(TypeError, "offset", DatetimeIndex._cached_range, - START, END) - - assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, START, - offset=BDay()) - - assertRaisesRegexp(TypeError, "specify period", - DatetimeIndex._cached_range, end=END, - offset=BDay()) - - assertRaisesRegexp(TypeError, "start or end", - DatetimeIndex._cached_range, periods=20, - offset=BDay()) - - def test_cached_range_bug(self): - rng = date_range('2010-09-01 05:00:00', periods=50, - freq=DateOffset(hours=6)) - self.assertEqual(len(rng), 50) - self.assertEqual(rng[0], datetime(2010, 9, 1, 5)) - - def test_timezone_comparaison_bug(self): - start = Timestamp('20130220 10:00', tz='US/Eastern') - try: - date_range(start, periods=2, tz='US/Eastern') - except AssertionError: - self.fail() - - def test_timezone_comparaison_assert(self): - start = Timestamp('20130220 10:00', tz='US/Eastern') - self.assertRaises(AssertionError, date_range, start, periods=2, - tz='Europe/Berlin') - - def test_comparison(self): - d = self.rng[10] - - comp = self.rng > 
d - self.assertTrue(comp[11]) - self.assertFalse(comp[9]) - - def test_copy(self): - cp = self.rng.copy() - repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, BDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) - - def test_shift(self): - shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - rng = date_range(START, END, freq=BMonthEnd()) - shifted = rng.shift(1, freq=BDay()) - self.assertEqual(shifted[0], rng[0] + BDay()) - - def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_union(self): - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_union = left.union(right) - tm.assertIsInstance(the_union, Index) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # order does not matter - tm.assert_index_equal(right.union(left), the_union) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) - - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - def test_union_not_cacheable(self): - rng = date_range('1/1/2000', periods=50, freq=Minute()) - rng1 = rng[10:] - rng2 = rng[:25] - the_union = rng1.union(rng2) - self.assert_index_equal(the_union, rng) - - rng1 = rng[10:] - rng2 = rng[15:35] - the_union = rng1.union(rng2) - expected = rng[10:] - self.assert_index_equal(the_union, expected) - - def test_intersection(self): - 
rng = date_range('1/1/2000', periods=50, freq=Minute()) - rng1 = rng[10:] - rng2 = rng[:25] - the_int = rng1.intersection(rng2) - expected = rng[10:25] - self.assert_index_equal(the_int, expected) - tm.assertIsInstance(the_int, DatetimeIndex) - self.assertEqual(the_int.offset, rng.offset) - - the_int = rng1.intersection(rng2.view(DatetimeIndex)) - self.assert_index_equal(the_int, expected) - - # non-overlapping - the_int = rng[:10].intersection(rng[10:]) - expected = DatetimeIndex([]) - self.assert_index_equal(the_int, expected) - - def test_intersection_bug(self): - # GH #771 - a = bdate_range('11/30/2011', '12/31/2011') - b = bdate_range('12/10/2011', '12/20/2011') - result = a.intersection(b) - self.assert_index_equal(result, b) - - def test_summary(self): - self.rng.summary() - self.rng[2:2].summary() - - def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - bdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - - def test_misc(self): - end = datetime(2009, 5, 13) - dr = bdate_range(end=end, periods=20) - firstDate = end - 19 * BDay() - - assert len(dr) == 20 - assert dr[0] == firstDate - assert dr[-1] == end - - def test_date_parse_failure(self): - badly_formed_date = '2007/100/1' - - self.assertRaises(ValueError, Timestamp, badly_formed_date) - - self.assertRaises(ValueError, bdate_range, start=badly_formed_date, - periods=10) - self.assertRaises(ValueError, bdate_range, end=badly_formed_date, - periods=10) - self.assertRaises(ValueError, bdate_range, badly_formed_date, - badly_formed_date) - - def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) - - def test_identical(self): - t1 = self.rng.copy() - t2 = self.rng.copy() - self.assertTrue(t1.identical(t2)) - - # name - t1 = t1.rename('foo') - self.assertTrue(t1.equals(t2)) - self.assertFalse(t1.identical(t2)) - t2 = t2.rename('foo') - self.assertTrue(t1.identical(t2)) - - # freq - t2v = Index(t2.values) - self.assertTrue(t1.equals(t2v)) - self.assertFalse(t1.identical(t2v)) - - def test_daterange_bug_456(self): - # GH #456 - rng1 = bdate_range('12/5/2011', '12/5/2011') - rng2 = bdate_range('12/2/2011', '12/5/2011') - rng2.offset = BDay() - - result = rng1.union(rng2) - tm.assertIsInstance(result, DatetimeIndex) - - def test_error_with_zero_monthends(self): - self.assertRaises(ValueError, date_range, '1/1/2000', '1/1/2001', - freq=MonthEnd(0)) - - def test_range_bug(self): - # GH #770 - offset = DateOffset(months=3) - result = date_range("2011-1-1", "2012-1-31", freq=offset) - - start = datetime(2011, 1, 1) - exp_values = [start + i * offset for i in range(5)] - tm.assert_index_equal(result, DatetimeIndex(exp_values)) - - def test_range_tz_pytz(self): - # GH 2906 - tm._skip_if_no_pytz() - from pytz import timezone - - tz = timezone('US/Eastern') - start = tz.localize(datetime(2011, 1, 1)) - end = tz.localize(datetime(2011, 1, 3)) - - dr = date_range(start=start, periods=3) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) - - dr = date_range(end=end, periods=3) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) - - dr = date_range(start=start, end=end) - self.assertEqual(dr.tz.zone, tz.zone) - self.assertEqual(dr[0], start) - self.assertEqual(dr[2], end) - - def test_range_tz_dst_straddle_pytz(self): - - tm._skip_if_no_pytz() - from 
pytz import timezone - tz = timezone('US/Eastern') - dates = [(tz.localize(datetime(2014, 3, 6)), - tz.localize(datetime(2014, 3, 12))), - (tz.localize(datetime(2013, 11, 1)), - tz.localize(datetime(2013, 11, 6)))] - for (start, end) in dates: - dr = date_range(start, end, freq='D') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) - - dr = date_range(start, end, freq='D', tz='US/Eastern') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) - - dr = date_range(start.replace(tzinfo=None), end.replace( - tzinfo=None), freq='D', tz='US/Eastern') - self.assertEqual(dr[0], start) - self.assertEqual(dr[-1], end) - self.assertEqual(np.all(dr.hour == 0), True) - - def test_range_tz_dateutil(self): - # GH 2906 - tm._skip_if_no_dateutil() - # Use maybe_get_tz to fix filename in tz under dateutil. - from pandas.tslib import maybe_get_tz - tz = lambda x: maybe_get_tz('dateutil/' + x) - - start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) - end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern')) - - dr = date_range(start=start, periods=3) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) - - dr = date_range(end=end, periods=3) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) - - dr = date_range(start=start, end=end) - self.assertTrue(dr.tz == tz('US/Eastern')) - self.assertTrue(dr[0] == start) - self.assertTrue(dr[2] == end) - - def test_month_range_union_tz_pytz(self): - tm._skip_if_no_pytz() - from pytz import timezone - tz = timezone('US/Eastern') - - early_start = datetime(2011, 1, 1) - early_end = datetime(2011, 3, 1) - - late_start = datetime(2011, 3, 1) - late_end = datetime(2011, 5, 1) - - early_dr = date_range(start=early_start, end=early_end, tz=tz, - freq=MonthEnd()) - late_dr = date_range(start=late_start, end=late_end, tz=tz, - freq=MonthEnd()) - - early_dr.union(late_dr) - - def test_month_range_union_tz_dateutil(self): - tm._skip_if_windows_python_3() - tm._skip_if_no_dateutil() - from pandas.tslib import _dateutil_gettz as timezone - tz = timezone('US/Eastern') - - early_start = datetime(2011, 1, 1) - early_end = datetime(2011, 3, 1) - - late_start = datetime(2011, 3, 1) - late_end = datetime(2011, 5, 1) - - early_dr = date_range(start=early_start, end=early_end, tz=tz, - freq=MonthEnd()) - late_dr = date_range(start=late_start, end=late_end, tz=tz, - freq=MonthEnd()) - - early_dr.union(late_dr) - - def test_range_closed(self): - begin = datetime(2011, 1, 1) - end = datetime(2014, 1, 1) - - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq) - left = date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", freq=freq) - expected_left = left - expected_right = right - - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] - - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) - - def test_range_closed_with_tz_aware_start_end(self): - # GH12409, GH12684 - begin = Timestamp('2011/1/1', tz='US/Eastern') - end = Timestamp('2014/1/1', tz='US/Eastern') - - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq) - left = date_range(begin, end, closed="left", freq=freq) - right = date_range(begin, end, closed="right", 
freq=freq) - expected_left = left - expected_right = right - - if end == closed[-1]: - expected_left = closed[:-1] - if begin == closed[0]: - expected_right = closed[1:] - - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) - - begin = Timestamp('2011/1/1') - end = Timestamp('2014/1/1') - begintz = Timestamp('2011/1/1', tz='US/Eastern') - endtz = Timestamp('2014/1/1', tz='US/Eastern') - - for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: - closed = date_range(begin, end, closed=None, freq=freq, - tz='US/Eastern') - left = date_range(begin, end, closed="left", freq=freq, - tz='US/Eastern') - right = date_range(begin, end, closed="right", freq=freq, - tz='US/Eastern') - expected_left = left - expected_right = right - - if endtz == closed[-1]: - expected_left = closed[:-1] - if begintz == closed[0]: - expected_right = closed[1:] - - self.assert_index_equal(expected_left, left) - self.assert_index_equal(expected_right, right) - - def test_range_closed_boundary(self): - # GH 11804 - for closed in ['right', 'left', None]: - right_boundary = date_range('2015-09-12', '2015-12-01', - freq='QS-MAR', closed=closed) - left_boundary = date_range('2015-09-01', '2015-09-12', - freq='QS-MAR', closed=closed) - both_boundary = date_range('2015-09-01', '2015-12-01', - freq='QS-MAR', closed=closed) - expected_right = expected_left = expected_both = both_boundary - - if closed == 'right': - expected_left = both_boundary[1:] - if closed == 'left': - expected_right = both_boundary[:-1] - if closed is None: - expected_right = both_boundary[1:] - expected_left = both_boundary[:-1] - - self.assert_index_equal(right_boundary, expected_right) - self.assert_index_equal(left_boundary, expected_left) - self.assert_index_equal(both_boundary, expected_both) - - def test_years_only(self): - # GH 6961 - dr = date_range('2014', '2015', freq='M') - self.assertEqual(dr[0], datetime(2014, 1, 31)) - self.assertEqual(dr[-1], datetime(2014, 12, 31)) - - def test_freq_divides_end_in_nanos(self): - # GH 10885 - result_1 = date_range('2005-01-12 10:00', '2005-01-12 16:00', - freq='345min') - result_2 = date_range('2005-01-13 10:00', '2005-01-13 16:00', - freq='345min') - expected_1 = DatetimeIndex(['2005-01-12 10:00:00', - '2005-01-12 15:45:00'], - dtype='datetime64[ns]', freq='345T', - tz=None) - expected_2 = DatetimeIndex(['2005-01-13 10:00:00', - '2005-01-13 15:45:00'], - dtype='datetime64[ns]', freq='345T', - tz=None) - self.assert_index_equal(result_1, expected_1) - self.assert_index_equal(result_2, expected_2) - - -class TestCustomDateRange(tm.TestCase): - def setUp(self): - self.rng = cdate_range(START, END) - - def test_constructor(self): - cdate_range(START, END, freq=CDay()) - cdate_range(START, periods=20, freq=CDay()) - cdate_range(end=START, periods=20, freq=CDay()) - self.assertRaises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') - self.assertRaises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') - - def test_cached_range(self): - DatetimeIndex._cached_range(START, END, offset=CDay()) - DatetimeIndex._cached_range(START, periods=20, - offset=CDay()) - DatetimeIndex._cached_range(end=START, periods=20, - offset=CDay()) - - self.assertRaises(Exception, DatetimeIndex._cached_range, START, END) - - self.assertRaises(Exception, DatetimeIndex._cached_range, START, - freq=CDay()) - - self.assertRaises(Exception, DatetimeIndex._cached_range, end=END, - freq=CDay()) - - self.assertRaises(Exception, DatetimeIndex._cached_range, periods=20, - freq=CDay()) - - def 
test_comparison(self): - d = self.rng[10] - - comp = self.rng > d - self.assertTrue(comp[11]) - self.assertFalse(comp[9]) - - def test_copy(self): - cp = self.rng.copy() - repr(cp) - self.assert_index_equal(cp, self.rng) - - def test_repr(self): - # only really care that it works - repr(self.rng) - - def test_getitem(self): - smaller = self.rng[:5] - exp = DatetimeIndex(self.rng.view(np.ndarray)[:5]) - self.assert_index_equal(smaller, exp) - self.assertEqual(smaller.offset, self.rng.offset) - - sliced = self.rng[::5] - self.assertEqual(sliced.offset, CDay() * 5) - - fancy_indexed = self.rng[[4, 3, 2, 1, 0]] - self.assertEqual(len(fancy_indexed), 5) - tm.assertIsInstance(fancy_indexed, DatetimeIndex) - self.assertIsNone(fancy_indexed.freq) - - # 32-bit vs. 64-bit platforms - self.assertEqual(self.rng[4], self.rng[np.int_(4)]) - - def test_getitem_matplotlib_hackaround(self): - values = self.rng[:, None] - expected = self.rng.values[:, None] - self.assert_numpy_array_equal(values, expected) - - def test_shift(self): - - shifted = self.rng.shift(5) - self.assertEqual(shifted[0], self.rng[5]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(-5) - self.assertEqual(shifted[5], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - shifted = self.rng.shift(0) - self.assertEqual(shifted[0], self.rng[0]) - self.assertEqual(shifted.offset, self.rng.offset) - - with tm.assert_produces_warning(com.PerformanceWarning): - rng = date_range(START, END, freq=BMonthEnd()) - shifted = rng.shift(1, freq=CDay()) - self.assertEqual(shifted[0], rng[0] + CDay()) - - def test_pickle_unpickle(self): - unpickled = self.round_trip_pickle(self.rng) - self.assertIsNotNone(unpickled.offset) - - def test_union(self): - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_union = left.union(right) - tm.assertIsInstance(the_union, Index) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_union = left.union(right) - tm.assertIsInstance(the_union, DatetimeIndex) - - # order does not matter - self.assert_index_equal(right.union(left), the_union) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_union = self.rng.union(rng) - tm.assertIsInstance(the_union, DatetimeIndex) - - def test_outer_join(self): - # should just behave as union - - # overlapping - left = self.rng[:10] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # non-overlapping, gap in middle - left = self.rng[:5] - right = self.rng[10:] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - # non-overlapping, no gap - left = self.rng[:5] - right = self.rng[5:10] - - the_join = left.join(right, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - - # overlapping, but different offset - rng = date_range(START, END, freq=BMonthEnd()) - - the_join = self.rng.join(rng, how='outer') - tm.assertIsInstance(the_join, DatetimeIndex) - self.assertIsNone(the_join.freq) - - def test_intersection_bug(self): - # GH #771 - a = cdate_range('11/30/2011', '12/31/2011') - b = cdate_range('12/10/2011', '12/20/2011') - result = a.intersection(b) - self.assert_index_equal(result, b) - - def test_summary(self): - self.rng.summary() 
- self.rng[2:2].summary() - - def test_summary_pytz(self): - tm._skip_if_no_pytz() - import pytz - cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() - - def test_summary_dateutil(self): - tm._skip_if_no_dateutil() - import dateutil - cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() - - def test_misc(self): - end = datetime(2009, 5, 13) - dr = cdate_range(end=end, periods=20) - firstDate = end - 19 * CDay() - - assert len(dr) == 20 - assert dr[0] == firstDate - assert dr[-1] == end - - def test_date_parse_failure(self): - badly_formed_date = '2007/100/1' - - self.assertRaises(ValueError, Timestamp, badly_formed_date) - - self.assertRaises(ValueError, cdate_range, start=badly_formed_date, - periods=10) - self.assertRaises(ValueError, cdate_range, end=badly_formed_date, - periods=10) - self.assertRaises(ValueError, cdate_range, badly_formed_date, - badly_formed_date) - - def test_equals(self): - self.assertFalse(self.rng.equals(list(self.rng))) - - def test_daterange_bug_456(self): - # GH #456 - rng1 = cdate_range('12/5/2011', '12/5/2011') - rng2 = cdate_range('12/2/2011', '12/5/2011') - rng2.offset = CDay() - - result = rng1.union(rng2) - tm.assertIsInstance(result, DatetimeIndex) - - def test_cdaterange(self): - rng = cdate_range('2013-05-01', periods=3) - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) - self.assert_index_equal(xp, rng) - - def test_cdaterange_weekmask(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu') - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) - self.assert_index_equal(xp, rng) - - def test_cdaterange_holidays(self): - rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) - self.assert_index_equal(xp, rng) - - def test_cdaterange_weekmask_and_holidays(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu', - holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) - self.assert_index_equal(xp, rng) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index fdc067a827a5b..b48628342ac88 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -6,27 +6,25 @@ """ +import numpy as np +from numpy.random import randn from datetime import datetime, date, timedelta -from pandas import Timestamp, _period -from pandas.tseries.frequencies import MONTHS, DAYS, _period_code_map -from pandas.tseries.period import Period, PeriodIndex, period_range -from pandas.tseries.index import DatetimeIndex, date_range, Index -from pandas.tseries.tools import to_datetime +import pandas as pd +import pandas.util.testing as tm import pandas.tseries.period as period import pandas.tseries.offsets as offsets - -import pandas as pd -import numpy as np -from numpy.random import randn +from pandas.tseries.tools import to_datetime +from pandas.tseries.period import Period, PeriodIndex, period_range +from pandas.tseries.index import DatetimeIndex, date_range, Index +from pandas._period import period_ordinal, period_asfreq from pandas.compat import range, lrange, lmap, zip, text_type, PY3, iteritems from pandas.compat.numpy import np_datetime64_compat - -from pandas import (Series, DataFrame, +from pandas.tseries.frequencies import (MONTHS, DAYS, _period_code_map, + get_freq) +from pandas import (Series, DataFrame, Timestamp, _period, tslib, _np_version_under1p9, _np_version_under1p10, 
                    _np_version_under1p12)
-from pandas import tslib
-import pandas.util.testing as tm


 class TestPeriodProperties(tm.TestCase):
@@ -4967,3 +4965,98 @@ def test_get_period_field_raises_on_out_of_range(self):
     def test_get_period_field_array_raises_on_out_of_range(self):
         self.assertRaises(ValueError, _period.get_period_field_arr, -1,
                           np.empty(1), 0)
+
+
+class TestTslib(tm.TestCase):
+    def test_intraday_conversion_factors(self):
+        self.assertEqual(period_asfreq(
+            1, get_freq('D'), get_freq('H'), False), 24)
+        self.assertEqual(period_asfreq(
+            1, get_freq('D'), get_freq('T'), False), 1440)
+        self.assertEqual(period_asfreq(
+            1, get_freq('D'), get_freq('S'), False), 86400)
+        self.assertEqual(period_asfreq(1, get_freq(
+            'D'), get_freq('L'), False), 86400000)
+        self.assertEqual(period_asfreq(1, get_freq(
+            'D'), get_freq('U'), False), 86400000000)
+        self.assertEqual(period_asfreq(1, get_freq(
+            'D'), get_freq('N'), False), 86400000000000)
+
+        self.assertEqual(period_asfreq(
+            1, get_freq('H'), get_freq('T'), False), 60)
+        self.assertEqual(period_asfreq(
+            1, get_freq('H'), get_freq('S'), False), 3600)
+        self.assertEqual(period_asfreq(1, get_freq('H'),
+                                       get_freq('L'), False), 3600000)
+        self.assertEqual(period_asfreq(1, get_freq(
+            'H'), get_freq('U'), False), 3600000000)
+        self.assertEqual(period_asfreq(1, get_freq(
+            'H'), get_freq('N'), False), 3600000000000)
+
+        self.assertEqual(period_asfreq(
+            1, get_freq('T'), get_freq('S'), False), 60)
+        self.assertEqual(period_asfreq(
+            1, get_freq('T'), get_freq('L'), False), 60000)
+        self.assertEqual(period_asfreq(1, get_freq(
+            'T'), get_freq('U'), False), 60000000)
+        self.assertEqual(period_asfreq(1, get_freq(
+            'T'), get_freq('N'), False), 60000000000)
+
+        self.assertEqual(period_asfreq(
+            1, get_freq('S'), get_freq('L'), False), 1000)
+        self.assertEqual(period_asfreq(1, get_freq('S'),
+                                       get_freq('U'), False), 1000000)
+        self.assertEqual(period_asfreq(1, get_freq(
+            'S'), get_freq('N'), False), 1000000000)
+
+        self.assertEqual(period_asfreq(
+            1, get_freq('L'), get_freq('U'), False), 1000)
+        self.assertEqual(period_asfreq(1, get_freq('L'),
+                                       get_freq('N'), False), 1000000)
+
+        self.assertEqual(period_asfreq(
+            1, get_freq('U'), get_freq('N'), False), 1000)
+
+    def test_period_ordinal_start_values(self):
+        # information for 1.1.1970
+        self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0,
+                                           get_freq('A')))
+        self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0,
+                                           get_freq('M')))
+        self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0,
+                                           get_freq('W')))
+        self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0,
+                                           get_freq('D')))
+        self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0,
+                                           get_freq('B')))
+
+    def test_period_ordinal_week(self):
+        self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0,
+                                           get_freq('W')))
+        self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0,
+                                           get_freq('W')))
+
+        self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0,
+                                              get_freq('W')))
+        self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0,
+                                              get_freq('W')))
+
+    def test_period_ordinal_business_day(self):
+        # Thursday
+        self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0,
+                                               get_freq('B')))
+        # Friday
+        self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0,
+                                               get_freq('B')))
+        # Saturday
+        self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0,
+                                               get_freq('B')))
+        # Sunday
+        self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0,
+                                               get_freq('B')))
+        # Monday
+ self.assertEqual(11417, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0, + get_freq('B'))) + # Tuesday + self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, + get_freq('B'))) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py deleted file mode 100644 index 13263259e0b8a..0000000000000 --- a/pandas/tseries/tests/test_timedeltas.py +++ /dev/null @@ -1,2052 +0,0 @@ -# pylint: disable-msg=E1101,W0612 - -from __future__ import division -from datetime import timedelta, time - -from distutils.version import LooseVersion -import numpy as np -import pandas as pd - -from pandas import (Index, Series, DataFrame, Timestamp, Timedelta, - TimedeltaIndex, isnull, date_range, - timedelta_range, Int64Index) -from pandas.compat import range -from pandas import compat, to_timedelta, tslib -from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type as ct -from pandas.util.testing import (assert_series_equal, assert_frame_equal, - assert_almost_equal, assert_index_equal) -from pandas.tseries.offsets import Day, Second -import pandas.util.testing as tm -from numpy.random import randn -from pandas import _np_version_under1p8 - -iNaT = tslib.iNaT - - -class TestTimedeltas(tm.TestCase): - _multiprocess_can_split_ = True - - def setUp(self): - pass - - def test_get_loc_nat(self): - tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00']) - - self.assertEqual(tidx.get_loc(pd.NaT), 1) - self.assertEqual(tidx.get_loc(None), 1) - self.assertEqual(tidx.get_loc(float('nan')), 1) - self.assertEqual(tidx.get_loc(np.nan), 1) - - def test_contains(self): - # Checking for any NaT-like objects - # GH 13603 - td = to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) - for v in [pd.NaT, None, float('nan'), np.nan]: - self.assertFalse((v in td)) - - td = to_timedelta([pd.NaT]) - for v in [pd.NaT, None, float('nan'), np.nan]: - self.assertTrue((v in td)) - - def test_construction(self): - - expected = np.timedelta64(10, 'D').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta(10, unit='d').value, expected) - self.assertEqual(Timedelta(10.0, unit='d').value, expected) - self.assertEqual(Timedelta('10 days').value, expected) - self.assertEqual(Timedelta(days=10).value, expected) - self.assertEqual(Timedelta(days=10.0).value, expected) - - expected += np.timedelta64(10, 's').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta('10 days 00:00:10').value, expected) - self.assertEqual(Timedelta(days=10, seconds=10).value, expected) - self.assertEqual( - Timedelta(days=10, milliseconds=10 * 1000).value, expected) - self.assertEqual( - Timedelta(days=10, microseconds=10 * 1000 * 1000).value, expected) - - # test construction with np dtypes - # GH 8757 - timedelta_kwargs = {'days': 'D', - 'seconds': 's', - 'microseconds': 'us', - 'milliseconds': 'ms', - 'minutes': 'm', - 'hours': 'h', - 'weeks': 'W'} - npdtypes = [np.int64, np.int32, np.int16, np.float64, np.float32, - np.float16] - for npdtype in npdtypes: - for pykwarg, npkwarg in timedelta_kwargs.items(): - expected = np.timedelta64(1, - npkwarg).astype('m8[ns]').view('i8') - self.assertEqual( - Timedelta(**{pykwarg: npdtype(1)}).value, expected) - - # rounding cases - self.assertEqual(Timedelta(82739999850000).value, 82739999850000) - self.assertTrue('0 days 22:58:59.999850' in str(Timedelta( - 82739999850000))) - self.assertEqual(Timedelta(123072001000000).value, 123072001000000) - self.assertTrue('1 days 10:11:12.001' in str(Timedelta( - 123072001000000))) - - # string conversion with/without 
leading zero - # GH 9570 - self.assertEqual(Timedelta('0:00:00'), timedelta(hours=0)) - self.assertEqual(Timedelta('00:00:00'), timedelta(hours=0)) - self.assertEqual(Timedelta('-1:00:00'), -timedelta(hours=1)) - self.assertEqual(Timedelta('-01:00:00'), -timedelta(hours=1)) - - # more strings & abbrevs - # GH 8190 - self.assertEqual(Timedelta('1 h'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hour'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hr'), timedelta(hours=1)) - self.assertEqual(Timedelta('1 hours'), timedelta(hours=1)) - self.assertEqual(Timedelta('-1 hours'), -timedelta(hours=1)) - self.assertEqual(Timedelta('1 m'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1.5 m'), timedelta(seconds=90)) - self.assertEqual(Timedelta('1 minute'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1 minutes'), timedelta(minutes=1)) - self.assertEqual(Timedelta('1 s'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 second'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 seconds'), timedelta(seconds=1)) - self.assertEqual(Timedelta('1 ms'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 milli'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 millisecond'), timedelta(milliseconds=1)) - self.assertEqual(Timedelta('1 us'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1 micros'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1 microsecond'), timedelta(microseconds=1)) - self.assertEqual(Timedelta('1.5 microsecond'), - Timedelta('00:00:00.000001500')) - self.assertEqual(Timedelta('1 ns'), Timedelta('00:00:00.000000001')) - self.assertEqual(Timedelta('1 nano'), Timedelta('00:00:00.000000001')) - self.assertEqual(Timedelta('1 nanosecond'), - Timedelta('00:00:00.000000001')) - - # combos - self.assertEqual(Timedelta('10 days 1 hour'), - timedelta(days=10, hours=1)) - self.assertEqual(Timedelta('10 days 1 h'), timedelta(days=10, hours=1)) - self.assertEqual(Timedelta('10 days 1 h 1m 1s'), timedelta( - days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), - - timedelta(days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s'), - - timedelta(days=10, hours=1, minutes=1, seconds=1)) - self.assertEqual(Timedelta('-10 days 1 h 1m 1s 3us'), - - timedelta(days=10, hours=1, minutes=1, - seconds=1, microseconds=3)) - self.assertEqual(Timedelta('-10 days 1 h 1.5m 1s 3us'), - - timedelta(days=10, hours=1, minutes=1, - seconds=31, microseconds=3)) - - # currently invalid as it has a - on the hhmmdd part (only allowed on - # the days) - self.assertRaises(ValueError, - lambda: Timedelta('-10 days -1 h 1.5m 1s 3us')) - - # only leading neg signs are allowed - self.assertRaises(ValueError, - lambda: Timedelta('10 days -1 h 1.5m 1s 3us')) - - # no units specified - self.assertRaises(ValueError, lambda: Timedelta('3.1415')) - - # invalid construction - tm.assertRaisesRegexp(ValueError, "cannot construct a Timedelta", - lambda: Timedelta()) - tm.assertRaisesRegexp(ValueError, "unit abbreviation w/o a number", - lambda: Timedelta('foo')) - tm.assertRaisesRegexp(ValueError, - "cannot construct a Timedelta from the passed " - "arguments, allowed keywords are ", - lambda: Timedelta(day=10)) - - # roundtripping both for string and value - for v in ['1s', '-1s', '1us', '-1us', '1 day', '-1 day', - '-23:59:59.999999', '-1 days +23:59:59.999999', '-1ns', - '1ns', '-23:59:59.999999999']: - - td = Timedelta(v) - self.assertEqual(Timedelta(td.value), td) - - # str does not 
normally display nanos - if not td.nanoseconds: - self.assertEqual(Timedelta(str(td)), td) - self.assertEqual(Timedelta(td._repr_base(format='all')), td) - - # floats - expected = np.timedelta64( - 10, 's').astype('m8[ns]').view('i8') + np.timedelta64( - 500, 'ms').astype('m8[ns]').view('i8') - self.assertEqual(Timedelta(10.5, unit='s').value, expected) - - # nat - self.assertEqual(Timedelta('').value, iNaT) - self.assertEqual(Timedelta('nat').value, iNaT) - self.assertEqual(Timedelta('NAT').value, iNaT) - self.assertEqual(Timedelta(None).value, iNaT) - self.assertEqual(Timedelta(np.nan).value, iNaT) - self.assertTrue(isnull(Timedelta('nat'))) - - # offset - self.assertEqual(to_timedelta(pd.offsets.Hour(2)), - Timedelta('0 days, 02:00:00')) - self.assertEqual(Timedelta(pd.offsets.Hour(2)), - Timedelta('0 days, 02:00:00')) - self.assertEqual(Timedelta(pd.offsets.Second(2)), - Timedelta('0 days, 00:00:02')) - - # unicode - # GH 11995 - expected = Timedelta('1H') - result = pd.Timedelta(u'1H') - self.assertEqual(result, expected) - self.assertEqual(to_timedelta(pd.offsets.Hour(2)), - Timedelta(u'0 days, 02:00:00')) - - self.assertRaises(ValueError, lambda: Timedelta(u'foo bar')) - - def test_round(self): - - t1 = Timedelta('1 days 02:34:56.789123456') - t2 = Timedelta('-1 days 02:34:56.789123456') - - for (freq, s1, s2) in [('N', t1, t2), - ('U', Timedelta('1 days 02:34:56.789123000'), - Timedelta('-1 days 02:34:56.789123000')), - ('L', Timedelta('1 days 02:34:56.789000000'), - Timedelta('-1 days 02:34:56.789000000')), - ('S', Timedelta('1 days 02:34:57'), - Timedelta('-1 days 02:34:57')), - ('2S', Timedelta('1 days 02:34:56'), - Timedelta('-1 days 02:34:56')), - ('5S', Timedelta('1 days 02:34:55'), - Timedelta('-1 days 02:34:55')), - ('T', Timedelta('1 days 02:35:00'), - Timedelta('-1 days 02:35:00')), - ('12T', Timedelta('1 days 02:36:00'), - Timedelta('-1 days 02:36:00')), - ('H', Timedelta('1 days 03:00:00'), - Timedelta('-1 days 03:00:00')), - ('d', Timedelta('1 days'), - Timedelta('-1 days'))]: - r1 = t1.round(freq) - self.assertEqual(r1, s1) - r2 = t2.round(freq) - self.assertEqual(r2, s2) - - # invalid - for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: t1.round(freq)) - - t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us') - t2 = -1 * t1 - t1a = timedelta_range('1 days', periods=3, freq='1 min 2 s') - t1c = pd.TimedeltaIndex([1, 1, 1], unit='D') - - # note that negative times round DOWN! 
so don't give whole numbers - for (freq, s1, s2) in [('N', t1, t2), - ('U', t1, t2), - ('L', t1a, - TimedeltaIndex(['-1 days +00:00:00', - '-2 days +23:58:58', - '-2 days +23:57:56'], - dtype='timedelta64[ns]', - freq=None) - ), - ('S', t1a, - TimedeltaIndex(['-1 days +00:00:00', - '-2 days +23:58:58', - '-2 days +23:57:56'], - dtype='timedelta64[ns]', - freq=None) - ), - ('12T', t1c, - TimedeltaIndex(['-1 days', - '-1 days', - '-1 days'], - dtype='timedelta64[ns]', - freq=None) - ), - ('H', t1c, - TimedeltaIndex(['-1 days', - '-1 days', - '-1 days'], - dtype='timedelta64[ns]', - freq=None) - ), - ('d', t1c, - pd.TimedeltaIndex([-1, -1, -1], unit='D') - )]: - - r1 = t1.round(freq) - tm.assert_index_equal(r1, s1) - r2 = t2.round(freq) - tm.assert_index_equal(r2, s2) - - # invalid - for freq in ['Y', 'M', 'foobar']: - self.assertRaises(ValueError, lambda: t1.round(freq)) - - def test_repr(self): - - self.assertEqual(repr(Timedelta(10, unit='d')), - "Timedelta('10 days 00:00:00')") - self.assertEqual(repr(Timedelta(10, unit='s')), - "Timedelta('0 days 00:00:10')") - self.assertEqual(repr(Timedelta(10, unit='ms')), - "Timedelta('0 days 00:00:00.010000')") - self.assertEqual(repr(Timedelta(-10, unit='ms')), - "Timedelta('-1 days +23:59:59.990000')") - - def test_identity(self): - - td = Timedelta(10, unit='d') - self.assertTrue(isinstance(td, Timedelta)) - self.assertTrue(isinstance(td, timedelta)) - - def test_conversion(self): - - for td in [Timedelta(10, unit='d'), - Timedelta('1 days, 10:11:12.012345')]: - pydt = td.to_pytimedelta() - self.assertTrue(td == Timedelta(pydt)) - self.assertEqual(td, pydt) - self.assertTrue(isinstance(pydt, timedelta) and not isinstance( - pydt, Timedelta)) - - self.assertEqual(td, np.timedelta64(td.value, 'ns')) - td64 = td.to_timedelta64() - self.assertEqual(td64, np.timedelta64(td.value, 'ns')) - self.assertEqual(td, td64) - self.assertTrue(isinstance(td64, np.timedelta64)) - - # this is NOT equal and cannot be roundtriped (because of the nanos) - td = Timedelta('1 days, 10:11:12.012345678') - self.assertTrue(td != td.to_pytimedelta()) - - def test_ops(self): - - td = Timedelta(10, unit='d') - self.assertEqual(-td, Timedelta(-10, unit='d')) - self.assertEqual(+td, Timedelta(10, unit='d')) - self.assertEqual(td - td, Timedelta(0, unit='ns')) - self.assertTrue((td - pd.NaT) is pd.NaT) - self.assertEqual(td + td, Timedelta(20, unit='d')) - self.assertTrue((td + pd.NaT) is pd.NaT) - self.assertEqual(td * 2, Timedelta(20, unit='d')) - self.assertTrue((td * pd.NaT) is pd.NaT) - self.assertEqual(td / 2, Timedelta(5, unit='d')) - self.assertEqual(abs(td), td) - self.assertEqual(abs(-td), td) - self.assertEqual(td / td, 1) - self.assertTrue((td / pd.NaT) is np.nan) - - # invert - self.assertEqual(-td, Timedelta('-10d')) - self.assertEqual(td * -1, Timedelta('-10d')) - self.assertEqual(-1 * td, Timedelta('-10d')) - self.assertEqual(abs(-td), Timedelta('10d')) - - # invalid - self.assertRaises(TypeError, lambda: Timedelta(11, unit='d') // 2) - - # invalid multiply with another timedelta - self.assertRaises(TypeError, lambda: td * td) - - # can't operate with integers - self.assertRaises(TypeError, lambda: td + 2) - self.assertRaises(TypeError, lambda: td - 2) - - def test_ops_offsets(self): - td = Timedelta(10, unit='d') - self.assertEqual(Timedelta(241, unit='h'), td + pd.offsets.Hour(1)) - self.assertEqual(Timedelta(241, unit='h'), pd.offsets.Hour(1) + td) - self.assertEqual(240, td / pd.offsets.Hour(1)) - self.assertEqual(1 / 240.0, pd.offsets.Hour(1) / td) - 
self.assertEqual(Timedelta(239, unit='h'), td - pd.offsets.Hour(1)) - self.assertEqual(Timedelta(-239, unit='h'), pd.offsets.Hour(1) - td) - - def test_freq_conversion(self): - - td = Timedelta('1 days 2 hours 3 ns') - result = td / np.timedelta64(1, 'D') - self.assertEqual(result, td.value / float(86400 * 1e9)) - result = td / np.timedelta64(1, 's') - self.assertEqual(result, td.value / float(1e9)) - result = td / np.timedelta64(1, 'ns') - self.assertEqual(result, td.value) - - def test_ops_ndarray(self): - td = Timedelta('1 day') - - # timedelta, timedelta - other = pd.to_timedelta(['1 day']).values - expected = pd.to_timedelta(['2 days']).values - self.assert_numpy_array_equal(td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other + td, expected) - self.assertRaises(TypeError, lambda: td + np.array([1])) - self.assertRaises(TypeError, lambda: np.array([1]) + td) - - expected = pd.to_timedelta(['0 days']).values - self.assert_numpy_array_equal(td - other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(-other + td, expected) - self.assertRaises(TypeError, lambda: td - np.array([1])) - self.assertRaises(TypeError, lambda: np.array([1]) - td) - - expected = pd.to_timedelta(['2 days']).values - self.assert_numpy_array_equal(td * np.array([2]), expected) - self.assert_numpy_array_equal(np.array([2]) * td, expected) - self.assertRaises(TypeError, lambda: td * other) - self.assertRaises(TypeError, lambda: other * td) - - self.assert_numpy_array_equal(td / other, - np.array([1], dtype=np.float64)) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other / td, - np.array([1], dtype=np.float64)) - - # timedelta, datetime - other = pd.to_datetime(['2000-01-01']).values - expected = pd.to_datetime(['2000-01-02']).values - self.assert_numpy_array_equal(td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other + td, expected) - - expected = pd.to_datetime(['1999-12-31']).values - self.assert_numpy_array_equal(-td + other, expected) - if LooseVersion(np.__version__) >= '1.8': - self.assert_numpy_array_equal(other - td, expected) - - def test_ops_series(self): - # regression test for GH8813 - td = Timedelta('1 day') - other = pd.Series([1, 2]) - expected = pd.Series(pd.to_timedelta(['1 day', '2 days'])) - tm.assert_series_equal(expected, td * other) - tm.assert_series_equal(expected, other * td) - - def test_ops_series_object(self): - # GH 13043 - s = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'), - pd.Timestamp('2015-01-01', tz='Asia/Tokyo')], - name='xxx') - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'), - pd.Timestamp('2015-01-02', tz='Asia/Tokyo')], - name='xxx') - tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) - tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) - - # object series & object series - s2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'), - pd.Timestamp('2015-01-05', tz='Asia/Tokyo')], - name='xxx') - self.assertEqual(s2.dtype, object) - exp = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')], - name='xxx') - tm.assert_series_equal(s2 - s, exp) - tm.assert_series_equal(s - s2, -exp) - - s = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')], - name='xxx', dtype=object) - self.assertEqual(s.dtype, object) - - exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')], - name='xxx') - tm.assert_series_equal(s + 
pd.Timedelta('00:30:00'), exp) - tm.assert_series_equal(pd.Timedelta('00:30:00') + s, exp) - - def test_compare_timedelta_series(self): - # regresssion test for GH5963 - s = pd.Series([timedelta(days=1), timedelta(days=2)]) - actual = s > timedelta(days=1) - expected = pd.Series([False, True]) - tm.assert_series_equal(actual, expected) - - def test_compare_timedelta_ndarray(self): - # GH11835 - periods = [Timedelta('0 days 01:00:00'), Timedelta('0 days 01:00:00')] - arr = np.array(periods) - result = arr[0] > arr - expected = np.array([False, False]) - self.assert_numpy_array_equal(result, expected) - - def test_ops_notimplemented(self): - class Other: - pass - - other = Other() - - td = Timedelta('1 day') - self.assertTrue(td.__add__(other) is NotImplemented) - self.assertTrue(td.__sub__(other) is NotImplemented) - self.assertTrue(td.__truediv__(other) is NotImplemented) - self.assertTrue(td.__mul__(other) is NotImplemented) - self.assertTrue(td.__floordiv__(td) is NotImplemented) - - def test_ops_error_str(self): - # GH 13624 - td = Timedelta('1 day') - - for l, r in [(td, 'a'), ('a', td)]: - - with tm.assertRaises(TypeError): - l + r - - with tm.assertRaises(TypeError): - l > r - - self.assertFalse(l == r) - self.assertTrue(l != r) - - def test_fields(self): - def check(value): - # that we are int/long like - self.assertTrue(isinstance(value, (int, compat.long))) - - # compat to datetime.timedelta - rng = to_timedelta('1 days, 10:11:12') - self.assertEqual(rng.days, 1) - self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12) - self.assertEqual(rng.microseconds, 0) - self.assertEqual(rng.nanoseconds, 0) - - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) - - # GH 10050 - check(rng.days) - check(rng.seconds) - check(rng.microseconds) - check(rng.nanoseconds) - - td = Timedelta('-1 days, 10:11:12') - self.assertEqual(abs(td), Timedelta('13:48:48')) - self.assertTrue(str(td) == "-1 days +10:11:12") - self.assertEqual(-td, Timedelta('0 days 13:48:48')) - self.assertEqual(-Timedelta('-1 days, 10:11:12').value, 49728000000000) - self.assertEqual(Timedelta('-1 days, 10:11:12').value, -49728000000000) - - rng = to_timedelta('-1 days, 10:11:12.100123456') - self.assertEqual(rng.days, -1) - self.assertEqual(rng.seconds, 10 * 3600 + 11 * 60 + 12) - self.assertEqual(rng.microseconds, 100 * 1000 + 123) - self.assertEqual(rng.nanoseconds, 456) - self.assertRaises(AttributeError, lambda: rng.hours) - self.assertRaises(AttributeError, lambda: rng.minutes) - self.assertRaises(AttributeError, lambda: rng.milliseconds) - - # components - tup = pd.to_timedelta(-1, 'us').components - self.assertEqual(tup.days, -1) - self.assertEqual(tup.hours, 23) - self.assertEqual(tup.minutes, 59) - self.assertEqual(tup.seconds, 59) - self.assertEqual(tup.milliseconds, 999) - self.assertEqual(tup.microseconds, 999) - self.assertEqual(tup.nanoseconds, 0) - - # GH 10050 - check(tup.days) - check(tup.hours) - check(tup.minutes) - check(tup.seconds) - check(tup.milliseconds) - check(tup.microseconds) - check(tup.nanoseconds) - - tup = Timedelta('-1 days 1 us').components - self.assertEqual(tup.days, -2) - self.assertEqual(tup.hours, 23) - self.assertEqual(tup.minutes, 59) - self.assertEqual(tup.seconds, 59) - self.assertEqual(tup.milliseconds, 999) - self.assertEqual(tup.microseconds, 999) - self.assertEqual(tup.nanoseconds, 0) - - def test_timedelta_range(self): - - expected = to_timedelta(np.arange(5), 
unit='D') - result = timedelta_range('0 days', periods=5, freq='D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta(np.arange(11), unit='D') - result = timedelta_range('0 days', '10 days', freq='D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day() - result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02', - freq='D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2) - result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D') - tm.assert_index_equal(result, expected) - - expected = to_timedelta(np.arange(50), unit='T') * 30 - result = timedelta_range('0 days', freq='30T', periods=50) - tm.assert_index_equal(result, expected) - - # GH 11776 - arr = np.arange(10).reshape(2, 5) - df = pd.DataFrame(np.arange(10).reshape(2, 5)) - for arg in (arr, df): - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_timedelta(arg) - for errors in ['ignore', 'raise', 'coerce']: - with tm.assertRaisesRegexp(TypeError, "1-d array"): - to_timedelta(arg, errors=errors) - - # issue10583 - df = pd.DataFrame(np.random.normal(size=(10, 4))) - df.index = pd.timedelta_range(start='0s', periods=10, freq='s') - expected = df.loc[pd.Timedelta('0s'):, :] - result = df.loc['0s':, :] - assert_frame_equal(expected, result) - - def test_numeric_conversions(self): - self.assertEqual(ct(0), np.timedelta64(0, 'ns')) - self.assertEqual(ct(10), np.timedelta64(10, 'ns')) - self.assertEqual(ct(10, unit='ns'), np.timedelta64( - 10, 'ns').astype('m8[ns]')) - - self.assertEqual(ct(10, unit='us'), np.timedelta64( - 10, 'us').astype('m8[ns]')) - self.assertEqual(ct(10, unit='ms'), np.timedelta64( - 10, 'ms').astype('m8[ns]')) - self.assertEqual(ct(10, unit='s'), np.timedelta64( - 10, 's').astype('m8[ns]')) - self.assertEqual(ct(10, unit='d'), np.timedelta64( - 10, 'D').astype('m8[ns]')) - - def test_timedelta_conversions(self): - self.assertEqual(ct(timedelta(seconds=1)), - np.timedelta64(1, 's').astype('m8[ns]')) - self.assertEqual(ct(timedelta(microseconds=1)), - np.timedelta64(1, 'us').astype('m8[ns]')) - self.assertEqual(ct(timedelta(days=1)), - np.timedelta64(1, 'D').astype('m8[ns]')) - - def test_short_format_converters(self): - def conv(v): - return v.astype('m8[ns]') - - self.assertEqual(ct('10'), np.timedelta64(10, 'ns')) - self.assertEqual(ct('10ns'), np.timedelta64(10, 'ns')) - self.assertEqual(ct('100'), np.timedelta64(100, 'ns')) - self.assertEqual(ct('100ns'), np.timedelta64(100, 'ns')) - - self.assertEqual(ct('1000'), np.timedelta64(1000, 'ns')) - self.assertEqual(ct('1000ns'), np.timedelta64(1000, 'ns')) - self.assertEqual(ct('1000NS'), np.timedelta64(1000, 'ns')) - - self.assertEqual(ct('10us'), np.timedelta64(10000, 'ns')) - self.assertEqual(ct('100us'), np.timedelta64(100000, 'ns')) - self.assertEqual(ct('1000us'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('1000Us'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('1000uS'), np.timedelta64(1000000, 'ns')) - - self.assertEqual(ct('1ms'), np.timedelta64(1000000, 'ns')) - self.assertEqual(ct('10ms'), np.timedelta64(10000000, 'ns')) - self.assertEqual(ct('100ms'), np.timedelta64(100000000, 'ns')) - self.assertEqual(ct('1000ms'), np.timedelta64(1000000000, 'ns')) - - self.assertEqual(ct('-1s'), -np.timedelta64(1000000000, 'ns')) - self.assertEqual(ct('1s'), np.timedelta64(1000000000, 'ns')) - self.assertEqual(ct('10s'), np.timedelta64(10000000000, 'ns')) - self.assertEqual(ct('100s'), 
np.timedelta64(100000000000, 'ns')) - self.assertEqual(ct('1000s'), np.timedelta64(1000000000000, 'ns')) - - self.assertEqual(ct('1d'), conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('-1d'), -conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('1D'), conv(np.timedelta64(1, 'D'))) - self.assertEqual(ct('10D'), conv(np.timedelta64(10, 'D'))) - self.assertEqual(ct('100D'), conv(np.timedelta64(100, 'D'))) - self.assertEqual(ct('1000D'), conv(np.timedelta64(1000, 'D'))) - self.assertEqual(ct('10000D'), conv(np.timedelta64(10000, 'D'))) - - # space - self.assertEqual(ct(' 10000D '), conv(np.timedelta64(10000, 'D'))) - self.assertEqual(ct(' - 10000D '), -conv(np.timedelta64(10000, 'D'))) - - # invalid - self.assertRaises(ValueError, ct, '1foo') - self.assertRaises(ValueError, ct, 'foo') - - def test_full_format_converters(self): - def conv(v): - return v.astype('m8[ns]') - - d1 = np.timedelta64(1, 'D') - - self.assertEqual(ct('1days'), conv(d1)) - self.assertEqual(ct('1days,'), conv(d1)) - self.assertEqual(ct('- 1days,'), -conv(d1)) - - self.assertEqual(ct('00:00:01'), conv(np.timedelta64(1, 's'))) - self.assertEqual(ct('06:00:01'), conv( - np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('06:00:01.0'), conv( - np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('06:00:01.01'), conv( - np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) - - self.assertEqual(ct('- 1days, 00:00:01'), - conv(-d1 + np.timedelta64(1, 's'))) - self.assertEqual(ct('1days, 06:00:01'), conv( - d1 + np.timedelta64(6 * 3600 + 1, 's'))) - self.assertEqual(ct('1days, 06:00:01.01'), conv( - d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, 'ms'))) - - # invalid - self.assertRaises(ValueError, ct, '- 1days, 00') - - def test_nat_converters(self): - self.assertEqual(to_timedelta( - 'nat', box=False).astype('int64'), tslib.iNaT) - self.assertEqual(to_timedelta( - 'nan', box=False).astype('int64'), tslib.iNaT) - - def test_to_timedelta(self): - def conv(v): - return v.astype('m8[ns]') - - d1 = np.timedelta64(1, 'D') - - self.assertEqual(to_timedelta('1 days 06:05:01.00003', box=False), - conv(d1 + np.timedelta64(6 * 3600 + - 5 * 60 + 1, 's') + - np.timedelta64(30, 'us'))) - self.assertEqual(to_timedelta('15.5us', box=False), - conv(np.timedelta64(15500, 'ns'))) - - # empty string - result = to_timedelta('', box=False) - self.assertEqual(result.astype('int64'), tslib.iNaT) - - result = to_timedelta(['', '']) - self.assertTrue(isnull(result).all()) - - # pass thru - result = to_timedelta(np.array([np.timedelta64(1, 's')])) - expected = pd.Index(np.array([np.timedelta64(1, 's')])) - tm.assert_index_equal(result, expected) - - # ints - result = np.timedelta64(0, 'ns') - expected = to_timedelta(0, box=False) - self.assertEqual(result, expected) - - # Series - expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) - result = to_timedelta(Series(['1d', '1days 00:00:01'])) - tm.assert_series_equal(result, expected) - - # with units - result = TimedeltaIndex([np.timedelta64(0, 'ns'), np.timedelta64( - 10, 's').astype('m8[ns]')]) - expected = to_timedelta([0, 10], unit='s') - tm.assert_index_equal(result, expected) - - # single element conversion - v = timedelta(seconds=1) - result = to_timedelta(v, box=False) - expected = np.timedelta64(timedelta(seconds=1)) - self.assertEqual(result, expected) - - v = np.timedelta64(timedelta(seconds=1)) - result = to_timedelta(v, box=False) - expected = np.timedelta64(timedelta(seconds=1)) - self.assertEqual(result, expected) - - # arrays of various dtypes - arr = 
np.array([1] * 5, dtype='int64') - result = to_timedelta(arr, unit='s') - expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='int64') - result = to_timedelta(arr, unit='m') - expected = TimedeltaIndex([np.timedelta64(1, 'm')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='int64') - result = to_timedelta(arr, unit='h') - expected = TimedeltaIndex([np.timedelta64(1, 'h')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='timedelta64[s]') - result = to_timedelta(arr) - expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) - tm.assert_index_equal(result, expected) - - arr = np.array([1] * 5, dtype='timedelta64[D]') - result = to_timedelta(arr) - expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5) - tm.assert_index_equal(result, expected) - - # Test with lists as input when box=false - expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]') - result = to_timedelta(range(3), unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - result = to_timedelta(np.arange(3), unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - result = to_timedelta([0, 1, 2], unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - # Tests with fractional seconds as input: - expected = np.array( - [0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]') - result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False) - tm.assert_numpy_array_equal(expected, result) - - def testit(unit, transform): - - # array - result = to_timedelta(np.arange(5), unit=unit) - expected = TimedeltaIndex([np.timedelta64(i, transform(unit)) - for i in np.arange(5).tolist()]) - tm.assert_index_equal(result, expected) - - # scalar - result = to_timedelta(2, unit=unit) - expected = Timedelta(np.timedelta64(2, transform(unit)).astype( - 'timedelta64[ns]')) - self.assertEqual(result, expected) - - # validate all units - # GH 6855 - for unit in ['Y', 'M', 'W', 'D', 'y', 'w', 'd']: - testit(unit, lambda x: x.upper()) - for unit in ['days', 'day', 'Day', 'Days']: - testit(unit, lambda x: 'D') - for unit in ['h', 'm', 's', 'ms', 'us', 'ns', 'H', 'S', 'MS', 'US', - 'NS']: - testit(unit, lambda x: x.lower()) - - # offsets - - # m - testit('T', lambda x: 'm') - - # ms - testit('L', lambda x: 'ms') - - def test_to_timedelta_invalid(self): - - # bad value for errors parameter - msg = "errors must be one of" - tm.assertRaisesRegexp(ValueError, msg, to_timedelta, - ['foo'], errors='never') - - # these will error - self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) - self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo')) - - # time not supported ATM - self.assertRaises(ValueError, lambda: to_timedelta(time(second=1))) - self.assertTrue(to_timedelta( - time(second=1), errors='coerce') is pd.NaT) - - self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar'])) - tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]), - to_timedelta(['foo', 'bar'], errors='coerce')) - - tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']), - to_timedelta(['1 day', 'bar', '1 min'], - errors='coerce')) - - # gh-13613: these should not error because errors='ignore' - invalid_data = 'apple' - self.assertEqual(invalid_data, to_timedelta( - invalid_data, errors='ignore')) - - invalid_data = ['apple', '1 days'] - tm.assert_numpy_array_equal( - np.array(invalid_data, dtype=object), - to_timedelta(invalid_data, 
errors='ignore')) - - invalid_data = pd.Index(['apple', '1 days']) - tm.assert_index_equal(invalid_data, to_timedelta( - invalid_data, errors='ignore')) - - invalid_data = Series(['apple', '1 days']) - tm.assert_series_equal(invalid_data, to_timedelta( - invalid_data, errors='ignore')) - - def test_to_timedelta_via_apply(self): - # GH 5458 - expected = Series([np.timedelta64(1, 's')]) - result = Series(['00:00:01']).apply(to_timedelta) - tm.assert_series_equal(result, expected) - - result = Series([to_timedelta('00:00:01')]) - tm.assert_series_equal(result, expected) - - def test_timedelta_ops(self): - # GH4984 - # make sure ops return Timedelta - s = Series([Timestamp('20130101') + timedelta(seconds=i * i) - for i in range(10)]) - td = s.diff() - - result = td.mean() - expected = to_timedelta(timedelta(seconds=9)) - self.assertEqual(result, expected) - - result = td.to_frame().mean() - self.assertEqual(result[0], expected) - - result = td.quantile(.1) - expected = Timedelta(np.timedelta64(2600, 'ms')) - self.assertEqual(result, expected) - - result = td.median() - expected = to_timedelta('00:00:09') - self.assertEqual(result, expected) - - result = td.to_frame().median() - self.assertEqual(result[0], expected) - - # GH 6462 - # consistency in returned values for sum - result = td.sum() - expected = to_timedelta('00:01:21') - self.assertEqual(result, expected) - - result = td.to_frame().sum() - self.assertEqual(result[0], expected) - - # std - result = td.std() - expected = to_timedelta(Series(td.dropna().values).std()) - self.assertEqual(result, expected) - - result = td.to_frame().std() - self.assertEqual(result[0], expected) - - # invalid ops - for op in ['skew', 'kurt', 'sem', 'prod']: - self.assertRaises(TypeError, getattr(td, op)) - - # GH 10040 - # make sure NaT is properly handled by median() - s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')]) - self.assertEqual(s.diff().median(), timedelta(days=4)) - - s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'), - Timestamp('2015-02-15')]) - self.assertEqual(s.diff().median(), timedelta(days=6)) - - def test_overflow(self): - # GH 9442 - s = Series(pd.date_range('20130101', periods=100000, freq='H')) - s[0] += pd.Timedelta('1s 1ms') - - # mean - result = (s - s.min()).mean() - expected = pd.Timedelta((pd.DatetimeIndex((s - s.min())).asi8 / len(s) - ).sum()) - - # the computation is converted to float so might be some loss of - # precision - self.assertTrue(np.allclose(result.value / 1000, expected.value / - 1000)) - - # sum - self.assertRaises(ValueError, lambda: (s - s.min()).sum()) - s1 = s[0:10000] - self.assertRaises(ValueError, lambda: (s1 - s1.min()).sum()) - s2 = s[0:1000] - result = (s2 - s2.min()).sum() - - def test_overflow_on_construction(self): - # xref https://github.com/statsmodels/statsmodels/issues/3374 - value = pd.Timedelta('1day').value * 20169940 - self.assertRaises(OverflowError, pd.Timedelta, value) - - def test_timedelta_ops_scalar(self): - # GH 6808 - base = pd.to_datetime('20130101 09:01:12.123456') - expected_add = pd.to_datetime('20130101 09:01:22.123456') - expected_sub = pd.to_datetime('20130101 09:01:02.123456') - - for offset in [pd.to_timedelta(10, unit='s'), timedelta(seconds=10), - np.timedelta64(10, 's'), - np.timedelta64(10000000000, 'ns'), - pd.offsets.Second(10)]: - result = base + offset - self.assertEqual(result, expected_add) - - result = base - offset - self.assertEqual(result, expected_sub) - - base = pd.to_datetime('20130102 09:01:12.123456') - expected_add = 
pd.to_datetime('20130103 09:01:22.123456') - expected_sub = pd.to_datetime('20130101 09:01:02.123456') - - for offset in [pd.to_timedelta('1 day, 00:00:10'), - pd.to_timedelta('1 days, 00:00:10'), - timedelta(days=1, seconds=10), - np.timedelta64(1, 'D') + np.timedelta64(10, 's'), - pd.offsets.Day() + pd.offsets.Second(10)]: - result = base + offset - self.assertEqual(result, expected_add) - - result = base - offset - self.assertEqual(result, expected_sub) - - def test_to_timedelta_on_missing_values(self): - # GH5438 - timedelta_NaT = np.timedelta64('NaT') - - actual = pd.to_timedelta(Series(['00:00:01', np.nan])) - expected = Series([np.timedelta64(1000000000, 'ns'), - timedelta_NaT], dtype=' idx1 - expected = np.array([True, False, False, False, True, False]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 <= idx2 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx2 >= idx1 - expected = np.array([True, False, False, False, True, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 == idx2 - expected = np.array([False, False, False, False, False, True]) - self.assert_numpy_array_equal(result, expected) - - result = idx1 != idx2 - expected = np.array([True, True, True, True, True, False]) - self.assert_numpy_array_equal(result, expected) - - def test_ops_error_str(self): - # GH 13624 - tdi = TimedeltaIndex(['1 day', '2 days']) - - for l, r in [(tdi, 'a'), ('a', tdi)]: - with tm.assertRaises(TypeError): - l + r - - with tm.assertRaises(TypeError): - l > r - - with tm.assertRaises(TypeError): - l == r - - with tm.assertRaises(TypeError): - l != r - - def test_map(self): - - rng = timedelta_range('1 day', periods=10) - - f = lambda x: x.days - result = rng.map(f) - exp = Int64Index([f(x) for x in rng]) - tm.assert_index_equal(result, exp) - - def test_misc_coverage(self): - - rng = timedelta_range('1 day', periods=5) - result = rng.groupby(rng.days) - tm.assertIsInstance(list(result.values())[0][0], Timedelta) - - idx = TimedeltaIndex(['3d', '1d', '2d']) - self.assertFalse(idx.equals(list(idx))) - - non_td = Index(list('abc')) - self.assertFalse(idx.equals(list(non_td))) - - def test_union(self): - - i1 = timedelta_range('1day', periods=5) - i2 = timedelta_range('3day', periods=5) - result = i1.union(i2) - expected = timedelta_range('1day', periods=7) - self.assert_index_equal(result, expected) - - i1 = Int64Index(np.arange(0, 20, 2)) - i2 = TimedeltaIndex(start='1 day', periods=10, freq='D') - i1.union(i2) # Works - i2.union(i1) # Fails with "AttributeError: can't set attribute" - - def test_union_coverage(self): - - idx = TimedeltaIndex(['3d', '1d', '2d']) - ordered = TimedeltaIndex(idx.sort_values(), freq='infer') - result = ordered.union(idx) - self.assert_index_equal(result, ordered) - - result = ordered[:0].union(ordered) - self.assert_index_equal(result, ordered) - self.assertEqual(result.freq, ordered.freq) - - def test_union_bug_1730(self): - - rng_a = timedelta_range('1 day', periods=4, freq='3H') - rng_b = timedelta_range('1 day', periods=4, freq='4H') - - result = rng_a.union(rng_b) - exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) - self.assert_index_equal(result, exp) - - def test_union_bug_1745(self): - - left = TimedeltaIndex(['1 day 15:19:49.695000']) - right = TimedeltaIndex(['2 day 13:04:21.322000', - '1 day 15:27:24.873000', - '1 day 15:31:05.350000']) - - result = left.union(right) - exp = TimedeltaIndex(sorted(set(list(left)) | 
set(list(right)))) - self.assert_index_equal(result, exp) - - def test_union_bug_4564(self): - - left = timedelta_range("1 day", "30d") - right = left + pd.offsets.Minute(15) - - result = left.union(right) - exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) - self.assert_index_equal(result, exp) - - def test_intersection_bug_1708(self): - index_1 = timedelta_range('1 day', periods=4, freq='h') - index_2 = index_1 + pd.offsets.Hour(5) - - result = index_1 & index_2 - self.assertEqual(len(result), 0) - - index_1 = timedelta_range('1 day', periods=4, freq='h') - index_2 = index_1 + pd.offsets.Hour(1) - - result = index_1 & index_2 - expected = timedelta_range('1 day 01:00:00', periods=3, freq='h') - tm.assert_index_equal(result, expected) - - def test_get_duplicates(self): - idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day', - '4day']) - - result = idx.get_duplicates() - ex = TimedeltaIndex(['2 day', '3day']) - self.assert_index_equal(result, ex) - - def test_argmin_argmax(self): - idx = TimedeltaIndex(['1 day 00:00:05', '1 day 00:00:01', - '1 day 00:00:02']) - self.assertEqual(idx.argmin(), 1) - self.assertEqual(idx.argmax(), 0) - - def test_sort_values(self): - - idx = TimedeltaIndex(['4d', '1d', '2d']) - - ordered = idx.sort_values() - self.assertTrue(ordered.is_monotonic) - - ordered = idx.sort_values(ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) - - ordered, dexer = idx.sort_values(return_indexer=True) - self.assertTrue(ordered.is_monotonic) - self.assert_numpy_array_equal(dexer, - np.array([1, 2, 0]), - check_dtype=False) - - ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) - self.assertTrue(ordered[::-1].is_monotonic) - self.assert_numpy_array_equal(dexer, - np.array([0, 2, 1]), - check_dtype=False) - - def test_insert(self): - - idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx') - - result = idx.insert(2, timedelta(days=5)) - exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') - self.assert_index_equal(result, exp) - - # insertion of non-datetime should coerce to object index - result = idx.insert(1, 'inserted') - expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), - Timedelta('2day')], name='idx') - self.assertNotIsInstance(result, TimedeltaIndex) - tm.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - - idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') - - # preserve freq - expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02', - '1day 00:00:03'], - name='idx', freq='s') - expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', - '1day 00:00:03', '1day 00:00:04'], - name='idx', freq='s') - - # reset freq to None - expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01', - '1day 00:00:02', '1day 00:00:03'], - name='idx', freq=None) - expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', - '1day 00:00:03', '1day 00:00:05'], - name='idx', freq=None) - - cases = [(0, Timedelta('1day'), expected_0), - (-3, Timedelta('1day'), expected_0), - (3, Timedelta('1day 00:00:04'), expected_3), - (1, Timedelta('1day 00:00:01'), expected_1_nofreq), - (3, Timedelta('1day 00:00:05'), expected_3_nofreq)] - - for n, d, expected in cases: - result = idx.insert(n, d) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - def test_delete(self): - idx = timedelta_range(start='1 Days', periods=5, freq='D', 
name='idx') - - # prserve freq - expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', - name='idx') - expected_4 = timedelta_range(start='1 Days', periods=4, freq='D', - name='idx') - - # reset freq to None - expected_1 = TimedeltaIndex( - ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx') - - cases = {0: expected_0, - -5: expected_0, - -1: expected_4, - 4: expected_4, - 1: expected_1} - for n, expected in compat.iteritems(cases): - result = idx.delete(n) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - with tm.assertRaises((IndexError, ValueError)): - # either depeidnig on numpy version - result = idx.delete(5) - - def test_delete_slice(self): - idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') - - # prserve freq - expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', - name='idx') - expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', - name='idx') - - # reset freq to None - expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d', - '7 d', '8 d', '9 d', '10d'], - freq=None, name='idx') - - cases = {(0, 1, 2): expected_0_2, - (7, 8, 9): expected_7_9, - (3, 4, 5): expected_3_5} - for n, expected in compat.iteritems(cases): - result = idx.delete(n) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - result = idx.delete(slice(n[0], n[-1] + 1)) - self.assert_index_equal(result, expected) - self.assertEqual(result.name, expected.name) - self.assertEqual(result.freq, expected.freq) - - def test_take(self): - - tds = ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'] - idx = TimedeltaIndex(start='1d', end='2d', freq='H', name='idx') - expected = TimedeltaIndex(tds, freq=None, name='idx') - - taken1 = idx.take([2, 4, 10]) - taken2 = idx[[2, 4, 10]] - - for taken in [taken1, taken2]: - self.assert_index_equal(taken, expected) - tm.assertIsInstance(taken, TimedeltaIndex) - self.assertIsNone(taken.freq) - self.assertEqual(taken.name, expected.name) - - def test_take_fill_value(self): - # GH 12631 - idx = pd.TimedeltaIndex(['1 days', '2 days', '3 days'], - name='xxx') - result = idx.take(np.array([1, 0, -1])) - expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], - name='xxx') - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'], - name='xxx') - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.TimedeltaIndex(['2 days', '1 days', '3 days'], - name='xxx') - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) - - def test_isin(self): - - index = tm.makeTimedeltaIndex(4) - result = index.isin(index) - self.assertTrue(result.all()) - - result = index.isin(list(index)) - self.assertTrue(result.all()) - - assert_almost_equal(index.isin([index[2], 5]), - np.array([False, False, True, False])) - - def test_does_not_convert_mixed_integer(self): - df = tm.makeCustomDataframe(10, 10, - 
data_gen_f=lambda *args, **kwargs: randn(), - r_idx_type='i', c_idx_type='td') - str(df) - - cols = df.columns.join(df.index, how='outer') - joined = cols.join(df.columns) - self.assertEqual(cols.dtype, np.dtype('O')) - self.assertEqual(cols.dtype, joined.dtype) - tm.assert_index_equal(cols, joined) - - def test_slice_keeps_name(self): - - # GH4226 - dr = pd.timedelta_range('1d', '5d', freq='H', name='timebucket') - self.assertEqual(dr[1:].name, dr.name) - - def test_join_self(self): - - index = timedelta_range('1 day', periods=10) - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = index.join(index, how=kind) - tm.assert_index_equal(index, joined) - - def test_factorize(self): - idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day', - '3 day']) - - exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) - exp_idx = TimedeltaIndex(['1 day', '2 day', '3 day']) - - arr, idx = idx1.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, exp_idx) - - arr, idx = idx1.factorize(sort=True) - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, exp_idx) - - # freq must be preserved - idx3 = timedelta_range('1 day', periods=4, freq='s') - exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) - arr, idx = idx3.factorize() - self.assert_numpy_array_equal(arr, exp_arr) - self.assert_index_equal(idx, idx3) - - -class TestSlicing(tm.TestCase): - def test_partial_slice(self): - rng = timedelta_range('1 day 10:11:12', freq='h', periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s['5 day':'6 day'] - expected = s.iloc[86:134] - assert_series_equal(result, expected) - - result = s['5 day':] - expected = s.iloc[86:] - assert_series_equal(result, expected) - - result = s[:'6 day'] - expected = s.iloc[:134] - assert_series_equal(result, expected) - - result = s['6 days, 23:11:12'] - self.assertEqual(result, s.iloc[133]) - - self.assertRaises(KeyError, s.__getitem__, '50 days') - - def test_partial_slice_high_reso(self): - - # higher reso - rng = timedelta_range('1 day 10:11:12', freq='us', periods=2000) - s = Series(np.arange(len(rng)), index=rng) - - result = s['1 day 10:11:12':] - expected = s.iloc[0:] - assert_series_equal(result, expected) - - result = s['1 day 10:11:12.001':] - expected = s.iloc[1000:] - assert_series_equal(result, expected) - - result = s['1 days, 10:11:12.001001'] - self.assertEqual(result, s.iloc[1001]) - - def test_slice_with_negative_step(self): - ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) - SLC = pd.IndexSlice - - def assert_slices_equivalent(l_slc, i_slc): - assert_series_equal(ts[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) - - assert_slices_equivalent(SLC[Timedelta(hours=7)::-1], SLC[7::-1]) - assert_slices_equivalent(SLC['7 hours'::-1], SLC[7::-1]) - - assert_slices_equivalent(SLC[:Timedelta(hours=7):-1], SLC[:6:-1]) - assert_slices_equivalent(SLC[:'7 hours':-1], SLC[:6:-1]) - - assert_slices_equivalent(SLC['15 hours':'7 hours':-1], SLC[15:6:-1]) - assert_slices_equivalent(SLC[Timedelta(hours=15):Timedelta(hours=7):- - 1], SLC[15:6:-1]) - assert_slices_equivalent(SLC['15 hours':Timedelta(hours=7):-1], - SLC[15:6:-1]) - assert_slices_equivalent(SLC[Timedelta(hours=15):'7 hours':-1], - SLC[15:6:-1]) - - assert_slices_equivalent(SLC['7 hours':'15 hours':-1], SLC[:0]) - - def test_slice_with_zero_step_raises(self): - ts = Series(np.arange(20), timedelta_range('0', 
periods=20, freq='H')) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - self.assertRaisesRegexp(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - - def test_tdi_ops_attributes(self): - rng = timedelta_range('2 days', periods=5, freq='2D', name='x') - - result = rng + 1 - exp = timedelta_range('4 days', periods=5, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - result = rng - 2 - exp = timedelta_range('-2 days', periods=5, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - result = rng * 2 - exp = timedelta_range('4 days', periods=5, freq='4D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '4D') - - result = rng / 2 - exp = timedelta_range('1 days', periods=5, freq='D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'D') - - result = -rng - exp = timedelta_range('-2 days', periods=5, freq='-2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '-2D') - - rng = pd.timedelta_range('-2 days', periods=5, freq='D', name='x') - - result = abs(rng) - exp = TimedeltaIndex(['2 days', '1 days', '0 days', '1 days', - '2 days'], name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, None) - - def test_add_overflow(self): - # see gh-14068 - msg = "too (big|large) to convert" - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta(106580, 'D') + Timestamp('2000') - with tm.assertRaisesRegexp(OverflowError, msg): - Timestamp('2000') + to_timedelta(106580, 'D') - - _NaT = int(pd.NaT) + 1 - msg = "Overflow in int64 addition" - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta([106580], 'D') + Timestamp('2000') - with tm.assertRaisesRegexp(OverflowError, msg): - Timestamp('2000') + to_timedelta([106580], 'D') - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta([_NaT]) - Timedelta('1 days') - with tm.assertRaisesRegexp(OverflowError, msg): - to_timedelta(['5 days', _NaT]) - Timedelta('1 days') - with tm.assertRaisesRegexp(OverflowError, msg): - (to_timedelta([_NaT, '5 days', '1 hours']) - - to_timedelta(['7 seconds', _NaT, '4 hours'])) - - # These should not overflow! 
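(Reviewer aside, not part of the patch: a small sketch of the overflow boundary that test_add_overflow pins down, with values taken from the assertions above; the exception type is OverflowError in this vintage of pandas, while newer versions raise an out-of-bounds ValueError subclass, hence the broad except clause.)

import pandas as pd

try:
    # 106580 days past 2000 lands outside the datetime64[ns] range
    pd.Timestamp('2000') + pd.to_timedelta(106580, 'D')
except (OverflowError, ValueError):
    pass

# NaT entries pass through instead of overflowing
result = pd.to_timedelta(['5 days', pd.NaT]) - pd.Timedelta('1 days')
# -> TimedeltaIndex(['4 days', NaT])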
- exp = TimedeltaIndex([pd.NaT]) - result = to_timedelta([pd.NaT]) - Timedelta('1 days') - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex(['4 days', pd.NaT]) - result = to_timedelta(['5 days', pd.NaT]) - Timedelta('1 days') - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex([pd.NaT, pd.NaT, '5 hours']) - result = (to_timedelta([pd.NaT, '5 days', '1 hours']) + - to_timedelta(['7 seconds', pd.NaT, '4 hours'])) - tm.assert_index_equal(result, exp) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 00b60ba620c4b..aac1a41580e65 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -1,23 +1,20 @@ # pylint: disable-msg=E1101,W0612 -from datetime import datetime, timedelta, tzinfo, date -import numpy as np import pytz +import numpy as np from distutils.version import LooseVersion -from pandas.types.dtypes import DatetimeTZDtype -from pandas import (Index, Series, DataFrame, isnull, Timestamp) - -from pandas import DatetimeIndex, to_datetime, NaT -from pandas import tslib - -import pandas.tseries.offsets as offsets -from pandas.tseries.index import bdate_range, date_range -import pandas.tseries.tools as tools +from datetime import datetime, timedelta, tzinfo, date from pytz import NonExistentTimeError import pandas.util.testing as tm +import pandas.tseries.tools as tools +import pandas.tseries.offsets as offsets +from pandas.compat import lrange, zip +from pandas.tseries.index import bdate_range, date_range +from pandas.types.dtypes import DatetimeTZDtype +from pandas import (Index, Series, DataFrame, isnull, Timestamp, tslib, NaT, + DatetimeIndex, to_datetime) from pandas.util.testing import (assert_frame_equal, assert_series_equal, set_timezone) -from pandas.compat import lrange, zip try: import pytz # noqa @@ -1681,3 +1678,52 @@ def test_nat(self): idx = idx.tz_convert('US/Eastern') expected = ['2010-12-01 11:00', '2010-12-02 11:00', NaT] self.assert_index_equal(idx, DatetimeIndex(expected, tz='US/Eastern')) + + +class TestTslib(tm.TestCase): + + def test_tslib_tz_convert(self): + def compare_utc_to_local(tz_didx, utc_didx): + f = lambda x: tslib.tz_convert_single(x, 'UTC', tz_didx.tz) + result = tslib.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) + result_single = np.vectorize(f)(tz_didx.asi8) + self.assert_numpy_array_equal(result, result_single) + + def compare_local_to_utc(tz_didx, utc_didx): + f = lambda x: tslib.tz_convert_single(x, tz_didx.tz, 'UTC') + result = tslib.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') + result_single = np.vectorize(f)(utc_didx.asi8) + self.assert_numpy_array_equal(result, result_single) + + for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'Europe/Moscow']: + # US: 2014-03-09 - 2014-11-11 + # MOSCOW: 2014-10-26 / 2014-12-31 + tz_didx = date_range('2014-03-01', '2015-01-10', freq='H', tz=tz) + utc_didx = date_range('2014-03-01', '2015-01-10', freq='H') + compare_utc_to_local(tz_didx, utc_didx) + # local tz to UTC can be differ in hourly (or higher) freqs because + # of DST + compare_local_to_utc(tz_didx, utc_didx) + + tz_didx = date_range('2000-01-01', '2020-01-01', freq='D', tz=tz) + utc_didx = date_range('2000-01-01', '2020-01-01', freq='D') + compare_utc_to_local(tz_didx, utc_didx) + compare_local_to_utc(tz_didx, utc_didx) + + tz_didx = date_range('2000-01-01', '2100-01-01', freq='A', tz=tz) + utc_didx = date_range('2000-01-01', '2100-01-01', freq='A') + compare_utc_to_local(tz_didx, utc_didx) + compare_local_to_utc(tz_didx, utc_didx) + + # Check 
empty array + result = tslib.tz_convert(np.array([], dtype=np.int64), + tslib.maybe_get_tz('US/Eastern'), + tslib.maybe_get_tz('Asia/Tokyo')) + self.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) + + # Check all-NaT array + result = tslib.tz_convert(np.array([tslib.iNaT], dtype=np.int64), + tslib.maybe_get_tz('US/Eastern'), + tslib.maybe_get_tz('Asia/Tokyo')) + self.assert_numpy_array_equal(result, np.array( + [tslib.iNaT], dtype=np.int64)) diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py deleted file mode 100644 index 20e91a6f5bc44..0000000000000 --- a/pandas/tseries/tests/test_tslib.py +++ /dev/null @@ -1,691 +0,0 @@ -import datetime -import numpy as np -from distutils.version import LooseVersion - -import pandas as pd -import pandas.util.testing as tm -from pandas import tslib, lib, compat -from pandas.tseries import offsets, tools -from pandas.tseries.frequencies import get_freq -from pandas.tseries.index import date_range, DatetimeIndex -from pandas.util.testing import _skip_if_has_locale -from pandas._period import period_ordinal, period_asfreq -from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.api import Timestamp, to_datetime, Index, Series - - -class TestTsUtil(tm.TestCase): - - def test_try_parse_dates(self): - from dateutil.parser import parse - arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) - - result = lib.try_parse_dates(arr, dayfirst=True) - expected = [parse(d, dayfirst=True) for d in arr] - self.assertTrue(np.array_equal(result, expected)) - - def test_min_valid(self): - # Ensure that Timestamp.min is a valid Timestamp - Timestamp(Timestamp.min) - - def test_max_valid(self): - # Ensure that Timestamp.max is a valid Timestamp - Timestamp(Timestamp.max) - - def test_to_datetime_bijective(self): - # Ensure that converting to datetime and back only loses precision - # by going from nanoseconds to microseconds. 
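(Reviewer aside, not part of the patch: the round-trip property described in the comment above in a few lines, using an arbitrary illustrative value rather than Timestamp.max/min as the deleted test does; Timestamp resolves to nanoseconds, datetime.datetime only to microseconds, so to_pydatetime() warns and drops the sub-microsecond part.)

import pandas as pd

ts = pd.Timestamp('2016-01-01 12:00:00.000000001')   # one nanosecond past the microsecond
rt = pd.Timestamp(ts.to_pydatetime())                # warns: nonzero nanoseconds discarded
assert rt == pd.Timestamp('2016-01-01 12:00:00')     # only the nanosecond is lost
assert ts.value // 1000 == rt.value // 1000          # identical at microsecond precision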
- exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): - self.assertEqual( - Timestamp(Timestamp.max.to_pydatetime()).value / 1000, - Timestamp.max.value / 1000) - - exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): - self.assertEqual( - Timestamp(Timestamp.min.to_pydatetime()).value / 1000, - Timestamp.min.value / 1000) - - -class TestDatetimeParsingWrappers(tm.TestCase): - def test_does_not_convert_mixed_integer(self): - bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') - - for bad_date_string in bad_date_strings: - self.assertFalse(tslib._does_string_look_like_datetime( - bad_date_string)) - - good_date_strings = ('2012-01-01', - '01/01/2012', - 'Mon Sep 16, 2013', - '01012012', - '0101', - '1-1', ) - - for good_date_string in good_date_strings: - self.assertTrue(tslib._does_string_look_like_datetime( - good_date_string)) - - def test_parsers(self): - - # https://github.com/dateutil/dateutil/issues/217 - import dateutil - yearfirst = dateutil.__version__ >= LooseVersion('2.5.0') - - cases = {'2011-01-01': datetime.datetime(2011, 1, 1), - '2Q2005': datetime.datetime(2005, 4, 1), - '2Q05': datetime.datetime(2005, 4, 1), - '2005Q1': datetime.datetime(2005, 1, 1), - '05Q1': datetime.datetime(2005, 1, 1), - '2011Q3': datetime.datetime(2011, 7, 1), - '11Q3': datetime.datetime(2011, 7, 1), - '3Q2011': datetime.datetime(2011, 7, 1), - '3Q11': datetime.datetime(2011, 7, 1), - - # quarterly without space - '2000Q4': datetime.datetime(2000, 10, 1), - '00Q4': datetime.datetime(2000, 10, 1), - '4Q2000': datetime.datetime(2000, 10, 1), - '4Q00': datetime.datetime(2000, 10, 1), - '2000q4': datetime.datetime(2000, 10, 1), - '2000-Q4': datetime.datetime(2000, 10, 1), - '00-Q4': datetime.datetime(2000, 10, 1), - '4Q-2000': datetime.datetime(2000, 10, 1), - '4Q-00': datetime.datetime(2000, 10, 1), - '00q4': datetime.datetime(2000, 10, 1), - '2005': datetime.datetime(2005, 1, 1), - '2005-11': datetime.datetime(2005, 11, 1), - '2005 11': datetime.datetime(2005, 11, 1), - '11-2005': datetime.datetime(2005, 11, 1), - '11 2005': datetime.datetime(2005, 11, 1), - '200511': datetime.datetime(2020, 5, 11), - '20051109': datetime.datetime(2005, 11, 9), - '20051109 10:15': datetime.datetime(2005, 11, 9, 10, 15), - '20051109 08H': datetime.datetime(2005, 11, 9, 8, 0), - '2005-11-09 10:15': datetime.datetime(2005, 11, 9, 10, 15), - '2005-11-09 08H': datetime.datetime(2005, 11, 9, 8, 0), - '2005/11/09 10:15': datetime.datetime(2005, 11, 9, 10, 15), - '2005/11/09 08H': datetime.datetime(2005, 11, 9, 8, 0), - "Thu Sep 25 10:36:28 2003": datetime.datetime(2003, 9, 25, 10, - 36, 28), - "Thu Sep 25 2003": datetime.datetime(2003, 9, 25), - "Sep 25 2003": datetime.datetime(2003, 9, 25), - "January 1 2014": datetime.datetime(2014, 1, 1), - - # GH 10537 - '2014-06': datetime.datetime(2014, 6, 1), - '06-2014': datetime.datetime(2014, 6, 1), - '2014-6': datetime.datetime(2014, 6, 1), - '6-2014': datetime.datetime(2014, 6, 1), - - '20010101 12': datetime.datetime(2001, 1, 1, 12), - '20010101 1234': datetime.datetime(2001, 1, 1, 12, 34), - '20010101 123456': datetime.datetime(2001, 1, 1, 12, 34, 56), - } - - for date_str, expected in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str, - yearfirst=yearfirst) - result2 = to_datetime(date_str, yearfirst=yearfirst) - result3 = to_datetime([date_str], 
yearfirst=yearfirst) - # result5 is used below - result4 = to_datetime(np.array([date_str], dtype=object), - yearfirst=yearfirst) - result6 = DatetimeIndex([date_str], yearfirst=yearfirst) - # result7 is used below - result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) - result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst) - - for res in [result1, result2]: - self.assertEqual(res, expected) - for res in [result3, result4, result6, result8, result9]: - exp = DatetimeIndex([pd.Timestamp(expected)]) - tm.assert_index_equal(res, exp) - - # these really need to have yearfist, but we don't support - if not yearfirst: - result5 = Timestamp(date_str) - self.assertEqual(result5, expected) - result7 = date_range(date_str, freq='S', periods=1, - yearfirst=yearfirst) - self.assertEqual(result7, expected) - - # NaT - result1, _, _ = tools.parse_time_string('NaT') - result2 = to_datetime('NaT') - result3 = Timestamp('NaT') - result4 = DatetimeIndex(['NaT'])[0] - self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result1 is tslib.NaT) - self.assertTrue(result1 is tslib.NaT) - - def test_parsers_quarter_invalid(self): - - cases = ['2Q 2005', '2Q-200A', '2Q-200', '22Q2005', '6Q-20', '2Q200.'] - for case in cases: - self.assertRaises(ValueError, tools.parse_time_string, case) - - def test_parsers_dayfirst_yearfirst(self): - tm._skip_if_no_dateutil() - - # OK - # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 - # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00 - # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 - - # OK - # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 - # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 - # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 - - # bug fix in 2.5.2 - # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00 - # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 - # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 - - # OK - # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 - # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 - # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 - - # OK - # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 - - # OK - # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 - # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 - # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 - - # revert of bug in 2.5.2 - # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 - # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12 - # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 - - # OK - # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - - import dateutil - is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3') - - # str : dayfirst, yearfirst, expected - cases = {'10-11-12': [(False, False, - datetime.datetime(2012, 10, 11)), - (True, False, - datetime.datetime(2012, 11, 10)), - (False, True, - datetime.datetime(2010, 11, 12)), - (True, True, - datetime.datetime(2010, 12, 11))], - '20/12/21': [(False, False, - 
datetime.datetime(2021, 12, 20)), - (True, False, - datetime.datetime(2021, 12, 20)), - (False, True, - datetime.datetime(2020, 12, 21)), - (True, True, - datetime.datetime(2020, 12, 21))]} - - from dateutil.parser import parse - for date_str, values in compat.iteritems(cases): - for dayfirst, yearfirst, expected in values: - - # odd comparisons across version - # let's just skip - if dayfirst and yearfirst and is_lt_253: - continue - - # compare with dateutil result - dateutil_result = parse(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) - self.assertEqual(dateutil_result, expected) - - result1, _, _ = tools.parse_time_string(date_str, - dayfirst=dayfirst, - yearfirst=yearfirst) - - # we don't support dayfirst/yearfirst here: - if not dayfirst and not yearfirst: - result2 = Timestamp(date_str) - self.assertEqual(result2, expected) - - result3 = to_datetime(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) - - result4 = DatetimeIndex([date_str], dayfirst=dayfirst, - yearfirst=yearfirst)[0] - - self.assertEqual(result1, expected) - self.assertEqual(result3, expected) - self.assertEqual(result4, expected) - - def test_parsers_timestring(self): - tm._skip_if_no_dateutil() - from dateutil.parser import parse - - # must be the same as dateutil result - cases = {'10:15': (parse('10:15'), datetime.datetime(1, 1, 1, 10, 15)), - '9:05': (parse('9:05'), datetime.datetime(1, 1, 1, 9, 5))} - - for date_str, (exp_now, exp_def) in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str) - result2 = to_datetime(date_str) - result3 = to_datetime([date_str]) - result4 = Timestamp(date_str) - result5 = DatetimeIndex([date_str])[0] - # parse time string return time string based on default date - # others are not, and can't be changed because it is used in - # time series plot - self.assertEqual(result1, exp_def) - self.assertEqual(result2, exp_now) - self.assertEqual(result3, exp_now) - self.assertEqual(result4, exp_now) - self.assertEqual(result5, exp_now) - - def test_parsers_time(self): - # GH11818 - _skip_if_has_locale() - strings = ["14:15", "1415", "2:15pm", "0215pm", "14:15:00", "141500", - "2:15:00pm", "021500pm", datetime.time(14, 15)] - expected = datetime.time(14, 15) - - for time_string in strings: - self.assertEqual(tools.to_time(time_string), expected) - - new_string = "14.15" - self.assertRaises(ValueError, tools.to_time, new_string) - self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected) - - arg = ["14:15", "20:20"] - expected_arr = [datetime.time(14, 15), datetime.time(20, 20)] - self.assertEqual(tools.to_time(arg), expected_arr) - self.assertEqual(tools.to_time(arg, format="%H:%M"), expected_arr) - self.assertEqual(tools.to_time(arg, infer_time_format=True), - expected_arr) - self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"), - [None, None]) - - res = tools.to_time(arg, format="%I:%M%p", errors="ignore") - self.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) - - with tm.assertRaises(ValueError): - tools.to_time(arg, format="%I:%M%p", errors="raise") - - self.assert_series_equal(tools.to_time(Series(arg, name="test")), - Series(expected_arr, name="test")) - - res = tools.to_time(np.array(arg)) - self.assertIsInstance(res, list) - self.assert_equal(res, expected_arr) - - def test_parsers_monthfreq(self): - cases = {'201101': datetime.datetime(2011, 1, 1, 0, 0), - '200005': datetime.datetime(2000, 5, 1, 0, 0)} - - for date_str, expected in compat.iteritems(cases): - result1, _, _ = tools.parse_time_string(date_str, 
freq='M') - self.assertEqual(result1, expected) - - def test_parsers_quarterly_with_freq(self): - msg = ('Incorrect quarterly string is given, quarter ' - 'must be between 1 and 4: 2013Q5') - with tm.assertRaisesRegexp(tslib.DateParseError, msg): - tools.parse_time_string('2013Q5') - - # GH 5418 - msg = ('Unable to retrieve month information from given freq: ' - 'INVLD-L-DEC-SAT') - with tm.assertRaisesRegexp(tslib.DateParseError, msg): - tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') - - cases = {('2013Q2', None): datetime.datetime(2013, 4, 1), - ('2013Q2', 'A-APR'): datetime.datetime(2012, 8, 1), - ('2013-Q2', 'A-DEC'): datetime.datetime(2013, 4, 1)} - - for (date_str, freq), exp in compat.iteritems(cases): - result, _, _ = tools.parse_time_string(date_str, freq=freq) - self.assertEqual(result, exp) - - def test_parsers_timezone_minute_offsets_roundtrip(self): - # GH11708 - base = to_datetime("2013-01-01 00:00:00") - dt_strings = [ - ('2013-01-01 05:45+0545', - "Asia/Katmandu", - "Timestamp('2013-01-01 05:45:00+0545', tz='Asia/Katmandu')"), - ('2013-01-01 05:30+0530', - "Asia/Kolkata", - "Timestamp('2013-01-01 05:30:00+0530', tz='Asia/Kolkata')") - ] - - for dt_string, tz, dt_string_repr in dt_strings: - dt_time = to_datetime(dt_string) - self.assertEqual(base, dt_time) - converted_time = dt_time.tz_localize('UTC').tz_convert(tz) - self.assertEqual(dt_string_repr, repr(converted_time)) - - def test_parsers_iso8601(self): - # GH 12060 - # test only the iso parser - flexibility to different - # separators and leadings 0s - # Timestamp construction falls back to dateutil - cases = {'2011-01-02': datetime.datetime(2011, 1, 2), - '2011-1-2': datetime.datetime(2011, 1, 2), - '2011-01': datetime.datetime(2011, 1, 1), - '2011-1': datetime.datetime(2011, 1, 1), - '2011 01 02': datetime.datetime(2011, 1, 2), - '2011.01.02': datetime.datetime(2011, 1, 2), - '2011/01/02': datetime.datetime(2011, 1, 2), - '2011\\01\\02': datetime.datetime(2011, 1, 2), - '2013-01-01 05:30:00': datetime.datetime(2013, 1, 1, 5, 30), - '2013-1-1 5:30:00': datetime.datetime(2013, 1, 1, 5, 30)} - for date_str, exp in compat.iteritems(cases): - actual = tslib._test_parse_iso8601(date_str) - self.assertEqual(actual, exp) - - # seperators must all match - YYYYMM not valid - invalid_cases = ['2011-01/02', '2011^11^11', - '201401', '201111', '200101', - # mixed separated and unseparated - '2005-0101', '200501-01', - '20010101 12:3456', '20010101 1234:56', - # HHMMSS must have two digits in each component - # if unseparated - '20010101 1', '20010101 123', '20010101 12345', - '20010101 12345Z', - # wrong separator for HHMMSS - '2001-01-01 12-34-56'] - for date_str in invalid_cases: - with tm.assertRaises(ValueError): - tslib._test_parse_iso8601(date_str) - # If no ValueError raised, let me know which case failed. 
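(Reviewer aside, not part of the patch: the deleted cases above exercise the internal ISO-8601 parser; the same flexibility around separators and leading zeros is visible through the public Timestamp constructor, used here instead of the internal parser the test calls.)

import pandas as pd

assert pd.Timestamp('2011-1-2') == pd.Timestamp('2011-01-02')    # leading zeros are optional
assert pd.Timestamp('2011/01/02') == pd.Timestamp('2011-01-02')  # '/', '.' and ' ' separators accepted
assert pd.Timestamp('2011-01') == pd.Timestamp('2011-01-01')     # year-month means the first of the month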
- raise Exception(date_str) - - -class TestArrayToDatetime(tm.TestCase): - def test_parsing_valid_dates(self): - arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - '2013-01-02T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-09-16T00:00:00.000000000-0000', - '2013-09-17T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - def test_number_looking_strings_not_into_datetime(self): - # #4601 - # These strings don't look like datetimes so they shouldn't be - # attempted to be converted - arr = np.array(['-352.737091', '183.575577'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - arr = np.array(['1', '2', '3', '4', '5'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - def test_coercing_dates_outside_of_datetime64_ns_bounds(self): - invalid_dates = [ - datetime.date(1000, 1, 1), - datetime.datetime(1000, 1, 1), - '1000-01-01', - 'Jan 1, 1000', - np.datetime64('1000-01-01'), - ] - - for invalid_date in invalid_dates: - self.assertRaises(ValueError, - tslib.array_to_datetime, - np.array( - [invalid_date], dtype='object'), - errors='raise', ) - self.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([invalid_date], dtype='object'), - errors='coerce'), - np.array([tslib.iNaT], dtype='M8[ns]') - ) - - arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - tslib.iNaT, - '2000-01-01T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - def test_coerce_of_invalid_datetimes(self): - arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) - - # Without coercing, the presence of any invalid dates prevents - # any values from being converted - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - # With coercing, the invalid dates becomes iNaT - self.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - tslib.iNaT, - tslib.iNaT - ], - dtype='M8[ns]' - ) - ) - - def test_parsing_timezone_offsets(self): - # All of these datetime strings with offsets are equivalent - # to the same datetime after the timezone offset is added - dt_strings = [ - '01-01-2013 08:00:00+08:00', - '2013-01-01T08:00:00.000000000+0800', - '2012-12-31T16:00:00.000000000-0800', - '12-31-2012 23:00:00-01:00' - ] - - expected_output = tslib.array_to_datetime(np.array( - ['01-01-2013 00:00:00'], dtype=object)) - - for dt_string in dt_strings: - self.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([dt_string], dtype=object) - ), - expected_output - ) - - -class TestTslib(tm.TestCase): - def test_intraday_conversion_factors(self): - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('H'), False), 24) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('T'), False), 1440) - self.assertEqual(period_asfreq( - 1, get_freq('D'), get_freq('S'), False), 86400) - self.assertEqual(period_asfreq(1, get_freq( - 'D'), get_freq('L'), False), 86400000) - self.assertEqual(period_asfreq(1, get_freq( - 
'D'), get_freq('U'), False), 86400000000)
-        self.assertEqual(period_asfreq(1, get_freq(
-            'D'), get_freq('N'), False), 86400000000000)
-
-        self.assertEqual(period_asfreq(
-            1, get_freq('H'), get_freq('T'), False), 60)
-        self.assertEqual(period_asfreq(
-            1, get_freq('H'), get_freq('S'), False), 3600)
-        self.assertEqual(period_asfreq(1, get_freq('H'),
-                                       get_freq('L'), False), 3600000)
-        self.assertEqual(period_asfreq(1, get_freq(
-            'H'), get_freq('U'), False), 3600000000)
-        self.assertEqual(period_asfreq(1, get_freq(
-            'H'), get_freq('N'), False), 3600000000000)
-
-        self.assertEqual(period_asfreq(
-            1, get_freq('T'), get_freq('S'), False), 60)
-        self.assertEqual(period_asfreq(
-            1, get_freq('T'), get_freq('L'), False), 60000)
-        self.assertEqual(period_asfreq(1, get_freq(
-            'T'), get_freq('U'), False), 60000000)
-        self.assertEqual(period_asfreq(1, get_freq(
-            'T'), get_freq('N'), False), 60000000000)
-
-        self.assertEqual(period_asfreq(
-            1, get_freq('S'), get_freq('L'), False), 1000)
-        self.assertEqual(period_asfreq(1, get_freq('S'),
-                                       get_freq('U'), False), 1000000)
-        self.assertEqual(period_asfreq(1, get_freq(
-            'S'), get_freq('N'), False), 1000000000)
-
-        self.assertEqual(period_asfreq(
-            1, get_freq('L'), get_freq('U'), False), 1000)
-        self.assertEqual(period_asfreq(1, get_freq('L'),
-                                       get_freq('N'), False), 1000000)
-
-        self.assertEqual(period_asfreq(
-            1, get_freq('U'), get_freq('N'), False), 1000)
-
-    def test_period_ordinal_start_values(self):
-        # information for 1.1.1970
-        self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0,
-                                           get_freq('A')))
-        self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0,
-                                           get_freq('M')))
-        self.assertEqual(1, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0,
-                                           get_freq('W')))
-        self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0,
-                                           get_freq('D')))
-        self.assertEqual(0, period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0,
-                                           get_freq('B')))
-
-    def test_period_ordinal_week(self):
-        self.assertEqual(1, period_ordinal(1970, 1, 4, 0, 0, 0, 0, 0,
-                                           get_freq('W')))
-        self.assertEqual(2, period_ordinal(1970, 1, 5, 0, 0, 0, 0, 0,
-                                           get_freq('W')))
-
-        self.assertEqual(2284, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0,
-                                              get_freq('W')))
-        self.assertEqual(2285, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0,
-                                              get_freq('W')))
-
-    def test_period_ordinal_business_day(self):
-        # Thursday
-        self.assertEqual(11415, period_ordinal(2013, 10, 3, 0, 0, 0, 0, 0,
-                                               get_freq('B')))
-        # Friday
-        self.assertEqual(11416, period_ordinal(2013, 10, 4, 0, 0, 0, 0, 0,
-                                               get_freq('B')))
-        # Saturday
-        self.assertEqual(11417, period_ordinal(2013, 10, 5, 0, 0, 0, 0, 0,
-                                               get_freq('B')))
-        # Sunday
-        self.assertEqual(11417, period_ordinal(2013, 10, 6, 0, 0, 0, 0, 0,
-                                               get_freq('B')))
-        # Monday
-        self.assertEqual(11417, period_ordinal(2013, 10, 7, 0, 0, 0, 0, 0,
-                                               get_freq('B')))
-        # Tuesday
-        self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0,
-                                               get_freq('B')))
-
-    def test_tslib_tz_convert(self):
-        def compare_utc_to_local(tz_didx, utc_didx):
-            f = lambda x: tslib.tz_convert_single(x, 'UTC', tz_didx.tz)
-            result = tslib.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz)
-            result_single = np.vectorize(f)(tz_didx.asi8)
-            self.assert_numpy_array_equal(result, result_single)
-
-        def compare_local_to_utc(tz_didx, utc_didx):
-            f = lambda x: tslib.tz_convert_single(x, tz_didx.tz, 'UTC')
-            result = tslib.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC')
-            result_single = np.vectorize(f)(utc_didx.asi8)
-            self.assert_numpy_array_equal(result, result_single)
-
-        for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'Europe/Moscow']:
-            # US: 2014-03-09 - 2014-11-11
-            # MOSCOW: 2014-10-26 / 2014-12-31
-            tz_didx = date_range('2014-03-01', '2015-01-10', freq='H', tz=tz)
-            utc_didx = date_range('2014-03-01', '2015-01-10', freq='H')
-            compare_utc_to_local(tz_didx, utc_didx)
-            # local tz to UTC can be differ in hourly (or higher) freqs because
-            # of DST
-            compare_local_to_utc(tz_didx, utc_didx)
-
-            tz_didx = date_range('2000-01-01', '2020-01-01', freq='D', tz=tz)
-            utc_didx = date_range('2000-01-01', '2020-01-01', freq='D')
-            compare_utc_to_local(tz_didx, utc_didx)
-            compare_local_to_utc(tz_didx, utc_didx)
-
-            tz_didx = date_range('2000-01-01', '2100-01-01', freq='A', tz=tz)
-            utc_didx = date_range('2000-01-01', '2100-01-01', freq='A')
-            compare_utc_to_local(tz_didx, utc_didx)
-            compare_local_to_utc(tz_didx, utc_didx)
-
-        # Check empty array
-        result = tslib.tz_convert(np.array([], dtype=np.int64),
-                                  tslib.maybe_get_tz('US/Eastern'),
-                                  tslib.maybe_get_tz('Asia/Tokyo'))
-        self.assert_numpy_array_equal(result, np.array([], dtype=np.int64))
-
-        # Check all-NaT array
-        result = tslib.tz_convert(np.array([tslib.iNaT], dtype=np.int64),
-                                  tslib.maybe_get_tz('US/Eastern'),
-                                  tslib.maybe_get_tz('Asia/Tokyo'))
-        self.assert_numpy_array_equal(result, np.array(
-            [tslib.iNaT], dtype=np.int64))
-
-    def test_shift_months(self):
-        s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp(
-            '2000-01-31 00:23:00'), Timestamp('2000-01-01'), Timestamp(
-            '2000-02-29'), Timestamp('2000-12-31')])
-        for years in [-1, 0, 1]:
-            for months in [-2, 0, 2]:
-                actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 +
-                                                          months))
-                expected = DatetimeIndex([x + offsets.DateOffset(
-                    years=years, months=months) for x in s])
-                tm.assert_index_equal(actual, expected)
-
-    def test_round(self):
-        stamp = Timestamp('2000-01-05 05:09:15.13')
-
-        def _check_round(freq, expected):
-            result = stamp.round(freq=freq)
-            self.assertEqual(result, expected)
-
-        for freq, expected in [('D', Timestamp('2000-01-05 00:00:00')),
-                               ('H', Timestamp('2000-01-05 05:00:00')),
-                               ('S', Timestamp('2000-01-05 05:09:15'))]:
-            _check_round(freq, expected)
-
-        msg = pd.tseries.frequencies._INVALID_FREQ_ERROR
-        with self.assertRaisesRegexp(ValueError, msg):
-            stamp.round('foo')
diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py
deleted file mode 100644
index 3feffe924c291..0000000000000
--- a/pandas/tseries/tests/test_util.py
+++ /dev/null
@@ -1,126 +0,0 @@
-from pandas.compat import range
-
-import numpy as np
-
-from pandas import Series, date_range
-import pandas.util.testing as tm
-
-from datetime import datetime, date
-
-from pandas.tseries.tools import normalize_date
-from pandas.tseries.util import pivot_annual, isleapyear
-
-
-class TestPivotAnnual(tm.TestCase):
-    """
-    New pandas of scikits.timeseries pivot_annual
-    """
-
-    def test_daily(self):
-        rng = date_range('1/1/2000', '12/31/2004', freq='D')
-        ts = Series(np.random.randn(len(rng)), index=rng)
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            annual = pivot_annual(ts, 'D')
-
-        doy = ts.index.dayofyear
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            doy[(~isleapyear(ts.index.year)) & (doy >= 60)] += 1
-
-        for i in range(1, 367):
-            subset = ts[doy == i]
-            subset.index = [x.year for x in subset.index]
-
-            result = annual[i].dropna()
-            tm.assert_series_equal(result, subset, check_names=False)
-            self.assertEqual(result.name, i)
-
-        # check leap days
-        leaps = ts[(ts.index.month == 2) & (ts.index.day == 29)]
-        day = leaps.index.dayofyear[0]
-        leaps.index = leaps.index.year
-        leaps.name = 60
-        tm.assert_series_equal(annual[day].dropna(), leaps)
-
-    def test_hourly(self):
-        rng_hourly = date_range('1/1/1994', periods=(18 * 8760 + 4 * 24),
-                                freq='H')
-        data_hourly = np.random.randint(100, 350, rng_hourly.size)
-        ts_hourly = Series(data_hourly, index=rng_hourly)
-
-        grouped = ts_hourly.groupby(ts_hourly.index.year)
-        hoy = grouped.apply(lambda x: x.reset_index(drop=True))
-        hoy = hoy.index.droplevel(0).values
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            hoy[~isleapyear(ts_hourly.index.year) & (hoy >= 1416)] += 24
-        hoy += 1
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            annual = pivot_annual(ts_hourly)
-
-        ts_hourly = ts_hourly.astype(float)
-        for i in [1, 1416, 1417, 1418, 1439, 1440, 1441, 8784]:
-            subset = ts_hourly[hoy == i]
-            subset.index = [x.year for x in subset.index]
-
-            result = annual[i].dropna()
-            tm.assert_series_equal(result, subset, check_names=False)
-            self.assertEqual(result.name, i)
-
-        leaps = ts_hourly[(ts_hourly.index.month == 2) & (
-            ts_hourly.index.day == 29) & (ts_hourly.index.hour == 0)]
-        hour = leaps.index.dayofyear[0] * 24 - 23
-        leaps.index = leaps.index.year
-        leaps.name = 1417
-        tm.assert_series_equal(annual[hour].dropna(), leaps)
-
-    def test_weekly(self):
-        pass
-
-    def test_monthly(self):
-        rng = date_range('1/1/2000', '12/31/2004', freq='M')
-        ts = Series(np.random.randn(len(rng)), index=rng)
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            annual = pivot_annual(ts, 'M')
-
-        month = ts.index.month
-        for i in range(1, 13):
-            subset = ts[month == i]
-            subset.index = [x.year for x in subset.index]
-            result = annual[i].dropna()
-            tm.assert_series_equal(result, subset, check_names=False)
-            self.assertEqual(result.name, i)
-
-    def test_period_monthly(self):
-        pass
-
-    def test_period_daily(self):
-        pass
-
-    def test_period_weekly(self):
-        pass
-
-    def test_isleapyear_deprecate(self):
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.assertTrue(isleapyear(2000))
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.assertFalse(isleapyear(2001))
-
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            self.assertTrue(isleapyear(2004))
-
-
-def test_normalize_date():
-    value = date(2012, 9, 7)
-
-    result = normalize_date(value)
-    assert (result == datetime(2012, 9, 7))
-
-    value = datetime(2012, 9, 7, 12)
-
-    result = normalize_date(value)
-    assert (result == datetime(2012, 9, 7))
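
For reviewers skimming the deleted file, a minimal standalone sketch of the behaviour the removed test_normalize_date exercised; it assumes pandas.tseries.tools.normalize_date is still importable at this revision (the import path is the one the deleted file used).

    from datetime import date, datetime

    from pandas.tseries.tools import normalize_date

    # normalize_date drops the time-of-day component and returns midnight of
    # the same calendar day, for both date and datetime inputs.
    assert normalize_date(date(2012, 9, 7)) == datetime(2012, 9, 7)
    assert normalize_date(datetime(2012, 9, 7, 12)) == datetime(2012, 9, 7)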
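
The unit-conversion constants asserted in the removed period_asfreq tests further up (86400000000 microseconds per day, 3600000 milliseconds per hour, and so on) all follow from one resolution ladder, D -> H -> T -> S -> L -> U -> N, with step factors 24, 60, 60, 1000, 1000, 1000. A small self-contained arithmetic check of those constants follows; the names FACTORS and cumulative are illustrative only and are not part of pandas.

    # Hypothetical helper: per-step factors along the sub-daily resolution ladder.
    FACTORS = {'D->H': 24, 'H->T': 60, 'T->S': 60,
               'S->L': 1000, 'L->U': 1000, 'U->N': 1000}

    def cumulative(path):
        """Multiply the per-step factors along a conversion path."""
        out = 1
        for step in path:
            out *= FACTORS[step]
        return out

    # One day expressed in microseconds and nanoseconds.
    assert cumulative(['D->H', 'H->T', 'T->S', 'S->L', 'L->U']) == 86400000000
    assert cumulative(['D->H', 'H->T', 'T->S', 'S->L', 'L->U', 'U->N']) == 86400000000000
    # One hour in milliseconds, one minute in nanoseconds.
    assert cumulative(['H->T', 'T->S', 'S->L']) == 3600000
    assert cumulative(['T->S', 'S->L', 'L->U', 'U->N']) == 60000000000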