diff --git a/pandas/tests/arithmetic/__init__.py b/pandas/tests/arithmetic/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py
new file mode 100644
index 0000000000000..1e25d4a5224ee
--- /dev/null
+++ b/pandas/tests/arithmetic/conftest.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+import pytest
+
+import numpy as np
+import pandas as pd
+
+
+@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
+def one(request):
+    # zero-dim integer array behaves like an integer
+    return request.param
+
+
+# ------------------------------------------------------------------
+
+@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame],
+                ids=lambda x: x.__name__)
+def box(request):
+    """
+    Several array-like containers that should have effectively identical
+    behavior with respect to arithmetic operations.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[
+    pd.Index,
+    pd.Series,
+    pytest.param(pd.DataFrame,
+                 marks=pytest.mark.xfail(reason="Tries to broadcast "
+                                                "incorrectly",
+                                         strict=True, raises=ValueError))
+], ids=lambda x: x.__name__)
+def box_df_broadcast_failure(request):
+    """
+    Fixture equivalent to `box` but with the common failing case where
+    the DataFrame operation tries to broadcast incorrectly.
+    """
+    return request.param
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
new file mode 100644
index 0000000000000..b09d3c3183803
--- /dev/null
+++ b/pandas/tests/arithmetic/test_datetime64.py
@@ -0,0 +1,1386 @@
+# -*- coding: utf-8 -*-
+# Arithmetc tests for DataFrame/Series/Index/Array classes that should
+# behave identically.
+# Specifically for datetime64 and datetime64tz dtypes
+import operator
+from datetime import datetime, timedelta
+import warnings
+
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas.util.testing as tm
+
+from pandas.compat.numpy import np_datetime64_compat
+from pandas.errors import PerformanceWarning, NullFrequencyError
+
+from pandas._libs.tslibs.conversion import localize_pydatetime
+from pandas._libs.tslibs.offsets import shift_months
+
+from pandas.core import ops
+
+from pandas import (
+    Timestamp, Timedelta, Period, Series, date_range,
+    DatetimeIndex, TimedeltaIndex)
+
+
+# ------------------------------------------------------------------
+# Fixtures
+
+@pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2),
+                        np.timedelta64(2, 'h'), Timedelta(hours=2)],
+                ids=str)
+def delta(request):
+    # Several ways of representing two hours
+    return request.param
+
+
+@pytest.fixture(
+    params=[
+        datetime(2011, 1, 1),
+        DatetimeIndex(['2011-01-01', '2011-01-02']),
+        DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize('US/Eastern'),
+        np.datetime64('2011-01-01'),
+        Timestamp('2011-01-01')],
+    ids=lambda x: type(x).__name__)
+def addend(request):
+    return request.param
+
+
+# ------------------------------------------------------------------
+# Comparisons
+
+class TestDatetime64DataFrameComparison(object):
+    @pytest.mark.parametrize('timestamps', [
+        [pd.Timestamp('2012-01-01 13:00:00+00:00')] * 2,
+        [pd.Timestamp('2012-01-01 13:00:00')] * 2])
+    def test_tz_aware_scalar_comparison(self, timestamps):
+        # GH#15966
+        df = pd.DataFrame({'test': timestamps})
+        expected = pd.DataFrame({'test': [False, False]})
+        tm.assert_frame_equal(df == -1, expected)
+
+
+class TestDatetime64SeriesComparison(object):
+    def test_dt64_ser_cmp_date_warning(self):
+        # https://github.com/pandas-dev/pandas/issues/21359
+        # Remove this test and enble invalid test below
+        ser = pd.Series(pd.date_range('20010101', periods=10), name='dates')
+        date = ser.iloc[0].to_pydatetime().date()
+
+        with tm.assert_produces_warning(FutureWarning) as m:
+            result = ser == date
+        expected = pd.Series([True] + [False] * 9, name='dates')
+        tm.assert_series_equal(result, expected)
+        assert "Comparing Series of datetimes " in str(m[0].message)
+        assert "will not compare equal" in str(m[0].message)
+
+        with tm.assert_produces_warning(FutureWarning) as m:
+            result = ser != date
+        tm.assert_series_equal(result, ~expected)
+        assert "will not compare equal" in str(m[0].message)
+
+        with tm.assert_produces_warning(FutureWarning) as m:
+            result = ser <= date
+        tm.assert_series_equal(result, expected)
+        assert "a TypeError will be raised" in str(m[0].message)
+
+        with tm.assert_produces_warning(FutureWarning) as m:
+            result = ser < date
+        tm.assert_series_equal(result, pd.Series([False] * 10, name='dates'))
+        assert "a TypeError will be raised" in str(m[0].message)
+
+        with tm.assert_produces_warning(FutureWarning) as m:
+            result = ser >= date
+        tm.assert_series_equal(result, pd.Series([True] * 10, name='dates'))
+        assert "a TypeError will be raised" in str(m[0].message)
+
+        with tm.assert_produces_warning(FutureWarning) as m:
+            result = ser > date
+        tm.assert_series_equal(result, pd.Series([False] + [True] * 9,
+                                                 name='dates'))
+        assert "a TypeError will be raised" in str(m[0].message)
+
+    @pytest.mark.skip(reason="GH#21359")
+    def test_dt64ser_cmp_date_invalid(self):
+        # GH#19800 datetime.date comparison raises to
+        # match DatetimeIndex/Timestamp.  This also matches the behavior
+        # of stdlib datetime.datetime
+        ser = pd.Series(pd.date_range('20010101', periods=10), name='dates')
+        date = ser.iloc[0].to_pydatetime().date()
+        assert not (ser == date).any()
+        assert (ser != date).all()
+        with pytest.raises(TypeError):
+            ser > date
+        with pytest.raises(TypeError):
+            ser < date
+        with pytest.raises(TypeError):
+            ser >= date
+        with pytest.raises(TypeError):
+            ser <= date
+
+    def test_dt64ser_cmp_period_scalar(self):
+        ser = Series(pd.period_range('2000-01-01', periods=10, freq='D'))
+        val = Period('2000-01-04', freq='D')
+        result = ser > val
+        expected = Series([x > val for x in ser])
+        tm.assert_series_equal(result, expected)
+
+        val = ser[5]
+        result = ser > val
+        expected = Series([x > val for x in ser])
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("left,right", [
+        ("lt", "gt"),
+        ("le", "ge"),
+        ("eq", "eq"),
+        ("ne", "ne"),
+    ])
+    def test_timestamp_compare_series(self, left, right):
+        # see gh-4982
+        # Make sure we can compare Timestamps on the right AND left hand side.
+        ser = pd.Series(pd.date_range("20010101", periods=10), name="dates")
+        s_nat = ser.copy(deep=True)
+
+        ser[0] = pd.Timestamp("nat")
+        ser[3] = pd.Timestamp("nat")
+
+        left_f = getattr(operator, left)
+        right_f = getattr(operator, right)
+
+        # No NaT
+        expected = left_f(ser, pd.Timestamp("20010109"))
+        result = right_f(pd.Timestamp("20010109"), ser)
+        tm.assert_series_equal(result, expected)
+
+        # NaT
+        expected = left_f(ser, pd.Timestamp("nat"))
+        result = right_f(pd.Timestamp("nat"), ser)
+        tm.assert_series_equal(result, expected)
+
+        # Compare to Timestamp with series containing NaT
+        expected = left_f(s_nat, pd.Timestamp("20010109"))
+        result = right_f(pd.Timestamp("20010109"), s_nat)
+        tm.assert_series_equal(result, expected)
+
+        # Compare to NaT with series containing NaT
+        expected = left_f(s_nat, pd.Timestamp("nat"))
+        result = right_f(pd.Timestamp("nat"), s_nat)
+        tm.assert_series_equal(result, expected)
+
+    def test_timestamp_equality(self):
+        # GH#11034
+        ser = pd.Series([pd.Timestamp('2000-01-29 01:59:00'), 'NaT'])
+        result = ser != ser
+        tm.assert_series_equal(result, pd.Series([False, True]))
+        result = ser != ser[0]
+        tm.assert_series_equal(result, pd.Series([False, True]))
+        result = ser != ser[1]
+        tm.assert_series_equal(result, pd.Series([True, True]))
+
+        result = ser == ser
+        tm.assert_series_equal(result, pd.Series([True, False]))
+        result = ser == ser[0]
+        tm.assert_series_equal(result, pd.Series([True, False]))
+        result = ser == ser[1]
+        tm.assert_series_equal(result, pd.Series([False, False]))
+
+
+class TestDatetimeIndexComparisons(object):
+    @pytest.mark.parametrize('other', [datetime(2016, 1, 1),
+                                       Timestamp('2016-01-01'),
+                                       np.datetime64('2016-01-01')])
+    def test_dti_cmp_datetimelike(self, other, tz_naive_fixture):
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
+        if tz is not None:
+            if isinstance(other, np.datetime64):
+                # no tzaware version available
+                return
+            other = localize_pydatetime(other, dti.tzinfo)
+
+        result = dti == other
+        expected = np.array([True, False])
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = dti > other
+        expected = np.array([False, True])
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = dti >= other
+        expected = np.array([True, True])
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = dti < other
+        expected = np.array([False, False])
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = dti <= other
+        expected = np.array([True, False])
+        tm.assert_numpy_array_equal(result, expected)
+
+    def dti_cmp_non_datetime(self, tz_naive_fixture):
+        # GH#19301 by convention datetime.date is not considered comparable
+        # to Timestamp or DatetimeIndex.  This may change in the future.
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
+
+        other = datetime(2016, 1, 1).date()
+        assert not (dti == other).any()
+        assert (dti != other).all()
+        with pytest.raises(TypeError):
+            dti < other
+        with pytest.raises(TypeError):
+            dti <= other
+        with pytest.raises(TypeError):
+            dti > other
+        with pytest.raises(TypeError):
+            dti >= other
+
+    @pytest.mark.parametrize('other', [None, np.nan, pd.NaT])
+    def test_dti_eq_null_scalar(self, other, tz_naive_fixture):
+        # GH#19301
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
+        assert not (dti == other).any()
+
+    @pytest.mark.parametrize('other', [None, np.nan, pd.NaT])
+    def test_dti_ne_null_scalar(self, other, tz_naive_fixture):
+        # GH#19301
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
+        assert (dti != other).all()
+
+    @pytest.mark.parametrize('other', [None, np.nan])
+    def test_dti_cmp_null_scalar_inequality(self, tz_naive_fixture, other):
+        # GH#19301
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
+
+        with pytest.raises(TypeError):
+            dti < other
+        with pytest.raises(TypeError):
+            dti <= other
+        with pytest.raises(TypeError):
+            dti > other
+        with pytest.raises(TypeError):
+            dti >= other
+
+    def test_dti_cmp_nat(self):
+        left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT,
+                                 pd.Timestamp('2011-01-03')])
+        right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')])
+
+        for lhs, rhs in [(left, right),
+                         (left.astype(object), right.astype(object))]:
+            result = rhs == lhs
+            expected = np.array([False, False, True])
+            tm.assert_numpy_array_equal(result, expected)
+
+            result = lhs != rhs
+            expected = np.array([True, True, False])
+            tm.assert_numpy_array_equal(result, expected)
+
+            expected = np.array([False, False, False])
+            tm.assert_numpy_array_equal(lhs == pd.NaT, expected)
+            tm.assert_numpy_array_equal(pd.NaT == rhs, expected)
+
+            expected = np.array([True, True, True])
+            tm.assert_numpy_array_equal(lhs != pd.NaT, expected)
+            tm.assert_numpy_array_equal(pd.NaT != lhs, expected)
+
+            expected = np.array([False, False, False])
+            tm.assert_numpy_array_equal(lhs < pd.NaT, expected)
+            tm.assert_numpy_array_equal(pd.NaT > lhs, expected)
+
+    def test_dti_cmp_nat_behaves_like_float_cmp_nan(self):
+        fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0])
+        fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0])
+
+        didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT,
+                                  '2014-05-01', '2014-07-01'])
+        didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, pd.NaT,
+                                  '2014-06-01', '2014-07-01'])
+        darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'),
+                         np_datetime64_compat('2014-03-01 00:00Z'),
+                         np_datetime64_compat('nat'), np.datetime64('nat'),
+                         np_datetime64_compat('2014-06-01 00:00Z'),
+                         np_datetime64_compat('2014-07-01 00:00Z')])
+
+        cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)]
+
+        # Check pd.NaT is handles as the same as np.nan
+        with tm.assert_produces_warning(None):
+            for idx1, idx2 in cases:
+
+                result = idx1 < idx2
+                expected = np.array([True, False, False, False, True, False])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx2 > idx1
+                expected = np.array([True, False, False, False, True, False])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 <= idx2
+                expected = np.array([True, False, False, False, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx2 >= idx1
+                expected = np.array([True, False, False, False, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 == idx2
+                expected = np.array([False, False, False, False, False, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 != idx2
+                expected = np.array([True, True, True, True, True, False])
+                tm.assert_numpy_array_equal(result, expected)
+
+        with tm.assert_produces_warning(None):
+            for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]:
+                result = idx1 < val
+                expected = np.array([False, False, False, False, False, False])
+                tm.assert_numpy_array_equal(result, expected)
+                result = idx1 > val
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 <= val
+                tm.assert_numpy_array_equal(result, expected)
+                result = idx1 >= val
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 == val
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 != val
+                expected = np.array([True, True, True, True, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+        # Check pd.NaT is handles as the same as np.nan
+        with tm.assert_produces_warning(None):
+            for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]:
+                result = idx1 < val
+                expected = np.array([True, False, False, False, False, False])
+                tm.assert_numpy_array_equal(result, expected)
+                result = idx1 > val
+                expected = np.array([False, False, False, False, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 <= val
+                expected = np.array([True, False, True, False, False, False])
+                tm.assert_numpy_array_equal(result, expected)
+                result = idx1 >= val
+                expected = np.array([False, False, True, False, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 == val
+                expected = np.array([False, False, True, False, False, False])
+                tm.assert_numpy_array_equal(result, expected)
+
+                result = idx1 != val
+                expected = np.array([True, True, False, True, True, True])
+                tm.assert_numpy_array_equal(result, expected)
+
+    @pytest.mark.parametrize('op', [operator.eq, operator.ne,
+                                    operator.gt, operator.ge,
+                                    operator.lt, operator.le])
+    def test_comparison_tzawareness_compat(self, op):
+        # GH#18162
+        dr = pd.date_range('2016-01-01', periods=6)
+        dz = dr.tz_localize('US/Pacific')
+
+        with pytest.raises(TypeError):
+            op(dr, dz)
+        with pytest.raises(TypeError):
+            op(dr, list(dz))
+        with pytest.raises(TypeError):
+            op(dz, dr)
+        with pytest.raises(TypeError):
+            op(dz, list(dr))
+
+        # Check that there isn't a problem aware-aware and naive-naive do not
+        # raise
+        assert (dr == dr).all()
+        assert (dr == list(dr)).all()
+        assert (dz == dz).all()
+        assert (dz == list(dz)).all()
+
+        # Check comparisons against scalar Timestamps
+        ts = pd.Timestamp('2000-03-14 01:59')
+        ts_tz = pd.Timestamp('2000-03-14 01:59', tz='Europe/Amsterdam')
+
+        assert (dr > ts).all()
+        with pytest.raises(TypeError):
+            op(dr, ts_tz)
+
+        assert (dz > ts_tz).all()
+        with pytest.raises(TypeError):
+            op(dz, ts)
+
+        # GH#12601: Check comparison against Timestamps and DatetimeIndex
+        with pytest.raises(TypeError):
+            op(ts, dz)
+
+    @pytest.mark.parametrize('op', [operator.eq, operator.ne,
+                                    operator.gt, operator.ge,
+                                    operator.lt, operator.le])
+    @pytest.mark.parametrize('other', [datetime(2016, 1, 1),
+                                       Timestamp('2016-01-01'),
+                                       np.datetime64('2016-01-01')])
+    def test_scalar_comparison_tzawareness(self, op, other, tz_aware_fixture):
+        tz = tz_aware_fixture
+        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
+        with pytest.raises(TypeError):
+            op(dti, other)
+        with pytest.raises(TypeError):
+            op(other, dti)
+
+    @pytest.mark.parametrize('op', [operator.eq, operator.ne,
+                                    operator.gt, operator.ge,
+                                    operator.lt, operator.le])
+    def test_nat_comparison_tzawareness(self, op):
+        # GH#19276
+        # tzaware DatetimeIndex should not raise when compared to NaT
+        dti = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT,
+                                '2014-05-01', '2014-07-01'])
+        expected = np.array([op == operator.ne] * len(dti))
+        result = op(dti, pd.NaT)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = op(dti.tz_localize('US/Pacific'), pd.NaT)
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_dti_cmp_str(self, tz_naive_fixture):
+        # GH#22074
+        # regardless of tz, we expect these comparisons are valid
+        tz = tz_naive_fixture
+        rng = date_range('1/1/2000', periods=10, tz=tz)
+        other = '1/1/2000'
+
+        result = rng == other
+        expected = np.array([True] + [False] * 9)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = rng != other
+        expected = np.array([False] + [True] * 9)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = rng < other
+        expected = np.array([False] * 10)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = rng <= other
+        expected = np.array([True] + [False] * 9)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = rng > other
+        expected = np.array([False] + [True] * 9)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = rng >= other
+        expected = np.array([True] * 10)
+        tm.assert_numpy_array_equal(result, expected)
+
+    @pytest.mark.parametrize('other', ['foo', 99, 4.0,
+                                       object(), timedelta(days=2)])
+    def test_dti_cmp_scalar_invalid(self, other, tz_naive_fixture):
+        # GH#22074
+        tz = tz_naive_fixture
+        rng = date_range('1/1/2000', periods=10, tz=tz)
+
+        result = rng == other
+        expected = np.array([False] * 10)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = rng != other
+        expected = np.array([True] * 10)
+        tm.assert_numpy_array_equal(result, expected)
+
+        with pytest.raises(TypeError):
+            rng < other
+        with pytest.raises(TypeError):
+            rng <= other
+        with pytest.raises(TypeError):
+            rng > other
+        with pytest.raises(TypeError):
+            rng >= other
+
+    def test_dti_cmp_list(self):
+        rng = date_range('1/1/2000', periods=10)
+
+        result = rng == list(rng)
+        expected = rng == rng
+        tm.assert_numpy_array_equal(result, expected)
+
+    @pytest.mark.parametrize('other', [
+        pd.timedelta_range('1D', periods=10),
+        pd.timedelta_range('1D', periods=10).to_series(),
+        pd.timedelta_range('1D', periods=10).asi8.view('m8[ns]')
+    ], ids=lambda x: type(x).__name__)
+    def test_dti_cmp_tdi_tzawareness(self, other):
+        # GH#22074
+        # reversion test that we _don't_ call _assert_tzawareness_compat
+        # when comparing against TimedeltaIndex
+        dti = date_range('2000-01-01', periods=10, tz='Asia/Tokyo')
+
+        result = dti == other
+        expected = np.array([False] * 10)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = dti != other
+        expected = np.array([True] * 10)
+        tm.assert_numpy_array_equal(result, expected)
+
+        with pytest.raises(TypeError):
+            dti < other
+        with pytest.raises(TypeError):
+            dti <= other
+        with pytest.raises(TypeError):
+            dti > other
+        with pytest.raises(TypeError):
+            dti >= other
+
+    def test_dti_cmp_object_dtype(self):
+        # GH#22074
+        dti = date_range('2000-01-01', periods=10, tz='Asia/Tokyo')
+
+        other = dti.astype('O')
+
+        result = dti == other
+        expected = np.array([True] * 10)
+        tm.assert_numpy_array_equal(result, expected)
+
+        other = dti.tz_localize(None)
+        with pytest.raises(TypeError):
+            # tzawareness failure
+            dti != other
+
+        other = np.array(list(dti[:5]) + [Timedelta(days=1)] * 5)
+        result = dti == other
+        expected = np.array([True] * 5 + [False] * 5)
+        tm.assert_numpy_array_equal(result, expected)
+
+        with pytest.raises(TypeError):
+            dti >= other
+
+
+# ------------------------------------------------------------------
+# Arithmetic
+
+class TestFrameArithmetic(object):
+
+    @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano',
+                       strict=True)
+    def test_df_sub_datetime64_not_ns(self):
+        df = pd.DataFrame(pd.date_range('20130101', periods=3))
+        dt64 = np.datetime64('2013-01-01')
+        assert dt64.dtype == 'datetime64[D]'
+        res = df - dt64
+        expected = pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1),
+                                 pd.Timedelta(days=2)])
+        tm.assert_frame_equal(res, expected)
+
+
+class TestTimestampSeriesArithmetic(object):
+    def test_timestamp_sub_series(self):
+        ser = pd.Series(pd.date_range('2014-03-17', periods=2, freq='D',
+                                      tz='US/Eastern'))
+        ts = ser[0]
+
+        delta_series = pd.Series([np.timedelta64(0, 'D'),
+                                  np.timedelta64(1, 'D')])
+        tm.assert_series_equal(ser - ts, delta_series)
+        tm.assert_series_equal(ts - ser, -delta_series)
+
+    def test_dt64ser_sub_datetime_dtype(self):
+        ts = Timestamp(datetime(1993, 1, 7, 13, 30, 00))
+        dt = datetime(1993, 6, 22, 13, 30)
+        ser = Series([ts])
+        result = pd.to_timedelta(np.abs(ser - dt))
+        assert result.dtype == 'timedelta64[ns]'
+
+
+class TestDatetimeIndexArithmetic(object):
+
+    # -------------------------------------------------------------
+    # Invalid Operations
+
+    @pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])])
+    @pytest.mark.parametrize('op', [operator.add, ops.radd,
+                                    operator.sub, ops.rsub])
+    def test_dti_add_sub_float(self, op, other):
+        dti = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
+        with pytest.raises(TypeError):
+            op(dti, other)
+
+    def test_dti_add_timestamp_raises(self):
+        idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
+        msg = "cannot add DatetimeIndex and Timestamp"
+        with tm.assert_raises_regex(TypeError, msg):
+            idx + Timestamp('2011-01-01')
+
+    def test_dti_radd_timestamp_raises(self):
+        idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
+        msg = "cannot add DatetimeIndex and Timestamp"
+        with tm.assert_raises_regex(TypeError, msg):
+            Timestamp('2011-01-01') + idx
+
+    # -------------------------------------------------------------
+    # Binary operations DatetimeIndex and int
+
+    def test_dti_add_int(self, tz_naive_fixture, one):
+        # Variants of `one` for #19012
+        tz = tz_naive_fixture
+        rng = pd.date_range('2000-01-01 09:00', freq='H',
+                            periods=10, tz=tz)
+        result = rng + one
+        expected = pd.date_range('2000-01-01 10:00', freq='H',
+                                 periods=10, tz=tz)
+        tm.assert_index_equal(result, expected)
+
+    def test_dti_iadd_int(self, tz_naive_fixture, one):
+        tz = tz_naive_fixture
+        rng = pd.date_range('2000-01-01 09:00', freq='H',
+                            periods=10, tz=tz)
+        expected = pd.date_range('2000-01-01 10:00', freq='H',
+                                 periods=10, tz=tz)
+        rng += one
+        tm.assert_index_equal(rng, expected)
+
+    def test_dti_sub_int(self, tz_naive_fixture, one):
+        tz = tz_naive_fixture
+        rng = pd.date_range('2000-01-01 09:00', freq='H',
+                            periods=10, tz=tz)
+        result = rng - one
+        expected = pd.date_range('2000-01-01 08:00', freq='H',
+                                 periods=10, tz=tz)
+        tm.assert_index_equal(result, expected)
+
+    def test_dti_isub_int(self, tz_naive_fixture, one):
+        tz = tz_naive_fixture
+        rng = pd.date_range('2000-01-01 09:00', freq='H',
+                            periods=10, tz=tz)
+        expected = pd.date_range('2000-01-01 08:00', freq='H',
+                                 periods=10, tz=tz)
+        rng -= one
+        tm.assert_index_equal(rng, expected)
+
+    # -------------------------------------------------------------
+    # __add__/__sub__ with integer arrays
+
+    @pytest.mark.parametrize('freq', ['H', 'D'])
+    @pytest.mark.parametrize('box', [np.array, pd.Index])
+    def test_dti_add_intarray_tick(self, box, freq):
+        # GH#19959
+        dti = pd.date_range('2016-01-01', periods=2, freq=freq)
+        other = box([4, -1])
+        expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))])
+        result = dti + other
+        tm.assert_index_equal(result, expected)
+        result = other + dti
+        tm.assert_index_equal(result, expected)
+
+    @pytest.mark.parametrize('freq', ['W', 'M', 'MS', 'Q'])
+    @pytest.mark.parametrize('box', [np.array, pd.Index])
+    def test_dti_add_intarray_non_tick(self, box, freq):
+        # GH#19959
+        dti = pd.date_range('2016-01-01', periods=2, freq=freq)
+        other = box([4, -1])
+        expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))])
+        with tm.assert_produces_warning(PerformanceWarning):
+            result = dti + other
+        tm.assert_index_equal(result, expected)
+        with tm.assert_produces_warning(PerformanceWarning):
+            result = other + dti
+        tm.assert_index_equal(result, expected)
+
+    @pytest.mark.parametrize('box', [np.array, pd.Index])
+    def test_dti_add_intarray_no_freq(self, box):
+        # GH#19959
+        dti = pd.DatetimeIndex(['2016-01-01', 'NaT', '2017-04-05 06:07:08'])
+        other = box([9, 4, -1])
+        with pytest.raises(NullFrequencyError):
+            dti + other
+        with pytest.raises(NullFrequencyError):
+            other + dti
+        with pytest.raises(NullFrequencyError):
+            dti - other
+        with pytest.raises(TypeError):
+            other - dti
+
+    # -------------------------------------------------------------
+    # Binary operations DatetimeIndex and timedelta-like
+
+    def test_dti_add_timedeltalike(self, tz_naive_fixture, delta):
+        tz = tz_naive_fixture
+        rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
+        result = rng + delta
+        expected = pd.date_range('2000-01-01 02:00',
+                                 '2000-02-01 02:00', tz=tz)
+        tm.assert_index_equal(result, expected)
+
+    def test_dti_iadd_timedeltalike(self, tz_naive_fixture, delta):
+        tz = tz_naive_fixture
+        rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
+        expected = pd.date_range('2000-01-01 02:00',
+                                 '2000-02-01 02:00', tz=tz)
+        rng += delta
+        tm.assert_index_equal(rng, expected)
+
+    def test_dti_sub_timedeltalike(self, tz_naive_fixture, delta):
+        tz = tz_naive_fixture
+        rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
+        expected = pd.date_range('1999-12-31 22:00',
+                                 '2000-01-31 22:00', tz=tz)
+        result = rng - delta
+        tm.assert_index_equal(result, expected)
+
+    def test_dti_isub_timedeltalike(self, tz_naive_fixture, delta):
+        tz = tz_naive_fixture
+        rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
+        expected = pd.date_range('1999-12-31 22:00',
+                                 '2000-01-31 22:00', tz=tz)
+        rng -= delta
+        tm.assert_index_equal(rng, expected)
+
+    # -------------------------------------------------------------
+    # Binary operations DatetimeIndex and TimedeltaIndex/array
+    def test_dti_add_tdi(self, tz_naive_fixture):
+        # GH#17558
+        tz = tz_naive_fixture
+        dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
+        tdi = pd.timedelta_range('0 days', periods=10)
+        expected = pd.date_range('2017-01-01', periods=10, tz=tz)
+
+        # add with TimdeltaIndex
+        result = dti + tdi
+        tm.assert_index_equal(result, expected)
+
+        result = tdi + dti
+        tm.assert_index_equal(result, expected)
+
+        # add with timedelta64 array
+        result = dti + tdi.values
+        tm.assert_index_equal(result, expected)
+
+        result = tdi.values + dti
+        tm.assert_index_equal(result, expected)
+
+    def test_dti_iadd_tdi(self, tz_naive_fixture):
+        # GH#17558
+        tz = tz_naive_fixture
+        dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
+        tdi = pd.timedelta_range('0 days', periods=10)
+        expected = pd.date_range('2017-01-01', periods=10, tz=tz)
+
+        # iadd with TimdeltaIndex
+        result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
+        result += tdi
+        tm.assert_index_equal(result, expected)
+
+        result = pd.timedelta_range('0 days', periods=10)
+        result += dti
+        tm.assert_index_equal(result, expected)
+
+        # iadd with timedelta64 array
+        result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
+        result += tdi.values
+        tm.assert_index_equal(result, expected)
+
+        result = pd.timedelta_range('0 days', periods=10)
+        result += dti
+        tm.assert_index_equal(result, expected)
+
+    def test_dti_sub_tdi(self, tz_naive_fixture):
+        # GH#17558
+        tz = tz_naive_fixture
+        dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
+        tdi = pd.timedelta_range('0 days', periods=10)
+        expected = pd.date_range('2017-01-01', periods=10, tz=tz, freq='-1D')
+
+        # sub with TimedeltaIndex
+        result = dti - tdi
+        tm.assert_index_equal(result, expected)
+
+        msg = 'cannot subtract .*TimedeltaIndex'
+        with tm.assert_raises_regex(TypeError, msg):
+            tdi - dti
+
+        # sub with timedelta64 array
+        result = dti - tdi.values
+        tm.assert_index_equal(result, expected)
+
+        msg = 'cannot subtract DatetimeIndex from'
+        with tm.assert_raises_regex(TypeError, msg):
+            tdi.values - dti
+
+    def test_dti_isub_tdi(self, tz_naive_fixture):
+        # GH#17558
+        tz = tz_naive_fixture
+        dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
+        tdi = pd.timedelta_range('0 days', periods=10)
+        expected = pd.date_range('2017-01-01', periods=10, tz=tz, freq='-1D')
+
+        # isub with TimedeltaIndex
+        result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
+        result -= tdi
+        tm.assert_index_equal(result, expected)
+
+        msg = 'cannot subtract .*TimedeltaIndex'
+        with tm.assert_raises_regex(TypeError, msg):
+            tdi -= dti
+
+        # isub with timedelta64 array
+        result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
+        result -= tdi.values
+        tm.assert_index_equal(result, expected)
+
+        msg = '|'.join(['cannot perform __neg__ with this index type:',
+                        'ufunc subtract cannot use operands with types',
+                        'cannot subtract DatetimeIndex from'])
+        with tm.assert_raises_regex(TypeError, msg):
+            tdi.values -= dti
+
+    # -------------------------------------------------------------
+    # Binary Operations DatetimeIndex and datetime-like
+    # TODO: A couple other tests belong in this section.  Move them in
+    # A PR where there isn't already a giant diff.
+
+    def test_add_datetimelike_and_dti(self, addend):
+        # GH#9631
+        dti = DatetimeIndex(['2011-01-01', '2011-01-02'])
+        msg = 'cannot add DatetimeIndex and {0}'.format(
+            type(addend).__name__)
+        with tm.assert_raises_regex(TypeError, msg):
+            dti + addend
+        with tm.assert_raises_regex(TypeError, msg):
+            addend + dti
+
+    def test_add_datetimelike_and_dti_tz(self, addend):
+        # GH#9631
+        dti_tz = DatetimeIndex(['2011-01-01',
+                                '2011-01-02']).tz_localize('US/Eastern')
+        msg = 'cannot add DatetimeIndex and {0}'.format(
+            type(addend).__name__)
+        with tm.assert_raises_regex(TypeError, msg):
+            dti_tz + addend
+        with tm.assert_raises_regex(TypeError, msg):
+            addend + dti_tz
+
+    # -------------------------------------------------------------
+    # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64]
+
+    def test_dti_add_dt64_array_raises(self, tz_naive_fixture):
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
+        dtarr = dti.values
+
+        with pytest.raises(TypeError):
+            dti + dtarr
+        with pytest.raises(TypeError):
+            dtarr + dti
+
+    def test_dti_sub_dt64_array_naive(self):
+        dti = pd.date_range('2016-01-01', periods=3, tz=None)
+        dtarr = dti.values
+
+        expected = dti - dti
+        result = dti - dtarr
+        tm.assert_index_equal(result, expected)
+        result = dtarr - dti
+        tm.assert_index_equal(result, expected)
+
+    def test_dti_sub_dt64_array_aware_raises(self, tz_naive_fixture):
+        tz = tz_naive_fixture
+        if tz is None:
+            return
+        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
+        dtarr = dti.values
+
+        with pytest.raises(TypeError):
+            dti - dtarr
+        with pytest.raises(TypeError):
+            dtarr - dti
+
+    def test_dti_add_td64_array(self, tz_naive_fixture):
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
+        tdi = pd.TimedeltaIndex(['-1 Day', '-1 Day', '-1 Day'])
+        tdarr = tdi.values
+
+        expected = dti + tdi
+        result = dti + tdarr
+        tm.assert_index_equal(result, expected)
+        result = tdarr + dti
+        tm.assert_index_equal(result, expected)
+
+    def test_dti_sub_td64_array(self, tz_naive_fixture):
+        tz = tz_naive_fixture
+        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
+        tdi = pd.TimedeltaIndex(['-1 Day', '-1 Day', '-1 Day'])
+        tdarr = tdi.values
+
+        expected = dti - tdi
+        result = dti - tdarr
+        tm.assert_index_equal(result, expected)
+
+        with pytest.raises(TypeError):
+            tdarr - dti
+
+    # -------------------------------------------------------------
+
+    def test_sub_dti_dti(self):
+        # previously performed setop (deprecated in 0.16.0), now changed to
+        # return subtraction -> TimeDeltaIndex (GH ...)
+
+        dti = date_range('20130101', periods=3)
+        dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern')
+        dti_tz2 = date_range('20130101', periods=3).tz_localize('UTC')
+        expected = TimedeltaIndex([0, 0, 0])
+
+        result = dti - dti
+        tm.assert_index_equal(result, expected)
+
+        result = dti_tz - dti_tz
+        tm.assert_index_equal(result, expected)
+
+        with pytest.raises(TypeError):
+            dti_tz - dti
+
+        with pytest.raises(TypeError):
+            dti - dti_tz
+
+        with pytest.raises(TypeError):
+            dti_tz - dti_tz2
+
+        # isub
+        dti -= dti
+        tm.assert_index_equal(dti, expected)
+
+        # different length raises ValueError
+        dti1 = date_range('20130101', periods=3)
+        dti2 = date_range('20130101', periods=4)
+        with pytest.raises(ValueError):
+            dti1 - dti2
+
+        # NaN propagation
+        dti1 = DatetimeIndex(['2012-01-01', np.nan, '2012-01-03'])
+        dti2 = DatetimeIndex(['2012-01-02', '2012-01-03', np.nan])
+        expected = TimedeltaIndex(['1 days', np.nan, np.nan])
+        result = dti2 - dti1
+        tm.assert_index_equal(result, expected)
+
+    @pytest.mark.parametrize('freq', [None, 'D'])
+    def test_sub_period(self, freq, box):
+        # GH#13078
+        # not supported, check TypeError
+        p = pd.Period('2011-01-01', freq='D')
+
+        idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq)
+        idx = tm.box_expected(idx, box)
+
+        with pytest.raises(TypeError):
+            idx - p
+
+        with pytest.raises(TypeError):
+            p - idx
+
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        pd.Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="Tries to broadcast "
+                                                    "incorrectly",
+                                             strict=True,
+                                             raises=ValueError))
+    ], ids=lambda x: x.__name__)
+    @pytest.mark.parametrize('op', [operator.add, ops.radd,
+                                    operator.sub, ops.rsub])
+    @pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H'])
+    @pytest.mark.parametrize('dti_freq', [None, 'D'])
+    def test_dti_sub_pi(self, dti_freq, pi_freq, op, box):
+        # GH#20049 subtracting PeriodIndex should raise TypeError
+        dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=dti_freq)
+        pi = dti.to_period(pi_freq)
+
+        dti = tm.box_expected(dti, box)
+        # TODO: Also box pi?
+        with pytest.raises(TypeError):
+            op(dti, pi)
+
+    # -------------------------------------------------------------
+
+    def test_ufunc_coercions(self):
+        idx = date_range('2011-01-01', periods=3, freq='2D', name='x')
+
+        delta = np.timedelta64(1, 'D')
+        for result in [idx + delta, np.add(idx, delta)]:
+            assert isinstance(result, DatetimeIndex)
+            exp = date_range('2011-01-02', periods=3, freq='2D', name='x')
+            tm.assert_index_equal(result, exp)
+            assert result.freq == '2D'
+
+        for result in [idx - delta, np.subtract(idx, delta)]:
+            assert isinstance(result, DatetimeIndex)
+            exp = date_range('2010-12-31', periods=3, freq='2D', name='x')
+            tm.assert_index_equal(result, exp)
+            assert result.freq == '2D'
+
+        delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'),
+                          np.timedelta64(3, 'D')])
+        for result in [idx + delta, np.add(idx, delta)]:
+            assert isinstance(result, DatetimeIndex)
+            exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'],
+                                freq='3D', name='x')
+            tm.assert_index_equal(result, exp)
+            assert result.freq == '3D'
+
+        for result in [idx - delta, np.subtract(idx, delta)]:
+            assert isinstance(result, DatetimeIndex)
+            exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'],
+                                freq='D', name='x')
+            tm.assert_index_equal(result, exp)
+            assert result.freq == 'D'
+
+    def test_datetimeindex_sub_timestamp_overflow(self):
+        dtimax = pd.to_datetime(['now', pd.Timestamp.max])
+        dtimin = pd.to_datetime(['now', pd.Timestamp.min])
+
+        tsneg = Timestamp('1950-01-01')
+        ts_neg_variants = [tsneg,
+                           tsneg.to_pydatetime(),
+                           tsneg.to_datetime64().astype('datetime64[ns]'),
+                           tsneg.to_datetime64().astype('datetime64[D]')]
+
+        tspos = Timestamp('1980-01-01')
+        ts_pos_variants = [tspos,
+                           tspos.to_pydatetime(),
+                           tspos.to_datetime64().astype('datetime64[ns]'),
+                           tspos.to_datetime64().astype('datetime64[D]')]
+
+        for variant in ts_neg_variants:
+            with pytest.raises(OverflowError):
+                dtimax - variant
+
+        expected = pd.Timestamp.max.value - tspos.value
+        for variant in ts_pos_variants:
+            res = dtimax - variant
+            assert res[1].value == expected
+
+        expected = pd.Timestamp.min.value - tsneg.value
+        for variant in ts_neg_variants:
+            res = dtimin - variant
+            assert res[1].value == expected
+
+        for variant in ts_pos_variants:
+            with pytest.raises(OverflowError):
+                dtimin - variant
+
+    @pytest.mark.parametrize('names', [('foo', None, None),
+                                       ('baz', 'bar', None),
+                                       ('bar', 'bar', 'bar')])
+    @pytest.mark.parametrize('tz', [None, 'America/Chicago'])
+    def test_dti_add_series(self, tz, names):
+        # GH#13905
+        index = DatetimeIndex(['2016-06-28 05:30', '2016-06-28 05:31'],
+                              tz=tz, name=names[0])
+        ser = Series([Timedelta(seconds=5)] * 2,
+                     index=index, name=names[1])
+        expected = Series(index + Timedelta(seconds=5),
+                          index=index, name=names[2])
+
+        # passing name arg isn't enough when names[2] is None
+        expected.name = names[2]
+        assert expected.dtype == index.dtype
+        result = ser + index
+        tm.assert_series_equal(result, expected)
+        result2 = index + ser
+        tm.assert_series_equal(result2, expected)
+
+        expected = index + Timedelta(seconds=5)
+        result3 = ser.values + index
+        tm.assert_index_equal(result3, expected)
+        result4 = index + ser.values
+        tm.assert_index_equal(result4, expected)
+
+    def test_dti_add_offset_array(self, tz_naive_fixture):
+        # GH#18849
+        tz = tz_naive_fixture
+        dti = pd.date_range('2017-01-01', periods=2, tz=tz)
+        other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = dti + other
+        expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))],
+                                 name=dti.name, freq='infer')
+        tm.assert_index_equal(res, expected)
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res2 = other + dti
+        tm.assert_index_equal(res2, expected)
+
+    @pytest.mark.parametrize('names', [(None, None, None),
+                                       ('foo', 'bar', None),
+                                       ('foo', 'foo', 'foo')])
+    def test_dti_add_offset_index(self, tz_naive_fixture, names):
+        # GH#18849, GH#19744
+        tz = tz_naive_fixture
+        dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0])
+        other = pd.Index([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)],
+                         name=names[1])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = dti + other
+        expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))],
+                                 name=names[2], freq='infer')
+        tm.assert_index_equal(res, expected)
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res2 = other + dti
+        tm.assert_index_equal(res2, expected)
+
+    def test_dti_sub_offset_array(self, tz_naive_fixture):
+        # GH#18824
+        tz = tz_naive_fixture
+        dti = pd.date_range('2017-01-01', periods=2, tz=tz)
+        other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = dti - other
+        expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))],
+                                 name=dti.name, freq='infer')
+        tm.assert_index_equal(res, expected)
+
+    @pytest.mark.parametrize('names', [(None, None, None),
+                                       ('foo', 'bar', None),
+                                       ('foo', 'foo', 'foo')])
+    def test_dti_sub_offset_index(self, tz_naive_fixture, names):
+        # GH#18824, GH#19744
+        tz = tz_naive_fixture
+        dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0])
+        other = pd.Index([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)],
+                         name=names[1])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = dti - other
+        expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))],
+                                 name=names[2], freq='infer')
+        tm.assert_index_equal(res, expected)
+
+    @pytest.mark.parametrize('names', [(None, None, None),
+                                       ('foo', 'bar', None),
+                                       ('foo', 'foo', 'foo')])
+    def test_dti_with_offset_series(self, tz_naive_fixture, names):
+        # GH#18849
+        tz = tz_naive_fixture
+        dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0])
+        other = Series([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)],
+                       name=names[1])
+
+        expected_add = Series([dti[n] + other[n] for n in range(len(dti))],
+                              name=names[2])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = dti + other
+        tm.assert_series_equal(res, expected_add)
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res2 = other + dti
+        tm.assert_series_equal(res2, expected_add)
+
+        expected_sub = Series([dti[n] - other[n] for n in range(len(dti))],
+                              name=names[2])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res3 = dti - other
+        tm.assert_series_equal(res3, expected_sub)
+
+    @pytest.mark.parametrize('box', [
+        pd.Index,
+        pd.Series,
+        pytest.param(pd.DataFrame,
+                     marks=pytest.mark.xfail(reason="Returns object dtype",
+                                             strict=True))
+    ], ids=lambda x: x.__name__)
+    def test_dti_add_offset_tzaware(self, tz_aware_fixture, box):
+        timezone = tz_aware_fixture
+        if timezone == 'US/Pacific':
+            dates = date_range('2012-11-01', periods=3, tz=timezone)
+            offset = dates + pd.offsets.Hour(5)
+            assert dates[0] + pd.offsets.Hour(5) == offset[0]
+
+        dates = date_range('2010-11-01 00:00',
+                           periods=3, tz=timezone, freq='H')
+        expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00',
+                                  '2010-11-01 07:00'], freq='H', tz=timezone)
+
+        dates = tm.box_expected(dates, box)
+        expected = tm.box_expected(expected, box)
+
+        # TODO: parametrize over the scalar being added?  radd?  sub?
+        offset = dates + pd.offsets.Hour(5)
+        tm.assert_equal(offset, expected)
+        offset = dates + np.timedelta64(5, 'h')
+        tm.assert_equal(offset, expected)
+        offset = dates + timedelta(hours=5)
+        tm.assert_equal(offset, expected)
+
+
+@pytest.mark.parametrize('klass,assert_func', [
+    (Series, tm.assert_series_equal),
+    (DatetimeIndex, tm.assert_index_equal)])
+def test_dt64_with_offset_array(klass, assert_func):
+    # GH#10699
+    # array of offsets
+    box = Series if klass is Series else pd.Index
+    with tm.assert_produces_warning(PerformanceWarning):
+        s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')])
+        result = s + box([pd.offsets.DateOffset(years=1),
+                          pd.offsets.MonthEnd()])
+        exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29')])
+        assert_func(result, exp)
+
+        # same offset
+        result = s + box([pd.offsets.DateOffset(years=1),
+                          pd.offsets.DateOffset(years=1)])
+        exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')])
+        assert_func(result, exp)
+
+
+@pytest.mark.parametrize('klass,assert_func', [
+    (Series, tm.assert_series_equal),
+    (DatetimeIndex, tm.assert_index_equal)])
+def test_dt64_with_DateOffsets_relativedelta(klass, assert_func):
+    # GH#10699
+    vec = klass([Timestamp('2000-01-05 00:15:00'),
+                 Timestamp('2000-01-31 00:23:00'),
+                 Timestamp('2000-01-01'),
+                 Timestamp('2000-03-31'),
+                 Timestamp('2000-02-29'),
+                 Timestamp('2000-12-31'),
+                 Timestamp('2000-05-15'),
+                 Timestamp('2001-06-15')])
+
+    # DateOffset relativedelta fastpath
+    relative_kwargs = [('years', 2), ('months', 5), ('days', 3),
+                       ('hours', 5), ('minutes', 10), ('seconds', 2),
+                       ('microseconds', 5)]
+    for i, kwd in enumerate(relative_kwargs):
+        op = pd.DateOffset(**dict([kwd]))
+        assert_func(klass([x + op for x in vec]), vec + op)
+        assert_func(klass([x - op for x in vec]), vec - op)
+        op = pd.DateOffset(**dict(relative_kwargs[:i + 1]))
+        assert_func(klass([x + op for x in vec]), vec + op)
+        assert_func(klass([x - op for x in vec]), vec - op)
+
+
+@pytest.mark.parametrize('cls_and_kwargs', [
+    'YearBegin', ('YearBegin', {'month': 5}),
+    'YearEnd', ('YearEnd', {'month': 5}),
+    'MonthBegin', 'MonthEnd',
+    'SemiMonthEnd', 'SemiMonthBegin',
+    'Week', ('Week', {'weekday': 3}),
+    'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin',
+    'CustomBusinessDay', 'CDay', 'CBMonthEnd',
+    'CBMonthBegin', 'BMonthBegin', 'BMonthEnd',
+    'BusinessHour', 'BYearBegin', 'BYearEnd',
+    'BQuarterBegin', ('LastWeekOfMonth', {'weekday': 2}),
+    ('FY5253Quarter', {'qtr_with_extra_week': 1,
+                       'startingMonth': 1,
+                       'weekday': 2,
+                       'variation': 'nearest'}),
+    ('FY5253', {'weekday': 0, 'startingMonth': 2, 'variation': 'nearest'}),
+    ('WeekOfMonth', {'weekday': 2, 'week': 2}),
+    'Easter', ('DateOffset', {'day': 4}),
+    ('DateOffset', {'month': 5})])
+@pytest.mark.parametrize('normalize', [True, False])
+@pytest.mark.parametrize('klass,assert_func', [
+    (Series, tm.assert_series_equal),
+    (DatetimeIndex, tm.assert_index_equal)])
+def test_dt64_with_DateOffsets(klass, assert_func, normalize, cls_and_kwargs):
+    # GH#10699
+    # assert these are equal on a piecewise basis
+    vec = klass([Timestamp('2000-01-05 00:15:00'),
+                 Timestamp('2000-01-31 00:23:00'),
+                 Timestamp('2000-01-01'),
+                 Timestamp('2000-03-31'),
+                 Timestamp('2000-02-29'),
+                 Timestamp('2000-12-31'),
+                 Timestamp('2000-05-15'),
+                 Timestamp('2001-06-15')])
+
+    if isinstance(cls_and_kwargs, tuple):
+        # If cls_name param is a tuple, then 2nd entry is kwargs for
+        # the offset constructor
+        cls_name, kwargs = cls_and_kwargs
+    else:
+        cls_name = cls_and_kwargs
+        kwargs = {}
+
+    offset_cls = getattr(pd.offsets, cls_name)
+
+    with warnings.catch_warnings(record=True):
+        for n in [0, 5]:
+            if (cls_name in ['WeekOfMonth', 'LastWeekOfMonth',
+                             'FY5253Quarter', 'FY5253'] and n == 0):
+                # passing n = 0 is invalid for these offset classes
+                continue
+
+            offset = offset_cls(n, normalize=normalize, **kwargs)
+            assert_func(klass([x + offset for x in vec]), vec + offset)
+            assert_func(klass([x - offset for x in vec]), vec - offset)
+            assert_func(klass([offset + x for x in vec]), offset + vec)
+
+
+@pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex],
+                                                  [tm.assert_series_equal,
+                                                   tm.assert_index_equal]))
+def test_datetime64_with_DateOffset(klass, assert_func):
+    # GH#10699
+    s = klass(date_range('2000-01-01', '2000-01-31'), name='a')
+    result = s + pd.DateOffset(years=1)
+    result2 = pd.DateOffset(years=1) + s
+    exp = klass(date_range('2001-01-01', '2001-01-31'), name='a')
+    assert_func(result, exp)
+    assert_func(result2, exp)
+
+    result = s - pd.DateOffset(years=1)
+    exp = klass(date_range('1999-01-01', '1999-01-31'), name='a')
+    assert_func(result, exp)
+
+    s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
+               pd.Timestamp('2000-02-15', tz='US/Central')], name='a')
+    result = s + pd.offsets.Day()
+    result2 = pd.offsets.Day() + s
+    exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'),
+                 Timestamp('2000-02-16', tz='US/Central')], name='a')
+    assert_func(result, exp)
+    assert_func(result2, exp)
+
+    s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
+               pd.Timestamp('2000-02-15', tz='US/Central')], name='a')
+    result = s + pd.offsets.MonthEnd()
+    result2 = pd.offsets.MonthEnd() + s
+    exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'),
+                 Timestamp('2000-02-29', tz='US/Central')], name='a')
+    assert_func(result, exp)
+    assert_func(result2, exp)
+
+
+@pytest.mark.parametrize('years', [-1, 0, 1])
+@pytest.mark.parametrize('months', [-2, 0, 2])
+def test_shift_months(years, months):
+    dti = DatetimeIndex([Timestamp('2000-01-05 00:15:00'),
+                         Timestamp('2000-01-31 00:23:00'),
+                         Timestamp('2000-01-01'),
+                         Timestamp('2000-02-29'),
+                         Timestamp('2000-12-31')])
+    actual = DatetimeIndex(shift_months(dti.asi8, years * 12 + months))
+
+    raw = [x + pd.offsets.DateOffset(years=years, months=months)
+           for x in dti]
+    expected = DatetimeIndex(raw)
+    tm.assert_index_equal(actual, expected)
diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py
new file mode 100644
index 0000000000000..92123bf48bb47
--- /dev/null
+++ b/pandas/tests/arithmetic/test_period.py
@@ -0,0 +1,1057 @@
+# -*- coding: utf-8 -*-
+# Arithmetc tests for DataFrame/Series/Index/Array classes that should
+# behave identically.
+# Specifically for Period dtype
+import operator
+from datetime import timedelta
+
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas.util.testing as tm
+
+from pandas.errors import PerformanceWarning
+from pandas._libs.tslibs.period import IncompatibleFrequency
+
+import pandas.core.indexes.period as period
+from pandas.core import ops
+from pandas import (
+    Period, PeriodIndex, period_range, Timedelta, Series,
+    _np_version_under1p10)
+
+
+# ------------------------------------------------------------------
+# Fixtures
+
+_common_mismatch = [pd.offsets.YearBegin(2),
+                    pd.offsets.MonthBegin(1),
+                    pd.offsets.Minute()]
+
+
+@pytest.fixture(params=[timedelta(minutes=30),
+                        np.timedelta64(30, 's'),
+                        Timedelta(seconds=30)] + _common_mismatch)
+def not_hourly(request):
+    """
+    Several timedelta-like and DateOffset instances that are _not_
+    compatible with Hourly frequencies.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[np.timedelta64(4, 'h'),
+                        timedelta(hours=23),
+                        Timedelta('23:00:00')] + _common_mismatch)
+def not_daily(request):
+    """
+    Several timedelta-like and DateOffset instances that are _not_
+    compatible with Daily frequencies.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[np.timedelta64(365, 'D'),
+                        timedelta(365),
+                        Timedelta(days=365)] + _common_mismatch)
+def mismatched(request):
+    """
+    Several timedelta-like and DateOffset instances that are _not_
+    compatible with Monthly or Annual frequencies.
+    """
+    return request.param
+
+
+@pytest.fixture(params=[pd.offsets.Day(3),
+                        timedelta(days=3),
+                        np.timedelta64(3, 'D'),
+                        pd.offsets.Hour(72),
+                        timedelta(minutes=60 * 24 * 3),
+                        np.timedelta64(72, 'h'),
+                        Timedelta('72:00:00')])
+def three_days(request):
+    """
+    Several timedelta-like and DateOffset objects that each represent
+    a 3-day timedelta
+    """
+    return request.param
+
+
+@pytest.fixture(params=[pd.offsets.Hour(2),
+                        timedelta(hours=2),
+                        np.timedelta64(2, 'h'),
+                        pd.offsets.Minute(120),
+                        timedelta(minutes=120),
+                        np.timedelta64(120, 'm')])
+def two_hours(request):
+    """
+    Several timedelta-like and DateOffset objects that each represent
+    a 2-hour timedelta
+    """
+    return request.param
+
+
+# ------------------------------------------------------------------
+# Comparisons
+
+class TestPeriodIndexComparisons(object):
+    def test_pi_cmp_period(self):
+        idx = period_range('2007-01', periods=20, freq='M')
+
+        result = idx < idx[10]
+        exp = idx.values < idx.values[10]
+        tm.assert_numpy_array_equal(result, exp)
+
+    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
+    def test_pi_cmp_pi(self, freq):
+        base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
+                           freq=freq)
+        per = Period('2011-02', freq=freq)
+
+        exp = np.array([False, True, False, False])
+        tm.assert_numpy_array_equal(base == per, exp)
+        tm.assert_numpy_array_equal(per == base, exp)
+
+        exp = np.array([True, False, True, True])
+        tm.assert_numpy_array_equal(base != per, exp)
+        tm.assert_numpy_array_equal(per != base, exp)
+
+        exp = np.array([False, False, True, True])
+        tm.assert_numpy_array_equal(base > per, exp)
+        tm.assert_numpy_array_equal(per < base, exp)
+
+        exp = np.array([True, False, False, False])
+        tm.assert_numpy_array_equal(base < per, exp)
+        tm.assert_numpy_array_equal(per > base, exp)
+
+        exp = np.array([False, True, True, True])
+        tm.assert_numpy_array_equal(base >= per, exp)
+        tm.assert_numpy_array_equal(per <= base, exp)
+
+        exp = np.array([True, True, False, False])
+        tm.assert_numpy_array_equal(base <= per, exp)
+        tm.assert_numpy_array_equal(per >= base, exp)
+
+        idx = PeriodIndex(['2011-02', '2011-01', '2011-03', '2011-05'],
+                          freq=freq)
+
+        exp = np.array([False, False, True, False])
+        tm.assert_numpy_array_equal(base == idx, exp)
+
+        exp = np.array([True, True, False, True])
+        tm.assert_numpy_array_equal(base != idx, exp)
+
+        exp = np.array([False, True, False, False])
+        tm.assert_numpy_array_equal(base > idx, exp)
+
+        exp = np.array([True, False, False, True])
+        tm.assert_numpy_array_equal(base < idx, exp)
+
+        exp = np.array([False, True, True, False])
+        tm.assert_numpy_array_equal(base >= idx, exp)
+
+        exp = np.array([True, False, True, True])
+        tm.assert_numpy_array_equal(base <= idx, exp)
+
+    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
+    def test_pi_cmp_pi_mismatched_freq_raises(self, freq):
+        # different base freq
+        base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
+                           freq=freq)
+
+        msg = "Input has different freq=A-DEC from PeriodIndex"
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            base <= Period('2011', freq='A')
+
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            Period('2011', freq='A') >= base
+
+        idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A')
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            base <= idx
+
+        # Different frequency
+        msg = "Input has different freq=4M from PeriodIndex"
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            base <= Period('2011', freq='4M')
+
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            Period('2011', freq='4M') >= base
+
+        idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M')
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            base <= idx
+
+    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
+    def test_pi_cmp_nat(self, freq):
+        idx1 = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq)
+
+        result = idx1 > Period('2011-02', freq=freq)
+        exp = np.array([False, False, False, True])
+        tm.assert_numpy_array_equal(result, exp)
+        result = Period('2011-02', freq=freq) < idx1
+        tm.assert_numpy_array_equal(result, exp)
+
+        result = idx1 == Period('NaT', freq=freq)
+        exp = np.array([False, False, False, False])
+        tm.assert_numpy_array_equal(result, exp)
+        result = Period('NaT', freq=freq) == idx1
+        tm.assert_numpy_array_equal(result, exp)
+
+        result = idx1 != Period('NaT', freq=freq)
+        exp = np.array([True, True, True, True])
+        tm.assert_numpy_array_equal(result, exp)
+        result = Period('NaT', freq=freq) != idx1
+        tm.assert_numpy_array_equal(result, exp)
+
+        idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq=freq)
+        result = idx1 < idx2
+        exp = np.array([True, False, False, False])
+        tm.assert_numpy_array_equal(result, exp)
+
+        result = idx1 == idx2
+        exp = np.array([False, False, False, False])
+        tm.assert_numpy_array_equal(result, exp)
+
+        result = idx1 != idx2
+        exp = np.array([True, True, True, True])
+        tm.assert_numpy_array_equal(result, exp)
+
+        result = idx1 == idx1
+        exp = np.array([True, True, False, True])
+        tm.assert_numpy_array_equal(result, exp)
+
+        result = idx1 != idx1
+        exp = np.array([False, False, True, False])
+        tm.assert_numpy_array_equal(result, exp)
+
+    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
+    def test_pi_cmp_nat_mismatched_freq_raises(self, freq):
+        idx1 = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq)
+
+        diff = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq='4M')
+        msg = "Input has different freq=4M from PeriodIndex"
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            idx1 > diff
+
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            idx1 == diff
+
+    # TODO: De-duplicate with test_pi_cmp_nat
+    @pytest.mark.parametrize('dtype', [object, None])
+    def test_comp_nat(self, dtype):
+        left = pd.PeriodIndex([pd.Period('2011-01-01'), pd.NaT,
+                               pd.Period('2011-01-03')])
+        right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period('2011-01-03')])
+
+        if dtype is not None:
+            left = left.astype(dtype)
+            right = right.astype(dtype)
+
+        result = left == right
+        expected = np.array([False, False, True])
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = left != right
+        expected = np.array([True, True, False])
+        tm.assert_numpy_array_equal(result, expected)
+
+        expected = np.array([False, False, False])
+        tm.assert_numpy_array_equal(left == pd.NaT, expected)
+        tm.assert_numpy_array_equal(pd.NaT == right, expected)
+
+        expected = np.array([True, True, True])
+        tm.assert_numpy_array_equal(left != pd.NaT, expected)
+        tm.assert_numpy_array_equal(pd.NaT != left, expected)
+
+        expected = np.array([False, False, False])
+        tm.assert_numpy_array_equal(left < pd.NaT, expected)
+        tm.assert_numpy_array_equal(pd.NaT > left, expected)
+
+
+class TestPeriodSeriesComparisons(object):
+    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
+    def test_cmp_series_period_scalar(self, freq):
+        # GH 13200
+        base = Series([Period(x, freq=freq) for x in
+                       ['2011-01', '2011-02', '2011-03', '2011-04']])
+        p = Period('2011-02', freq=freq)
+
+        exp = Series([False, True, False, False])
+        tm.assert_series_equal(base == p, exp)
+        tm.assert_series_equal(p == base, exp)
+
+        exp = Series([True, False, True, True])
+        tm.assert_series_equal(base != p, exp)
+        tm.assert_series_equal(p != base, exp)
+
+        exp = Series([False, False, True, True])
+        tm.assert_series_equal(base > p, exp)
+        tm.assert_series_equal(p < base, exp)
+
+        exp = Series([True, False, False, False])
+        tm.assert_series_equal(base < p, exp)
+        tm.assert_series_equal(p > base, exp)
+
+        exp = Series([False, True, True, True])
+        tm.assert_series_equal(base >= p, exp)
+        tm.assert_series_equal(p <= base, exp)
+
+        exp = Series([True, True, False, False])
+        tm.assert_series_equal(base <= p, exp)
+        tm.assert_series_equal(p >= base, exp)
+
+        # different base freq
+        msg = "Input has different freq=A-DEC from Period"
+        with tm.assert_raises_regex(IncompatibleFrequency, msg):
+            base <= Period('2011', freq='A')
+
+        with tm.assert_raises_regex(IncompatibleFrequency, msg):
+            Period('2011', freq='A') >= base
+
+    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
+    def test_cmp_series_period_series(self, freq):
+        # GH#13200
+        base = Series([Period(x, freq=freq) for x in
+                       ['2011-01', '2011-02', '2011-03', '2011-04']])
+
+        ser = Series([Period(x, freq=freq) for x in
+                      ['2011-02', '2011-01', '2011-03', '2011-05']])
+
+        exp = Series([False, False, True, False])
+        tm.assert_series_equal(base == ser, exp)
+
+        exp = Series([True, True, False, True])
+        tm.assert_series_equal(base != ser, exp)
+
+        exp = Series([False, True, False, False])
+        tm.assert_series_equal(base > ser, exp)
+
+        exp = Series([True, False, False, True])
+        tm.assert_series_equal(base < ser, exp)
+
+        exp = Series([False, True, True, False])
+        tm.assert_series_equal(base >= ser, exp)
+
+        exp = Series([True, False, True, True])
+        tm.assert_series_equal(base <= ser, exp)
+
+        ser2 = Series([Period(x, freq='A') for x in
+                       ['2011', '2011', '2011', '2011']])
+
+        # different base freq
+        msg = "Input has different freq=A-DEC from Period"
+        with tm.assert_raises_regex(IncompatibleFrequency, msg):
+            base <= ser2
+
+    def test_cmp_series_period_series_mixed_freq(self):
+        # GH#13200
+        base = Series([Period('2011', freq='A'),
+                       Period('2011-02', freq='M'),
+                       Period('2013', freq='A'),
+                       Period('2011-04', freq='M')])
+
+        ser = Series([Period('2012', freq='A'),
+                      Period('2011-01', freq='M'),
+                      Period('2013', freq='A'),
+                      Period('2011-05', freq='M')])
+
+        exp = Series([False, False, True, False])
+        tm.assert_series_equal(base == ser, exp)
+
+        exp = Series([True, True, False, True])
+        tm.assert_series_equal(base != ser, exp)
+
+        exp = Series([False, True, False, False])
+        tm.assert_series_equal(base > ser, exp)
+
+        exp = Series([True, False, False, True])
+        tm.assert_series_equal(base < ser, exp)
+
+        exp = Series([False, True, True, False])
+        tm.assert_series_equal(base >= ser, exp)
+
+        exp = Series([True, False, True, True])
+        tm.assert_series_equal(base <= ser, exp)
+
+
+# ------------------------------------------------------------------
+# Arithmetic
+
+class TestPeriodFrameArithmetic(object):
+
+    def test_ops_frame_period(self):
+        # GH#13043
+        df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'),
+                                 pd.Period('2015-02', freq='M')],
+                           'B': [pd.Period('2014-01', freq='M'),
+                                 pd.Period('2014-02', freq='M')]})
+        assert df['A'].dtype == object
+        assert df['B'].dtype == object
+
+        p = pd.Period('2015-03', freq='M')
+        off = p.freq
+        # dtype will be object because of original dtype
+        exp = pd.DataFrame({'A': np.array([2 * off, 1 * off], dtype=object),
+                            'B': np.array([14 * off, 13 * off], dtype=object)})
+        tm.assert_frame_equal(p - df, exp)
+        tm.assert_frame_equal(df - p, -1 * exp)
+
+        df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'),
+                                  pd.Period('2015-06', freq='M')],
+                            'B': [pd.Period('2015-05', freq='M'),
+                                  pd.Period('2015-06', freq='M')]})
+        assert df2['A'].dtype == object
+        assert df2['B'].dtype == object
+
+        exp = pd.DataFrame({'A': np.array([4 * off, 4 * off], dtype=object),
+                            'B': np.array([16 * off, 16 * off], dtype=object)})
+        tm.assert_frame_equal(df2 - df, exp)
+        tm.assert_frame_equal(df - df2, -1 * exp)
+
+
+class TestPeriodIndexArithmetic(object):
+    # ---------------------------------------------------------------
+    # __add__/__sub__ with PeriodIndex
+    # PeriodIndex + other is defined for integers and timedelta-like others
+    # PeriodIndex - other is defined for integers, timedelta-like others,
+    #   and PeriodIndex (with matching freq)
+
+    def test_parr_add_iadd_parr_raises(self, box_df_broadcast_failure):
+        box = box_df_broadcast_failure
+
+        rng = pd.period_range('1/1/2000', freq='D', periods=5)
+        other = pd.period_range('1/6/2000', freq='D', periods=5)
+        # TODO: parametrize over boxes for other?
+
+        rng = tm.box_expected(rng, box)
+        # An earlier implementation of PeriodIndex addition performed
+        # a set operation (union).  This has since been changed to
+        # raise a TypeError. See GH#14164 and GH#13077 for historical
+        # reference.
+        with pytest.raises(TypeError):
+            rng + other
+
+        with pytest.raises(TypeError):
+            rng += other
+
+    def test_pi_sub_isub_pi(self):
+        # GH#20049
+        # For historical reference see GH#14164, GH#13077.
+        # PeriodIndex subtraction originally performed set difference,
+        # then changed to raise TypeError before being implemented in GH#20049
+        rng = pd.period_range('1/1/2000', freq='D', periods=5)
+        other = pd.period_range('1/6/2000', freq='D', periods=5)
+
+        off = rng.freq
+        expected = pd.Index([-5 * off] * 5)
+        result = rng - other
+        tm.assert_index_equal(result, expected)
+
+        rng -= other
+        tm.assert_index_equal(rng, expected)
+
+    def test_pi_sub_pi_with_nat(self):
+        rng = pd.period_range('1/1/2000', freq='D', periods=5)
+        other = rng[1:].insert(0, pd.NaT)
+        assert other[1:].equals(rng[1:])
+
+        result = rng - other
+        off = rng.freq
+        expected = pd.Index([pd.NaT, 0 * off, 0 * off, 0 * off, 0 * off])
+        tm.assert_index_equal(result, expected)
+
+    def test_parr_sub_pi_mismatched_freq(self, box_df_broadcast_failure):
+        box = box_df_broadcast_failure
+
+        rng = pd.period_range('1/1/2000', freq='D', periods=5)
+        other = pd.period_range('1/6/2000', freq='H', periods=5)
+        # TODO: parametrize over boxes for other?
+
+        rng = tm.box_expected(rng, box)
+        with pytest.raises(period.IncompatibleFrequency):
+            rng - other
+
+    # -------------------------------------------------------------
+    # Invalid Operations
+
+    @pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])])
+    @pytest.mark.parametrize('op', [operator.add, ops.radd,
+                                    operator.sub, ops.rsub])
+    def test_pi_add_sub_float(self, op, other):
+        dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
+        pi = dti.to_period('D')
+        with pytest.raises(TypeError):
+            op(pi, other)
+
+    # -----------------------------------------------------------------
+    # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64]
+
+    def test_pi_add_sub_dt64_array_raises(self):
+        rng = pd.period_range('1/1/2000', freq='D', periods=3)
+        dti = pd.date_range('2016-01-01', periods=3)
+        dtarr = dti.values
+
+        with pytest.raises(TypeError):
+            rng + dtarr
+        with pytest.raises(TypeError):
+            dtarr + rng
+
+        with pytest.raises(TypeError):
+            rng - dtarr
+        with pytest.raises(TypeError):
+            dtarr - rng
+
+    def test_pi_add_sub_td64_array_non_tick_raises(self):
+        rng = pd.period_range('1/1/2000', freq='Q', periods=3)
+        tdi = pd.TimedeltaIndex(['-1 Day', '-1 Day', '-1 Day'])
+        tdarr = tdi.values
+
+        with pytest.raises(period.IncompatibleFrequency):
+            rng + tdarr
+        with pytest.raises(period.IncompatibleFrequency):
+            tdarr + rng
+
+        with pytest.raises(period.IncompatibleFrequency):
+            rng - tdarr
+        with pytest.raises(period.IncompatibleFrequency):
+            tdarr - rng
+
+    @pytest.mark.xfail(reason='op with TimedeltaIndex raises, with ndarray OK',
+                       strict=True)
+    def test_pi_add_sub_td64_array_tick(self):
+        rng = pd.period_range('1/1/2000', freq='Q', periods=3)
+        tdi = pd.TimedeltaIndex(['-1 Day', '-1 Day', '-1 Day'])
+        tdarr = tdi.values
+
+        expected = rng + tdi
+        result = rng + tdarr
+        tm.assert_index_equal(result, expected)
+        result = tdarr + rng
+        tm.assert_index_equal(result, expected)
+
+        expected = rng - tdi
+        result = rng - tdarr
+        tm.assert_index_equal(result, expected)
+
+        with pytest.raises(TypeError):
+            tdarr - rng
+
+    # -----------------------------------------------------------------
+    # operations with array/Index of DateOffset objects
+
+    @pytest.mark.parametrize('box', [np.array, pd.Index])
+    def test_pi_add_offset_array(self, box):
+        # GH#18849
+        pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')])
+        offs = box([pd.offsets.QuarterEnd(n=1, startingMonth=12),
+                    pd.offsets.QuarterEnd(n=-2, startingMonth=12)])
+        expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = pi + offs
+        tm.assert_index_equal(res, expected)
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res2 = offs + pi
+        tm.assert_index_equal(res2, expected)
+
+        unanchored = np.array([pd.offsets.Hour(n=1),
+                               pd.offsets.Minute(n=-2)])
+        # addition/subtraction ops with incompatible offsets should issue
+        # a PerformanceWarning and _then_ raise a TypeError.
+        with pytest.raises(period.IncompatibleFrequency):
+            with tm.assert_produces_warning(PerformanceWarning):
+                pi + unanchored
+        with pytest.raises(period.IncompatibleFrequency):
+            with tm.assert_produces_warning(PerformanceWarning):
+                unanchored + pi
+
+    @pytest.mark.parametrize('box', [np.array, pd.Index])
+    def test_pi_sub_offset_array(self, box):
+        # GH#18824
+        pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')])
+        other = box([pd.offsets.QuarterEnd(n=1, startingMonth=12),
+                     pd.offsets.QuarterEnd(n=-2, startingMonth=12)])
+
+        expected = PeriodIndex([pi[n] - other[n] for n in range(len(pi))])
+
+        with tm.assert_produces_warning(PerformanceWarning):
+            res = pi - other
+        tm.assert_index_equal(res, expected)
+
+        anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
+
+        # addition/subtraction ops with anchored offsets should issue
+        # a PerformanceWarning and _then_ raise a TypeError.
+        with pytest.raises(period.IncompatibleFrequency):
+            with tm.assert_produces_warning(PerformanceWarning):
+                pi - anchored
+        with pytest.raises(period.IncompatibleFrequency):
+            with tm.assert_produces_warning(PerformanceWarning):
+                anchored - pi
+
+    def test_pi_add_iadd_int(self, one):
+        # Variants of `one` for #19012
+        rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
+        result = rng + one
+        expected = pd.period_range('2000-01-01 10:00', freq='H', periods=10)
+        tm.assert_index_equal(result, expected)
+        rng += one
+        tm.assert_index_equal(rng, expected)
+
+    def test_pi_sub_isub_int(self, one):
+        """
+        PeriodIndex.__sub__ and __isub__ with several representations of
+        the integer 1, e.g. int, long, np.int64, np.uint8, ...
+        """
+        rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
+        result = rng - one
+        expected = pd.period_range('2000-01-01 08:00', freq='H', periods=10)
+        tm.assert_index_equal(result, expected)
+        rng -= one
+        tm.assert_index_equal(rng, expected)
+
+    @pytest.mark.parametrize('five', [5, np.array(5, dtype=np.int64)])
+    def test_pi_sub_intlike(self, five):
+        rng = period_range('2007-01', periods=50)
+
+        result = rng - five
+        exp = rng + (-five)
+        tm.assert_index_equal(result, exp)
+
+    def test_pi_sub_isub_offset(self):
+        # offset
+        # DateOffset
+        rng = pd.period_range('2014', '2024', freq='A')
+        result = rng - pd.offsets.YearEnd(5)
+        expected = pd.period_range('2009', '2019', freq='A')
+        tm.assert_index_equal(result, expected)
+        rng -= pd.offsets.YearEnd(5)
+        tm.assert_index_equal(rng, expected)
+
+        rng = pd.period_range('2014-01', '2016-12', freq='M')
+        result = rng - pd.offsets.MonthEnd(5)
+        expected = pd.period_range('2013-08', '2016-07', freq='M')
+        tm.assert_index_equal(result, expected)
+
+        rng -= pd.offsets.MonthEnd(5)
+        tm.assert_index_equal(rng, expected)
+
+    # ---------------------------------------------------------------
+    # __add__/__sub__ with integer arrays
+
+    @pytest.mark.parametrize('box', [np.array, pd.Index])
+    @pytest.mark.parametrize('op', [operator.add, ops.radd])
+    def test_pi_add_intarray(self, box, op):
+        # GH#19959
+        pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('NaT')])
+        other = box([4, -1])
+        result = op(pi, other)
+        expected = pd.PeriodIndex([pd.Period('2016Q1'), pd.Period('NaT')])
+        tm.assert_index_equal(result, expected)
+
+    @pytest.mark.parametrize('box', [np.array, pd.Index])
+    def test_pi_sub_intarray(self, box):
+        # GH#19959
+        pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('NaT')])
+        other = box([4, -1])
+        result = pi - other
+        expected = pd.PeriodIndex([pd.Period('2014Q1'), pd.Period('NaT')])
+        tm.assert_index_equal(result, expected)
+
+        with pytest.raises(TypeError):
+            other - pi
+
+    # ---------------------------------------------------------------
+    # Timedelta-like (timedelta, timedelta64, Timedelta, Tick)
+    # TODO: Some of these are misnomers because of non-Tick DateOffsets
+
+    def test_pi_add_iadd_timedeltalike_daily(self, three_days):
+        # Tick
+        other = three_days
+        rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
+        expected = pd.period_range('2014-05-04', '2014-05-18', freq='D')
+
+        result = rng + other
+        tm.assert_index_equal(result, expected)
+
+        rng += other
+        tm.assert_index_equal(rng, expected)
+
+    def test_pi_sub_isub_timedeltalike_daily(self, three_days):
+        # Tick-like 3 Days
+        other = three_days
+        rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
+        expected = pd.period_range('2014-04-28', '2014-05-12', freq='D')
+
+        result = rng - other
+        tm.assert_index_equal(result, expected)
+
+        rng -= other
+        tm.assert_index_equal(rng, expected)
+
+    def test_pi_add_iadd_timedeltalike_freq_mismatch_daily(self, not_daily):
+        other = not_daily
+        rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
+        msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)'
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng + other
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng += other
+
+    def test_pi_sub_timedeltalike_freq_mismatch_daily(self, not_daily):
+        other = not_daily
+        rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
+        msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)'
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng - other
+
+    def test_pi_add_iadd_timedeltalike_hourly(self, two_hours):
+        other = two_hours
+        rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H')
+        expected = pd.period_range('2014-01-01 12:00', '2014-01-05 12:00',
+                                   freq='H')
+
+        result = rng + other
+        tm.assert_index_equal(result, expected)
+
+        rng += other
+        tm.assert_index_equal(rng, expected)
+
+    def test_pi_add_timedeltalike_mismatched_freq_hourly(self, not_hourly):
+        other = not_hourly
+        rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H')
+        msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=H\\)'
+
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng + other
+
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng += other
+
+    def test_pi_sub_isub_timedeltalike_hourly(self, two_hours):
+        other = two_hours
+        rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H')
+        expected = pd.period_range('2014-01-01 08:00', '2014-01-05 08:00',
+                                   freq='H')
+
+        result = rng - other
+        tm.assert_index_equal(result, expected)
+
+        rng -= other
+        tm.assert_index_equal(rng, expected)
+
+    def test_add_iadd_timedeltalike_annual(self):
+        # offset
+        # DateOffset
+        rng = pd.period_range('2014', '2024', freq='A')
+        result = rng + pd.offsets.YearEnd(5)
+        expected = pd.period_range('2019', '2029', freq='A')
+        tm.assert_index_equal(result, expected)
+        rng += pd.offsets.YearEnd(5)
+        tm.assert_index_equal(rng, expected)
+
+    def test_pi_add_iadd_timedeltalike_freq_mismatch_annual(self, mismatched):
+        other = mismatched
+        rng = pd.period_range('2014', '2024', freq='A')
+        msg = ('Input has different freq(=.+)? '
+               'from PeriodIndex\\(freq=A-DEC\\)')
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng + other
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng += other
+
+    def test_pi_sub_isub_timedeltalike_freq_mismatch_annual(self, mismatched):
+        other = mismatched
+        rng = pd.period_range('2014', '2024', freq='A')
+        msg = ('Input has different freq(=.+)? '
+               'from PeriodIndex\\(freq=A-DEC\\)')
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng - other
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng -= other
+
+    def test_pi_add_iadd_timedeltalike_M(self):
+        rng = pd.period_range('2014-01', '2016-12', freq='M')
+        expected = pd.period_range('2014-06', '2017-05', freq='M')
+
+        result = rng + pd.offsets.MonthEnd(5)
+        tm.assert_index_equal(result, expected)
+
+        rng += pd.offsets.MonthEnd(5)
+        tm.assert_index_equal(rng, expected)
+
+    def test_pi_add_iadd_timedeltalike_freq_mismatch_monthly(self, mismatched):
+        other = mismatched
+        rng = pd.period_range('2014-01', '2016-12', freq='M')
+        msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)'
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng + other
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng += other
+
+    def test_pi_sub_isub_timedeltalike_freq_mismatch_monthly(self, mismatched):
+        other = mismatched
+        rng = pd.period_range('2014-01', '2016-12', freq='M')
+        msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)'
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng - other
+        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+            rng -= other
+
+
+class TestPeriodSeriesArithmetic(object):
+    def test_ops_series_timedelta(self):
+        # GH 13043
+        ser = pd.Series([pd.Period('2015-01-01', freq='D'),
+                         pd.Period('2015-01-02', freq='D')], name='xxx')
+        assert ser.dtype == object
+
+        expected = pd.Series([pd.Period('2015-01-02', freq='D'),
+                              pd.Period('2015-01-03', freq='D')], name='xxx')
+
+        result = ser + pd.Timedelta('1 days')
+        tm.assert_series_equal(result, expected)
+
+        result = pd.Timedelta('1 days') + ser
+        tm.assert_series_equal(result, expected)
+
+        result = ser + pd.tseries.offsets.Day()
+        tm.assert_series_equal(result, expected)
+
+        result = pd.tseries.offsets.Day() + ser
+        tm.assert_series_equal(result, expected)
+
+    def test_ops_series_period(self):
+        # GH 13043
+        ser = pd.Series([pd.Period('2015-01-01', freq='D'),
+                         pd.Period('2015-01-02', freq='D')], name='xxx')
+        assert ser.dtype == object
+
+        per = pd.Period('2015-01-10', freq='D')
+        off = per.freq
+        # dtype will be object because of original dtype
+        expected = pd.Series([9 * off, 8 * off], name='xxx', dtype=object)
+        tm.assert_series_equal(per - ser, expected)
+        tm.assert_series_equal(ser - per, -1 * expected)
+
+        s2 = pd.Series([pd.Period('2015-01-05', freq='D'),
+                        pd.Period('2015-01-04', freq='D')], name='xxx')
+        assert s2.dtype == object
+
+        expected = pd.Series([4 * off, 2 * off], name='xxx', dtype=object)
+        tm.assert_series_equal(s2 - ser, expected)
+        tm.assert_series_equal(ser - s2, -1 * expected)
+
+
+class TestPeriodIndexSeriesMethods(object):
+    """ Test PeriodIndex and Period Series Ops consistency """
+
+    def _check(self, values, func, expected):
+        idx = pd.PeriodIndex(values)
+        result = func(idx)
+        if isinstance(expected, pd.Index):
+            tm.assert_index_equal(result, expected)
+        else:
+            # comp op results in bool
+            tm.assert_numpy_array_equal(result, expected)
+
+        ser = pd.Series(values)
+        result = func(ser)
+
+        exp = pd.Series(expected, name=values.name)
+        tm.assert_series_equal(result, exp)
+
+    def test_pi_ops(self):
+        idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
+                          freq='M', name='idx')
+
+        expected = PeriodIndex(['2011-03', '2011-04', '2011-05', '2011-06'],
+                               freq='M', name='idx')
+        self._check(idx, lambda x: x + 2, expected)
+        self._check(idx, lambda x: 2 + x, expected)
+
+        self._check(idx + 2, lambda x: x - 2, idx)
+        result = idx - Period('2011-01', freq='M')
+        off = idx.freq
+        exp = pd.Index([0 * off, 1 * off, 2 * off, 3 * off], name='idx')
+        tm.assert_index_equal(result, exp)
+
+        result = Period('2011-01', freq='M') - idx
+        exp = pd.Index([0 * off, -1 * off, -2 * off, -3 * off], name='idx')
+        tm.assert_index_equal(result, exp)
+
+    @pytest.mark.parametrize('ng', ["str", 1.5])
+    def test_pi_ops_errors(self, ng):
+        idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
+                          freq='M', name='idx')
+        ser = pd.Series(idx)
+
+        msg = r"unsupported operand type\(s\)"
+
+        for obj in [idx, ser]:
+            with tm.assert_raises_regex(TypeError, msg):
+                obj + ng
+
+            with pytest.raises(TypeError):
+                # error message differs between PY2 and 3
+                ng + obj
+
+            with tm.assert_raises_regex(TypeError, msg):
+                obj - ng
+
+            with pytest.raises(TypeError):
+                np.add(obj, ng)
+
+            if _np_version_under1p10:
+                assert np.add(ng, obj) is NotImplemented
+            else:
+                with pytest.raises(TypeError):
+                    np.add(ng, obj)
+
+            with pytest.raises(TypeError):
+                np.subtract(obj, ng)
+
+            if _np_version_under1p10:
+                assert np.subtract(ng, obj) is NotImplemented
+            else:
+                with pytest.raises(TypeError):
+                    np.subtract(ng, obj)
+
+    def test_pi_ops_nat(self):
+        idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
+                          freq='M', name='idx')
+        expected = PeriodIndex(['2011-03', '2011-04', 'NaT', '2011-06'],
+                               freq='M', name='idx')
+        self._check(idx, lambda x: x + 2, expected)
+        self._check(idx, lambda x: 2 + x, expected)
+        self._check(idx, lambda x: np.add(x, 2), expected)
+
+        self._check(idx + 2, lambda x: x - 2, idx)
+        self._check(idx + 2, lambda x: np.subtract(x, 2), idx)
+
+        # freq with mult
+        idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
+                          freq='2M', name='idx')
+        expected = PeriodIndex(['2011-07', '2011-08', 'NaT', '2011-10'],
+                               freq='2M', name='idx')
+        self._check(idx, lambda x: x + 3, expected)
+        self._check(idx, lambda x: 3 + x, expected)
+        self._check(idx, lambda x: np.add(x, 3), expected)
+
+        self._check(idx + 3, lambda x: x - 3, idx)
+        self._check(idx + 3, lambda x: np.subtract(x, 3), idx)
+
+    def test_pi_ops_array_int(self):
+        idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
+                          freq='M', name='idx')
+        f = lambda x: x + np.array([1, 2, 3, 4])
+        exp = PeriodIndex(['2011-02', '2011-04', 'NaT', '2011-08'],
+                          freq='M', name='idx')
+        self._check(idx, f, exp)
+
+        f = lambda x: np.add(x, np.array([4, -1, 1, 2]))
+        exp = PeriodIndex(['2011-05', '2011-01', 'NaT', '2011-06'],
+                          freq='M', name='idx')
+        self._check(idx, f, exp)
+
+        f = lambda x: x - np.array([1, 2, 3, 4])
+        exp = PeriodIndex(['2010-12', '2010-12', 'NaT', '2010-12'],
+                          freq='M', name='idx')
+        self._check(idx, f, exp)
+
+        f = lambda x: np.subtract(x, np.array([3, 2, 3, -2]))
+        exp = PeriodIndex(['2010-10', '2010-12', 'NaT', '2011-06'],
+                          freq='M', name='idx')
+        self._check(idx, f, exp)
+
+    def test_pi_ops_offset(self):
+        idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01',
+                           '2011-04-01'], freq='D', name='idx')
+        f = lambda x: x + pd.offsets.Day()
+        exp = PeriodIndex(['2011-01-02', '2011-02-02', '2011-03-02',
+                           '2011-04-02'], freq='D', name='idx')
+        self._check(idx, f, exp)
+
+        f = lambda x: x + pd.offsets.Day(2)
+        exp = PeriodIndex(['2011-01-03', '2011-02-03', '2011-03-03',
+                           '2011-04-03'], freq='D', name='idx')
+        self._check(idx, f, exp)
+
+        f = lambda x: x - pd.offsets.Day(2)
+        exp = PeriodIndex(['2010-12-30', '2011-01-30', '2011-02-27',
+                           '2011-03-30'], freq='D', name='idx')
+        self._check(idx, f, exp)
+
+    def test_pi_offset_errors(self):
+        idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01',
+                           '2011-04-01'], freq='D', name='idx')
+        ser = pd.Series(idx)
+
+        # Series op is applied per Period instance, thus error is raised
+        # from Period
+        msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)"
+        msg_s = r"Input cannot be converted to Period\(freq=D\)"
+        for obj, msg in [(idx, msg_idx), (ser, msg_s)]:
+            with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+                obj + pd.offsets.Hour(2)
+
+            with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+                pd.offsets.Hour(2) + obj
+
+            with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
+                obj - pd.offsets.Hour(2)
+
+    def test_pi_sub_period(self):
+        # GH 13071
+        idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
+                          freq='M', name='idx')
+
+        result = idx - pd.Period('2012-01', freq='M')
+        off = idx.freq
+        exp = pd.Index([-12 * off, -11 * off, -10 * off, -9 * off], name='idx')
+        tm.assert_index_equal(result, exp)
+
+        result = np.subtract(idx, pd.Period('2012-01', freq='M'))
+        tm.assert_index_equal(result, exp)
+
+        result = pd.Period('2012-01', freq='M') - idx
+        exp = pd.Index([12 * off, 11 * off, 10 * off, 9 * off], name='idx')
+        tm.assert_index_equal(result, exp)
+
+        result = np.subtract(pd.Period('2012-01', freq='M'), idx)
+        if _np_version_under1p10:
+            assert result is NotImplemented
+        else:
+            tm.assert_index_equal(result, exp)
+
+        exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx')
+        tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp)
+        tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp)
+
+    def test_pi_sub_pdnat(self):
+        # GH 13071
+        idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
+                          freq='M', name='idx')
+        exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx')
+        tm.assert_index_equal(pd.NaT - idx, exp)
+        tm.assert_index_equal(idx - pd.NaT, exp)
+
+    def test_pi_sub_period_nat(self):
+        # GH 13071
+        idx = PeriodIndex(['2011-01', 'NaT', '2011-03', '2011-04'],
+                          freq='M', name='idx')
+
+        result = idx - pd.Period('2012-01', freq='M')
+        off = idx.freq
+        exp = pd.Index([-12 * off, pd.NaT, -10 * off, -9 * off], name='idx')
+        tm.assert_index_equal(result, exp)
+
+        result = pd.Period('2012-01', freq='M') - idx
+        exp = pd.Index([12 * off, pd.NaT, 10 * off, 9 * off], name='idx')
+        tm.assert_index_equal(result, exp)
+
+        exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx')
+        tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp)
+        tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp)
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index 8d52bc9532136..20c547382ad57 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -59,15 +59,6 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname):
         result = getattr(empty, opname)(const).get_dtype_counts()
         tm.assert_series_equal(result, pd.Series([2], ['bool']))
 
-    @pytest.mark.parametrize('timestamps', [
-        [pd.Timestamp('2012-01-01 13:00:00+00:00')] * 2,
-        [pd.Timestamp('2012-01-01 13:00:00')] * 2])
-    def test_tz_aware_scalar_comparison(self, timestamps):
-        # Test for issue #15966
-        df = pd.DataFrame({'test': timestamps})
-        expected = pd.DataFrame({'test': [False, False]})
-        tm.assert_frame_equal(df == -1, expected)
-
 
 # -------------------------------------------------------------------
 # Arithmetic
@@ -91,17 +82,6 @@ def test_df_add_flex_filled_mixed_dtypes(self):
 
 class TestFrameArithmetic(object):
 
-    @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano',
-                       strict=True)
-    def test_df_sub_datetime64_not_ns(self):
-        df = pd.DataFrame(pd.date_range('20130101', periods=3))
-        dt64 = np.datetime64('2013-01-01')
-        assert dt64.dtype == 'datetime64[D]'
-        res = df - dt64
-        expected = pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1),
-                                 pd.Timedelta(days=2)])
-        tm.assert_frame_equal(res, expected)
-
     @pytest.mark.parametrize('data', [
         [1, 2, 3],
         [1.1, 2.2, 3.3],
@@ -135,35 +115,3 @@ def test_df_radd_str(self):
         df = pd.DataFrame(['x', np.nan, 'x'])
         tm.assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax']))
         tm.assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa']))
-
-
-class TestPeriodFrameArithmetic(object):
-
-    def test_ops_frame_period(self):
-        # GH 13043
-        df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'),
-                                 pd.Period('2015-02', freq='M')],
-                           'B': [pd.Period('2014-01', freq='M'),
-                                 pd.Period('2014-02', freq='M')]})
-        assert df['A'].dtype == object
-        assert df['B'].dtype == object
-
-        p = pd.Period('2015-03', freq='M')
-        off = p.freq
-        # dtype will be object because of original dtype
-        exp = pd.DataFrame({'A': np.array([2 * off, 1 * off], dtype=object),
-                            'B': np.array([14 * off, 13 * off], dtype=object)})
-        tm.assert_frame_equal(p - df, exp)
-        tm.assert_frame_equal(df - p, -1 * exp)
-
-        df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'),
-                                  pd.Period('2015-06', freq='M')],
-                            'B': [pd.Period('2015-05', freq='M'),
-                                  pd.Period('2015-06', freq='M')]})
-        assert df2['A'].dtype == object
-        assert df2['B'].dtype == object
-
-        exp = pd.DataFrame({'A': np.array([4 * off, 4 * off], dtype=object),
-                            'B': np.array([16 * off, 16 * off], dtype=object)})
-        tm.assert_frame_equal(df2 - df, exp)
-        tm.assert_frame_equal(df - df2, -1 * exp)
diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py
index f54cb32b0a036..4feed589f5961 100644
--- a/pandas/tests/indexes/datetimes/test_arithmetic.py
+++ b/pandas/tests/indexes/datetimes/test_arithmetic.py
@@ -1,531 +1,17 @@
 # -*- coding: utf-8 -*-
-import warnings
-from datetime import datetime, timedelta
-import operator
+from datetime import datetime
 
 import pytest
 import pytz
-import numpy as np
 
 import pandas as pd
-from pandas.compat.numpy import np_datetime64_compat
 import pandas.util.testing as tm
-from pandas.errors import PerformanceWarning, NullFrequencyError
-from pandas import (Timestamp, Timedelta, Series,
-                    DatetimeIndex, TimedeltaIndex,
-                    date_range)
-from pandas.core import ops
-from pandas._libs.tslibs.conversion import localize_pydatetime
-from pandas._libs.tslibs.offsets import shift_months
-
-
-@pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2),
-                        np.timedelta64(2, 'h'), Timedelta(hours=2)],
-                ids=str)
-def delta(request):
-    # Several ways of representing two hours
-    return request.param
-
-
-@pytest.fixture(
-    params=[
-        datetime(2011, 1, 1),
-        DatetimeIndex(['2011-01-01', '2011-01-02']),
-        DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize('US/Eastern'),
-        np.datetime64('2011-01-01'),
-        Timestamp('2011-01-01')],
-    ids=lambda x: type(x).__name__)
-def addend(request):
-    return request.param
-
-
-class TestDatetimeIndexComparisons(object):
-    @pytest.mark.parametrize('other', [datetime(2016, 1, 1),
-                                       Timestamp('2016-01-01'),
-                                       np.datetime64('2016-01-01')])
-    def test_dti_cmp_datetimelike(self, other, tz_naive_fixture):
-        tz = tz_naive_fixture
-        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
-        if tz is not None:
-            if isinstance(other, np.datetime64):
-                # no tzaware version available
-                return
-            other = localize_pydatetime(other, dti.tzinfo)
-
-        result = dti == other
-        expected = np.array([True, False])
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = dti > other
-        expected = np.array([False, True])
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = dti >= other
-        expected = np.array([True, True])
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = dti < other
-        expected = np.array([False, False])
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = dti <= other
-        expected = np.array([True, False])
-        tm.assert_numpy_array_equal(result, expected)
-
-    def dti_cmp_non_datetime(self, tz_naive_fixture):
-        # GH#19301 by convention datetime.date is not considered comparable
-        # to Timestamp or DatetimeIndex.  This may change in the future.
-        tz = tz_naive_fixture
-        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
-
-        other = datetime(2016, 1, 1).date()
-        assert not (dti == other).any()
-        assert (dti != other).all()
-        with pytest.raises(TypeError):
-            dti < other
-        with pytest.raises(TypeError):
-            dti <= other
-        with pytest.raises(TypeError):
-            dti > other
-        with pytest.raises(TypeError):
-            dti >= other
-
-    @pytest.mark.parametrize('other', [None, np.nan, pd.NaT])
-    def test_dti_eq_null_scalar(self, other, tz_naive_fixture):
-        # GH#19301
-        tz = tz_naive_fixture
-        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
-        assert not (dti == other).any()
-
-    @pytest.mark.parametrize('other', [None, np.nan, pd.NaT])
-    def test_dti_ne_null_scalar(self, other, tz_naive_fixture):
-        # GH#19301
-        tz = tz_naive_fixture
-        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
-        assert (dti != other).all()
-
-    @pytest.mark.parametrize('other', [None, np.nan])
-    def test_dti_cmp_null_scalar_inequality(self, tz_naive_fixture, other):
-        # GH#19301
-        tz = tz_naive_fixture
-        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
-
-        with pytest.raises(TypeError):
-            dti < other
-        with pytest.raises(TypeError):
-            dti <= other
-        with pytest.raises(TypeError):
-            dti > other
-        with pytest.raises(TypeError):
-            dti >= other
-
-    def test_dti_cmp_nat(self):
-        left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT,
-                                 pd.Timestamp('2011-01-03')])
-        right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')])
-
-        for lhs, rhs in [(left, right),
-                         (left.astype(object), right.astype(object))]:
-            result = rhs == lhs
-            expected = np.array([False, False, True])
-            tm.assert_numpy_array_equal(result, expected)
-
-            result = lhs != rhs
-            expected = np.array([True, True, False])
-            tm.assert_numpy_array_equal(result, expected)
-
-            expected = np.array([False, False, False])
-            tm.assert_numpy_array_equal(lhs == pd.NaT, expected)
-            tm.assert_numpy_array_equal(pd.NaT == rhs, expected)
-
-            expected = np.array([True, True, True])
-            tm.assert_numpy_array_equal(lhs != pd.NaT, expected)
-            tm.assert_numpy_array_equal(pd.NaT != lhs, expected)
-
-            expected = np.array([False, False, False])
-            tm.assert_numpy_array_equal(lhs < pd.NaT, expected)
-            tm.assert_numpy_array_equal(pd.NaT > lhs, expected)
-
-    def test_dti_cmp_nat_behaves_like_float_cmp_nan(self):
-        fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0])
-        fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0])
-
-        didx1 = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT,
-                                  '2014-05-01', '2014-07-01'])
-        didx2 = pd.DatetimeIndex(['2014-02-01', '2014-03-01', pd.NaT, pd.NaT,
-                                  '2014-06-01', '2014-07-01'])
-        darr = np.array([np_datetime64_compat('2014-02-01 00:00Z'),
-                         np_datetime64_compat('2014-03-01 00:00Z'),
-                         np_datetime64_compat('nat'), np.datetime64('nat'),
-                         np_datetime64_compat('2014-06-01 00:00Z'),
-                         np_datetime64_compat('2014-07-01 00:00Z')])
-
-        cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)]
-
-        # Check pd.NaT is handles as the same as np.nan
-        with tm.assert_produces_warning(None):
-            for idx1, idx2 in cases:
-
-                result = idx1 < idx2
-                expected = np.array([True, False, False, False, True, False])
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx2 > idx1
-                expected = np.array([True, False, False, False, True, False])
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx1 <= idx2
-                expected = np.array([True, False, False, False, True, True])
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx2 >= idx1
-                expected = np.array([True, False, False, False, True, True])
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx1 == idx2
-                expected = np.array([False, False, False, False, False, True])
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx1 != idx2
-                expected = np.array([True, True, True, True, True, False])
-                tm.assert_numpy_array_equal(result, expected)
-
-        with tm.assert_produces_warning(None):
-            for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]:
-                result = idx1 < val
-                expected = np.array([False, False, False, False, False, False])
-                tm.assert_numpy_array_equal(result, expected)
-                result = idx1 > val
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx1 <= val
-                tm.assert_numpy_array_equal(result, expected)
-                result = idx1 >= val
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx1 == val
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx1 != val
-                expected = np.array([True, True, True, True, True, True])
-                tm.assert_numpy_array_equal(result, expected)
-
-        # Check pd.NaT is handles as the same as np.nan
-        with tm.assert_produces_warning(None):
-            for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]:
-                result = idx1 < val
-                expected = np.array([True, False, False, False, False, False])
-                tm.assert_numpy_array_equal(result, expected)
-                result = idx1 > val
-                expected = np.array([False, False, False, False, True, True])
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx1 <= val
-                expected = np.array([True, False, True, False, False, False])
-                tm.assert_numpy_array_equal(result, expected)
-                result = idx1 >= val
-                expected = np.array([False, False, True, False, True, True])
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx1 == val
-                expected = np.array([False, False, True, False, False, False])
-                tm.assert_numpy_array_equal(result, expected)
-
-                result = idx1 != val
-                expected = np.array([True, True, False, True, True, True])
-                tm.assert_numpy_array_equal(result, expected)
-
-    @pytest.mark.parametrize('op', [operator.eq, operator.ne,
-                                    operator.gt, operator.ge,
-                                    operator.lt, operator.le])
-    def test_comparison_tzawareness_compat(self, op):
-        # GH#18162
-        dr = pd.date_range('2016-01-01', periods=6)
-        dz = dr.tz_localize('US/Pacific')
-
-        with pytest.raises(TypeError):
-            op(dr, dz)
-        with pytest.raises(TypeError):
-            op(dr, list(dz))
-        with pytest.raises(TypeError):
-            op(dz, dr)
-        with pytest.raises(TypeError):
-            op(dz, list(dr))
-
-        # Check that there isn't a problem aware-aware and naive-naive do not
-        # raise
-        assert (dr == dr).all()
-        assert (dr == list(dr)).all()
-        assert (dz == dz).all()
-        assert (dz == list(dz)).all()
-
-        # Check comparisons against scalar Timestamps
-        ts = pd.Timestamp('2000-03-14 01:59')
-        ts_tz = pd.Timestamp('2000-03-14 01:59', tz='Europe/Amsterdam')
-
-        assert (dr > ts).all()
-        with pytest.raises(TypeError):
-            op(dr, ts_tz)
-
-        assert (dz > ts_tz).all()
-        with pytest.raises(TypeError):
-            op(dz, ts)
-
-        # GH 12601: Check comparison against Timestamps and DatetimeIndex
-        with pytest.raises(TypeError):
-            op(ts, dz)
-
-    @pytest.mark.parametrize('op', [operator.eq, operator.ne,
-                                    operator.gt, operator.ge,
-                                    operator.lt, operator.le])
-    @pytest.mark.parametrize('other', [datetime(2016, 1, 1),
-                                       Timestamp('2016-01-01'),
-                                       np.datetime64('2016-01-01')])
-    def test_scalar_comparison_tzawareness(self, op, other, tz_aware_fixture):
-        tz = tz_aware_fixture
-        dti = pd.date_range('2016-01-01', periods=2, tz=tz)
-        with pytest.raises(TypeError):
-            op(dti, other)
-        with pytest.raises(TypeError):
-            op(other, dti)
-
-    @pytest.mark.parametrize('op', [operator.eq, operator.ne,
-                                    operator.gt, operator.ge,
-                                    operator.lt, operator.le])
-    def test_nat_comparison_tzawareness(self, op):
-        # GH#19276
-        # tzaware DatetimeIndex should not raise when compared to NaT
-        dti = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01', pd.NaT,
-                                '2014-05-01', '2014-07-01'])
-        expected = np.array([op == operator.ne] * len(dti))
-        result = op(dti, pd.NaT)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = op(dti.tz_localize('US/Pacific'), pd.NaT)
-        tm.assert_numpy_array_equal(result, expected)
-
-    def test_dti_cmp_str(self, tz_naive_fixture):
-        # GH#22074
-        # regardless of tz, we expect these comparisons are valid
-        tz = tz_naive_fixture
-        rng = date_range('1/1/2000', periods=10, tz=tz)
-        other = '1/1/2000'
-
-        result = rng == other
-        expected = np.array([True] + [False] * 9)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = rng != other
-        expected = np.array([False] + [True] * 9)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = rng < other
-        expected = np.array([False] * 10)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = rng <= other
-        expected = np.array([True] + [False] * 9)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = rng > other
-        expected = np.array([False] + [True] * 9)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = rng >= other
-        expected = np.array([True] * 10)
-        tm.assert_numpy_array_equal(result, expected)
-
-    @pytest.mark.parametrize('other', ['foo', 99, 4.0,
-                                       object(), timedelta(days=2)])
-    def test_dti_cmp_scalar_invalid(self, other, tz_naive_fixture):
-        # GH#22074
-        tz = tz_naive_fixture
-        rng = date_range('1/1/2000', periods=10, tz=tz)
-
-        result = rng == other
-        expected = np.array([False] * 10)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = rng != other
-        expected = np.array([True] * 10)
-        tm.assert_numpy_array_equal(result, expected)
-
-        with pytest.raises(TypeError):
-            rng < other
-        with pytest.raises(TypeError):
-            rng <= other
-        with pytest.raises(TypeError):
-            rng > other
-        with pytest.raises(TypeError):
-            rng >= other
-
-    def test_dti_cmp_list(self):
-        rng = date_range('1/1/2000', periods=10)
-
-        result = rng == list(rng)
-        expected = rng == rng
-        tm.assert_numpy_array_equal(result, expected)
-
-    @pytest.mark.parametrize('other', [
-        pd.timedelta_range('1D', periods=10),
-        pd.timedelta_range('1D', periods=10).to_series(),
-        pd.timedelta_range('1D', periods=10).asi8.view('m8[ns]')
-    ], ids=lambda x: type(x).__name__)
-    def test_dti_cmp_tdi_tzawareness(self, other):
-        # GH#22074
-        # reversion test that we _don't_ call _assert_tzawareness_compat
-        # when comparing against TimedeltaIndex
-        dti = date_range('2000-01-01', periods=10, tz='Asia/Tokyo')
-
-        result = dti == other
-        expected = np.array([False] * 10)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = dti != other
-        expected = np.array([True] * 10)
-        tm.assert_numpy_array_equal(result, expected)
-
-        with pytest.raises(TypeError):
-            dti < other
-        with pytest.raises(TypeError):
-            dti <= other
-        with pytest.raises(TypeError):
-            dti > other
-        with pytest.raises(TypeError):
-            dti >= other
-
-    def test_dti_cmp_object_dtype(self):
-        # GH#22074
-        dti = date_range('2000-01-01', periods=10, tz='Asia/Tokyo')
-
-        other = dti.astype('O')
-
-        result = dti == other
-        expected = np.array([True] * 10)
-        tm.assert_numpy_array_equal(result, expected)
-
-        other = dti.tz_localize(None)
-        with pytest.raises(TypeError):
-            # tzawareness failure
-            dti != other
-
-        other = np.array(list(dti[:5]) + [Timedelta(days=1)] * 5)
-        result = dti == other
-        expected = np.array([True] * 5 + [False] * 5)
-        tm.assert_numpy_array_equal(result, expected)
-
-        with pytest.raises(TypeError):
-            dti >= other
+from pandas.errors import NullFrequencyError
+from pandas import Series, DatetimeIndex, date_range
 
 
 class TestDatetimeIndexArithmetic(object):
 
-    # -------------------------------------------------------------
-    # Invalid Operations
-
-    @pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])])
-    @pytest.mark.parametrize('op', [operator.add, ops.radd,
-                                    operator.sub, ops.rsub])
-    def test_dti_add_sub_float(self, op, other):
-        dti = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
-        with pytest.raises(TypeError):
-            op(dti, other)
-
-    def test_dti_add_timestamp_raises(self):
-        idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
-        msg = "cannot add DatetimeIndex and Timestamp"
-        with tm.assert_raises_regex(TypeError, msg):
-            idx + Timestamp('2011-01-01')
-
-    def test_dti_radd_timestamp_raises(self):
-        idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
-        msg = "cannot add DatetimeIndex and Timestamp"
-        with tm.assert_raises_regex(TypeError, msg):
-            Timestamp('2011-01-01') + idx
-
-    # -------------------------------------------------------------
-    # Binary operations DatetimeIndex and int
-
-    def test_dti_add_int(self, tz_naive_fixture, one):
-        # Variants of `one` for #19012
-        tz = tz_naive_fixture
-        rng = pd.date_range('2000-01-01 09:00', freq='H',
-                            periods=10, tz=tz)
-        result = rng + one
-        expected = pd.date_range('2000-01-01 10:00', freq='H',
-                                 periods=10, tz=tz)
-        tm.assert_index_equal(result, expected)
-
-    def test_dti_iadd_int(self, tz_naive_fixture, one):
-        tz = tz_naive_fixture
-        rng = pd.date_range('2000-01-01 09:00', freq='H',
-                            periods=10, tz=tz)
-        expected = pd.date_range('2000-01-01 10:00', freq='H',
-                                 periods=10, tz=tz)
-        rng += one
-        tm.assert_index_equal(rng, expected)
-
-    def test_dti_sub_int(self, tz_naive_fixture, one):
-        tz = tz_naive_fixture
-        rng = pd.date_range('2000-01-01 09:00', freq='H',
-                            periods=10, tz=tz)
-        result = rng - one
-        expected = pd.date_range('2000-01-01 08:00', freq='H',
-                                 periods=10, tz=tz)
-        tm.assert_index_equal(result, expected)
-
-    def test_dti_isub_int(self, tz_naive_fixture, one):
-        tz = tz_naive_fixture
-        rng = pd.date_range('2000-01-01 09:00', freq='H',
-                            periods=10, tz=tz)
-        expected = pd.date_range('2000-01-01 08:00', freq='H',
-                                 periods=10, tz=tz)
-        rng -= one
-        tm.assert_index_equal(rng, expected)
-
-    # -------------------------------------------------------------
-    # __add__/__sub__ with integer arrays
-
-    @pytest.mark.parametrize('freq', ['H', 'D'])
-    @pytest.mark.parametrize('box', [np.array, pd.Index])
-    def test_dti_add_intarray_tick(self, box, freq):
-        # GH#19959
-        dti = pd.date_range('2016-01-01', periods=2, freq=freq)
-        other = box([4, -1])
-        expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))])
-        result = dti + other
-        tm.assert_index_equal(result, expected)
-        result = other + dti
-        tm.assert_index_equal(result, expected)
-
-    @pytest.mark.parametrize('freq', ['W', 'M', 'MS', 'Q'])
-    @pytest.mark.parametrize('box', [np.array, pd.Index])
-    def test_dti_add_intarray_non_tick(self, box, freq):
-        # GH#19959
-        dti = pd.date_range('2016-01-01', periods=2, freq=freq)
-        other = box([4, -1])
-        expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))])
-        with tm.assert_produces_warning(PerformanceWarning):
-            result = dti + other
-        tm.assert_index_equal(result, expected)
-        with tm.assert_produces_warning(PerformanceWarning):
-            result = other + dti
-        tm.assert_index_equal(result, expected)
-
-    @pytest.mark.parametrize('box', [np.array, pd.Index])
-    def test_dti_add_intarray_no_freq(self, box):
-        # GH#19959
-        dti = pd.DatetimeIndex(['2016-01-01', 'NaT', '2017-04-05 06:07:08'])
-        other = box([9, 4, -1])
-        with pytest.raises(NullFrequencyError):
-            dti + other
-        with pytest.raises(NullFrequencyError):
-            other + dti
-        with pytest.raises(NullFrequencyError):
-            dti - other
-        with pytest.raises(TypeError):
-            other - dti
-
     # -------------------------------------------------------------
     # DatetimeIndex.shift is used in integer addition
 
@@ -614,651 +100,3 @@ def test_dti_shift_near_midnight(self, shift, result_time):
         result = s.shift(shift, freq='H')
         expected = Series(1, index=DatetimeIndex([result_time], tz='EST'))
         tm.assert_series_equal(result, expected)
-
-    # -------------------------------------------------------------
-    # Binary operations DatetimeIndex and timedelta-like
-
-    def test_dti_add_timedeltalike(self, tz_naive_fixture, delta):
-        tz = tz_naive_fixture
-        rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
-        result = rng + delta
-        expected = pd.date_range('2000-01-01 02:00',
-                                 '2000-02-01 02:00', tz=tz)
-        tm.assert_index_equal(result, expected)
-
-    def test_dti_iadd_timedeltalike(self, tz_naive_fixture, delta):
-        tz = tz_naive_fixture
-        rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
-        expected = pd.date_range('2000-01-01 02:00',
-                                 '2000-02-01 02:00', tz=tz)
-        rng += delta
-        tm.assert_index_equal(rng, expected)
-
-    def test_dti_sub_timedeltalike(self, tz_naive_fixture, delta):
-        tz = tz_naive_fixture
-        rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
-        expected = pd.date_range('1999-12-31 22:00',
-                                 '2000-01-31 22:00', tz=tz)
-        result = rng - delta
-        tm.assert_index_equal(result, expected)
-
-    def test_dti_isub_timedeltalike(self, tz_naive_fixture, delta):
-        tz = tz_naive_fixture
-        rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
-        expected = pd.date_range('1999-12-31 22:00',
-                                 '2000-01-31 22:00', tz=tz)
-        rng -= delta
-        tm.assert_index_equal(rng, expected)
-
-    # -------------------------------------------------------------
-    # Binary operations DatetimeIndex and TimedeltaIndex/array
-    def test_dti_add_tdi(self, tz_naive_fixture):
-        # GH 17558
-        tz = tz_naive_fixture
-        dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
-        tdi = pd.timedelta_range('0 days', periods=10)
-        expected = pd.date_range('2017-01-01', periods=10, tz=tz)
-
-        # add with TimdeltaIndex
-        result = dti + tdi
-        tm.assert_index_equal(result, expected)
-
-        result = tdi + dti
-        tm.assert_index_equal(result, expected)
-
-        # add with timedelta64 array
-        result = dti + tdi.values
-        tm.assert_index_equal(result, expected)
-
-        result = tdi.values + dti
-        tm.assert_index_equal(result, expected)
-
-    def test_dti_iadd_tdi(self, tz_naive_fixture):
-        # GH 17558
-        tz = tz_naive_fixture
-        dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
-        tdi = pd.timedelta_range('0 days', periods=10)
-        expected = pd.date_range('2017-01-01', periods=10, tz=tz)
-
-        # iadd with TimdeltaIndex
-        result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
-        result += tdi
-        tm.assert_index_equal(result, expected)
-
-        result = pd.timedelta_range('0 days', periods=10)
-        result += dti
-        tm.assert_index_equal(result, expected)
-
-        # iadd with timedelta64 array
-        result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
-        result += tdi.values
-        tm.assert_index_equal(result, expected)
-
-        result = pd.timedelta_range('0 days', periods=10)
-        result += dti
-        tm.assert_index_equal(result, expected)
-
-    def test_dti_sub_tdi(self, tz_naive_fixture):
-        # GH 17558
-        tz = tz_naive_fixture
-        dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
-        tdi = pd.timedelta_range('0 days', periods=10)
-        expected = pd.date_range('2017-01-01', periods=10, tz=tz, freq='-1D')
-
-        # sub with TimedeltaIndex
-        result = dti - tdi
-        tm.assert_index_equal(result, expected)
-
-        msg = 'cannot subtract .*TimedeltaIndex'
-        with tm.assert_raises_regex(TypeError, msg):
-            tdi - dti
-
-        # sub with timedelta64 array
-        result = dti - tdi.values
-        tm.assert_index_equal(result, expected)
-
-        msg = 'cannot subtract DatetimeIndex from'
-        with tm.assert_raises_regex(TypeError, msg):
-            tdi.values - dti
-
-    def test_dti_isub_tdi(self, tz_naive_fixture):
-        # GH 17558
-        tz = tz_naive_fixture
-        dti = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
-        tdi = pd.timedelta_range('0 days', periods=10)
-        expected = pd.date_range('2017-01-01', periods=10, tz=tz, freq='-1D')
-
-        # isub with TimedeltaIndex
-        result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
-        result -= tdi
-        tm.assert_index_equal(result, expected)
-
-        msg = 'cannot subtract .*TimedeltaIndex'
-        with tm.assert_raises_regex(TypeError, msg):
-            tdi -= dti
-
-        # isub with timedelta64 array
-        result = DatetimeIndex([Timestamp('2017-01-01', tz=tz)] * 10)
-        result -= tdi.values
-        tm.assert_index_equal(result, expected)
-
-        msg = '|'.join(['cannot perform __neg__ with this index type:',
-                        'ufunc subtract cannot use operands with types',
-                        'cannot subtract DatetimeIndex from'])
-        with tm.assert_raises_regex(TypeError, msg):
-            tdi.values -= dti
-
-    # -------------------------------------------------------------
-    # Binary Operations DatetimeIndex and datetime-like
-    # TODO: A couple other tests belong in this section.  Move them in
-    # A PR where there isn't already a giant diff.
-
-    def test_add_datetimelike_and_dti(self, addend):
-        # GH#9631
-        dti = DatetimeIndex(['2011-01-01', '2011-01-02'])
-        msg = 'cannot add DatetimeIndex and {0}'.format(
-            type(addend).__name__)
-        with tm.assert_raises_regex(TypeError, msg):
-            dti + addend
-        with tm.assert_raises_regex(TypeError, msg):
-            addend + dti
-
-    def test_add_datetimelike_and_dti_tz(self, addend):
-        # GH#9631
-        dti_tz = DatetimeIndex(['2011-01-01',
-                                '2011-01-02']).tz_localize('US/Eastern')
-        msg = 'cannot add DatetimeIndex and {0}'.format(
-            type(addend).__name__)
-        with tm.assert_raises_regex(TypeError, msg):
-            dti_tz + addend
-        with tm.assert_raises_regex(TypeError, msg):
-            addend + dti_tz
-
-    # -------------------------------------------------------------
-    # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64]
-
-    def test_dti_add_dt64_array_raises(self, tz_naive_fixture):
-        tz = tz_naive_fixture
-        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
-        dtarr = dti.values
-
-        with pytest.raises(TypeError):
-            dti + dtarr
-        with pytest.raises(TypeError):
-            dtarr + dti
-
-    def test_dti_sub_dt64_array_naive(self):
-        dti = pd.date_range('2016-01-01', periods=3, tz=None)
-        dtarr = dti.values
-
-        expected = dti - dti
-        result = dti - dtarr
-        tm.assert_index_equal(result, expected)
-        result = dtarr - dti
-        tm.assert_index_equal(result, expected)
-
-    def test_dti_sub_dt64_array_aware_raises(self, tz_naive_fixture):
-        tz = tz_naive_fixture
-        if tz is None:
-            return
-        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
-        dtarr = dti.values
-
-        with pytest.raises(TypeError):
-            dti - dtarr
-        with pytest.raises(TypeError):
-            dtarr - dti
-
-    def test_dti_add_td64_array(self, tz_naive_fixture):
-        tz = tz_naive_fixture
-        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
-        tdi = dti - dti.shift(1)
-        tdarr = tdi.values
-
-        expected = dti + tdi
-        result = dti + tdarr
-        tm.assert_index_equal(result, expected)
-        result = tdarr + dti
-        tm.assert_index_equal(result, expected)
-
-    def test_dti_sub_td64_array(self, tz_naive_fixture):
-        tz = tz_naive_fixture
-        dti = pd.date_range('2016-01-01', periods=3, tz=tz)
-        tdi = dti - dti.shift(1)
-        tdarr = tdi.values
-
-        expected = dti - tdi
-        result = dti - tdarr
-        tm.assert_index_equal(result, expected)
-
-        with pytest.raises(TypeError):
-            tdarr - dti
-
-    # -------------------------------------------------------------
-
-    def test_sub_dti_dti(self):
-        # previously performed setop (deprecated in 0.16.0), now changed to
-        # return subtraction -> TimeDeltaIndex (GH ...)
-
-        dti = date_range('20130101', periods=3)
-        dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern')
-        dti_tz2 = date_range('20130101', periods=3).tz_localize('UTC')
-        expected = TimedeltaIndex([0, 0, 0])
-
-        result = dti - dti
-        tm.assert_index_equal(result, expected)
-
-        result = dti_tz - dti_tz
-        tm.assert_index_equal(result, expected)
-
-        with pytest.raises(TypeError):
-            dti_tz - dti
-
-        with pytest.raises(TypeError):
-            dti - dti_tz
-
-        with pytest.raises(TypeError):
-            dti_tz - dti_tz2
-
-        # isub
-        dti -= dti
-        tm.assert_index_equal(dti, expected)
-
-        # different length raises ValueError
-        dti1 = date_range('20130101', periods=3)
-        dti2 = date_range('20130101', periods=4)
-        with pytest.raises(ValueError):
-            dti1 - dti2
-
-        # NaN propagation
-        dti1 = DatetimeIndex(['2012-01-01', np.nan, '2012-01-03'])
-        dti2 = DatetimeIndex(['2012-01-02', '2012-01-03', np.nan])
-        expected = TimedeltaIndex(['1 days', np.nan, np.nan])
-        result = dti2 - dti1
-        tm.assert_index_equal(result, expected)
-
-    @pytest.mark.parametrize('freq', [None, 'D'])
-    def test_sub_period(self, freq):
-        # GH#13078
-        # not supported, check TypeError
-        p = pd.Period('2011-01-01', freq='D')
-
-        idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq)
-
-        with pytest.raises(TypeError):
-            idx - p
-
-        with pytest.raises(TypeError):
-            p - idx
-
-    @pytest.mark.parametrize('op', [operator.add, ops.radd,
-                                    operator.sub, ops.rsub])
-    @pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H'])
-    @pytest.mark.parametrize('dti_freq', [None, 'D'])
-    def test_dti_sub_pi(self, dti_freq, pi_freq, op):
-        # GH#20049 subtracting PeriodIndex should raise TypeError
-        dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=dti_freq)
-        pi = dti.to_period(pi_freq)
-        with pytest.raises(TypeError):
-            op(dti, pi)
-
-    def test_ufunc_coercions(self):
-        idx = date_range('2011-01-01', periods=3, freq='2D', name='x')
-
-        delta = np.timedelta64(1, 'D')
-        for result in [idx + delta, np.add(idx, delta)]:
-            assert isinstance(result, DatetimeIndex)
-            exp = date_range('2011-01-02', periods=3, freq='2D', name='x')
-            tm.assert_index_equal(result, exp)
-            assert result.freq == '2D'
-
-        for result in [idx - delta, np.subtract(idx, delta)]:
-            assert isinstance(result, DatetimeIndex)
-            exp = date_range('2010-12-31', periods=3, freq='2D', name='x')
-            tm.assert_index_equal(result, exp)
-            assert result.freq == '2D'
-
-        delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'),
-                          np.timedelta64(3, 'D')])
-        for result in [idx + delta, np.add(idx, delta)]:
-            assert isinstance(result, DatetimeIndex)
-            exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'],
-                                freq='3D', name='x')
-            tm.assert_index_equal(result, exp)
-            assert result.freq == '3D'
-
-        for result in [idx - delta, np.subtract(idx, delta)]:
-            assert isinstance(result, DatetimeIndex)
-            exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'],
-                                freq='D', name='x')
-            tm.assert_index_equal(result, exp)
-            assert result.freq == 'D'
-
-    def test_datetimeindex_sub_timestamp_overflow(self):
-        dtimax = pd.to_datetime(['now', pd.Timestamp.max])
-        dtimin = pd.to_datetime(['now', pd.Timestamp.min])
-
-        tsneg = Timestamp('1950-01-01')
-        ts_neg_variants = [tsneg,
-                           tsneg.to_pydatetime(),
-                           tsneg.to_datetime64().astype('datetime64[ns]'),
-                           tsneg.to_datetime64().astype('datetime64[D]')]
-
-        tspos = Timestamp('1980-01-01')
-        ts_pos_variants = [tspos,
-                           tspos.to_pydatetime(),
-                           tspos.to_datetime64().astype('datetime64[ns]'),
-                           tspos.to_datetime64().astype('datetime64[D]')]
-
-        for variant in ts_neg_variants:
-            with pytest.raises(OverflowError):
-                dtimax - variant
-
-        expected = pd.Timestamp.max.value - tspos.value
-        for variant in ts_pos_variants:
-            res = dtimax - variant
-            assert res[1].value == expected
-
-        expected = pd.Timestamp.min.value - tsneg.value
-        for variant in ts_neg_variants:
-            res = dtimin - variant
-            assert res[1].value == expected
-
-        for variant in ts_pos_variants:
-            with pytest.raises(OverflowError):
-                dtimin - variant
-
-    @pytest.mark.parametrize('names', [('foo', None, None),
-                                       ('baz', 'bar', None),
-                                       ('bar', 'bar', 'bar')])
-    @pytest.mark.parametrize('tz', [None, 'America/Chicago'])
-    def test_dti_add_series(self, tz, names):
-        # GH#13905
-        index = DatetimeIndex(['2016-06-28 05:30', '2016-06-28 05:31'],
-                              tz=tz, name=names[0])
-        ser = Series([Timedelta(seconds=5)] * 2,
-                     index=index, name=names[1])
-        expected = Series(index + Timedelta(seconds=5),
-                          index=index, name=names[2])
-
-        # passing name arg isn't enough when names[2] is None
-        expected.name = names[2]
-        assert expected.dtype == index.dtype
-        result = ser + index
-        tm.assert_series_equal(result, expected)
-        result2 = index + ser
-        tm.assert_series_equal(result2, expected)
-
-        expected = index + Timedelta(seconds=5)
-        result3 = ser.values + index
-        tm.assert_index_equal(result3, expected)
-        result4 = index + ser.values
-        tm.assert_index_equal(result4, expected)
-
-    def test_dti_add_offset_array(self, tz_naive_fixture):
-        # GH#18849
-        tz = tz_naive_fixture
-        dti = pd.date_range('2017-01-01', periods=2, tz=tz)
-        other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res = dti + other
-        expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))],
-                                 name=dti.name, freq='infer')
-        tm.assert_index_equal(res, expected)
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res2 = other + dti
-        tm.assert_index_equal(res2, expected)
-
-    @pytest.mark.parametrize('names', [(None, None, None),
-                                       ('foo', 'bar', None),
-                                       ('foo', 'foo', 'foo')])
-    def test_dti_add_offset_index(self, tz_naive_fixture, names):
-        # GH#18849, GH#19744
-        tz = tz_naive_fixture
-        dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0])
-        other = pd.Index([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)],
-                         name=names[1])
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res = dti + other
-        expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))],
-                                 name=names[2], freq='infer')
-        tm.assert_index_equal(res, expected)
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res2 = other + dti
-        tm.assert_index_equal(res2, expected)
-
-    def test_dti_sub_offset_array(self, tz_naive_fixture):
-        # GH#18824
-        tz = tz_naive_fixture
-        dti = pd.date_range('2017-01-01', periods=2, tz=tz)
-        other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res = dti - other
-        expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))],
-                                 name=dti.name, freq='infer')
-        tm.assert_index_equal(res, expected)
-
-    @pytest.mark.parametrize('names', [(None, None, None),
-                                       ('foo', 'bar', None),
-                                       ('foo', 'foo', 'foo')])
-    def test_dti_sub_offset_index(self, tz_naive_fixture, names):
-        # GH#18824, GH#19744
-        tz = tz_naive_fixture
-        dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0])
-        other = pd.Index([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)],
-                         name=names[1])
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res = dti - other
-        expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))],
-                                 name=names[2], freq='infer')
-        tm.assert_index_equal(res, expected)
-
-    @pytest.mark.parametrize('names', [(None, None, None),
-                                       ('foo', 'bar', None),
-                                       ('foo', 'foo', 'foo')])
-    def test_dti_with_offset_series(self, tz_naive_fixture, names):
-        # GH#18849
-        tz = tz_naive_fixture
-        dti = pd.date_range('2017-01-01', periods=2, tz=tz, name=names[0])
-        other = Series([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)],
-                       name=names[1])
-
-        expected_add = Series([dti[n] + other[n] for n in range(len(dti))],
-                              name=names[2])
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res = dti + other
-        tm.assert_series_equal(res, expected_add)
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res2 = other + dti
-        tm.assert_series_equal(res2, expected_add)
-
-        expected_sub = Series([dti[n] - other[n] for n in range(len(dti))],
-                              name=names[2])
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res3 = dti - other
-        tm.assert_series_equal(res3, expected_sub)
-
-    def test_dti_add_offset_tzaware(self, tz_aware_fixture):
-        timezone = tz_aware_fixture
-        if timezone == 'US/Pacific':
-            dates = date_range('2012-11-01', periods=3, tz=timezone)
-            offset = dates + pd.offsets.Hour(5)
-            assert dates[0] + pd.offsets.Hour(5) == offset[0]
-
-        dates = date_range('2010-11-01 00:00',
-                           periods=3, tz=timezone, freq='H')
-        expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00',
-                                  '2010-11-01 07:00'], freq='H', tz=timezone)
-
-        offset = dates + pd.offsets.Hour(5)
-        tm.assert_index_equal(offset, expected)
-        offset = dates + np.timedelta64(5, 'h')
-        tm.assert_index_equal(offset, expected)
-        offset = dates + timedelta(hours=5)
-        tm.assert_index_equal(offset, expected)
-
-
-@pytest.mark.parametrize('klass,assert_func', [
-    (Series, tm.assert_series_equal),
-    (DatetimeIndex, tm.assert_index_equal)])
-def test_dt64_with_offset_array(klass, assert_func):
-    # GH#10699
-    # array of offsets
-    box = Series if klass is Series else pd.Index
-    with tm.assert_produces_warning(PerformanceWarning):
-        s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')])
-        result = s + box([pd.offsets.DateOffset(years=1),
-                          pd.offsets.MonthEnd()])
-        exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29')])
-        assert_func(result, exp)
-
-        # same offset
-        result = s + box([pd.offsets.DateOffset(years=1),
-                          pd.offsets.DateOffset(years=1)])
-        exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')])
-        assert_func(result, exp)
-
-
-@pytest.mark.parametrize('klass,assert_func', [
-    (Series, tm.assert_series_equal),
-    (DatetimeIndex, tm.assert_index_equal)])
-def test_dt64_with_DateOffsets_relativedelta(klass, assert_func):
-    # GH#10699
-    vec = klass([Timestamp('2000-01-05 00:15:00'),
-                 Timestamp('2000-01-31 00:23:00'),
-                 Timestamp('2000-01-01'),
-                 Timestamp('2000-03-31'),
-                 Timestamp('2000-02-29'),
-                 Timestamp('2000-12-31'),
-                 Timestamp('2000-05-15'),
-                 Timestamp('2001-06-15')])
-
-    # DateOffset relativedelta fastpath
-    relative_kwargs = [('years', 2), ('months', 5), ('days', 3),
-                       ('hours', 5), ('minutes', 10), ('seconds', 2),
-                       ('microseconds', 5)]
-    for i, kwd in enumerate(relative_kwargs):
-        op = pd.DateOffset(**dict([kwd]))
-        assert_func(klass([x + op for x in vec]), vec + op)
-        assert_func(klass([x - op for x in vec]), vec - op)
-        op = pd.DateOffset(**dict(relative_kwargs[:i + 1]))
-        assert_func(klass([x + op for x in vec]), vec + op)
-        assert_func(klass([x - op for x in vec]), vec - op)
-
-
-@pytest.mark.parametrize('cls_and_kwargs', [
-    'YearBegin', ('YearBegin', {'month': 5}),
-    'YearEnd', ('YearEnd', {'month': 5}),
-    'MonthBegin', 'MonthEnd',
-    'SemiMonthEnd', 'SemiMonthBegin',
-    'Week', ('Week', {'weekday': 3}),
-    'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin',
-    'CustomBusinessDay', 'CDay', 'CBMonthEnd',
-    'CBMonthBegin', 'BMonthBegin', 'BMonthEnd',
-    'BusinessHour', 'BYearBegin', 'BYearEnd',
-    'BQuarterBegin', ('LastWeekOfMonth', {'weekday': 2}),
-    ('FY5253Quarter', {'qtr_with_extra_week': 1,
-                       'startingMonth': 1,
-                       'weekday': 2,
-                       'variation': 'nearest'}),
-    ('FY5253', {'weekday': 0, 'startingMonth': 2, 'variation': 'nearest'}),
-    ('WeekOfMonth', {'weekday': 2, 'week': 2}),
-    'Easter', ('DateOffset', {'day': 4}),
-    ('DateOffset', {'month': 5})])
-@pytest.mark.parametrize('normalize', [True, False])
-@pytest.mark.parametrize('klass,assert_func', [
-    (Series, tm.assert_series_equal),
-    (DatetimeIndex, tm.assert_index_equal)])
-def test_dt64_with_DateOffsets(klass, assert_func, normalize, cls_and_kwargs):
-    # GH#10699
-    # assert these are equal on a piecewise basis
-    vec = klass([Timestamp('2000-01-05 00:15:00'),
-                 Timestamp('2000-01-31 00:23:00'),
-                 Timestamp('2000-01-01'),
-                 Timestamp('2000-03-31'),
-                 Timestamp('2000-02-29'),
-                 Timestamp('2000-12-31'),
-                 Timestamp('2000-05-15'),
-                 Timestamp('2001-06-15')])
-
-    if isinstance(cls_and_kwargs, tuple):
-        # If cls_name param is a tuple, then 2nd entry is kwargs for
-        # the offset constructor
-        cls_name, kwargs = cls_and_kwargs
-    else:
-        cls_name = cls_and_kwargs
-        kwargs = {}
-
-    offset_cls = getattr(pd.offsets, cls_name)
-
-    with warnings.catch_warnings(record=True):
-        for n in [0, 5]:
-            if (cls_name in ['WeekOfMonth', 'LastWeekOfMonth',
-                             'FY5253Quarter', 'FY5253'] and n == 0):
-                # passing n = 0 is invalid for these offset classes
-                continue
-
-            offset = offset_cls(n, normalize=normalize, **kwargs)
-            assert_func(klass([x + offset for x in vec]), vec + offset)
-            assert_func(klass([x - offset for x in vec]), vec - offset)
-            assert_func(klass([offset + x for x in vec]), offset + vec)
-
-
-# GH 10699
-@pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex],
-                                                  [tm.assert_series_equal,
-                                                   tm.assert_index_equal]))
-def test_datetime64_with_DateOffset(klass, assert_func):
-    s = klass(date_range('2000-01-01', '2000-01-31'), name='a')
-    result = s + pd.DateOffset(years=1)
-    result2 = pd.DateOffset(years=1) + s
-    exp = klass(date_range('2001-01-01', '2001-01-31'), name='a')
-    assert_func(result, exp)
-    assert_func(result2, exp)
-
-    result = s - pd.DateOffset(years=1)
-    exp = klass(date_range('1999-01-01', '1999-01-31'), name='a')
-    assert_func(result, exp)
-
-    s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
-               pd.Timestamp('2000-02-15', tz='US/Central')], name='a')
-    result = s + pd.offsets.Day()
-    result2 = pd.offsets.Day() + s
-    exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'),
-                 Timestamp('2000-02-16', tz='US/Central')], name='a')
-    assert_func(result, exp)
-    assert_func(result2, exp)
-
-    s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
-               pd.Timestamp('2000-02-15', tz='US/Central')], name='a')
-    result = s + pd.offsets.MonthEnd()
-    result2 = pd.offsets.MonthEnd() + s
-    exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'),
-                 Timestamp('2000-02-29', tz='US/Central')], name='a')
-    assert_func(result, exp)
-    assert_func(result2, exp)
-
-
-@pytest.mark.parametrize('years', [-1, 0, 1])
-@pytest.mark.parametrize('months', [-2, 0, 2])
-def test_shift_months(years, months):
-    s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'),
-                       Timestamp('2000-01-31 00:23:00'),
-                       Timestamp('2000-01-01'),
-                       Timestamp('2000-02-29'),
-                       Timestamp('2000-12-31')])
-    actual = DatetimeIndex(shift_months(s.asi8, years * 12 + months))
-
-    raw = [x + pd.offsets.DateOffset(years=years, months=months)
-           for x in s]
-    expected = DatetimeIndex(raw)
-    tm.assert_index_equal(actual, expected)
diff --git a/pandas/tests/indexes/period/test_arithmetic.py b/pandas/tests/indexes/period/test_arithmetic.py
index d7dbb1423a9a3..3380a1ebc58dc 100644
--- a/pandas/tests/indexes/period/test_arithmetic.py
+++ b/pandas/tests/indexes/period/test_arithmetic.py
@@ -1,645 +1,14 @@
 # -*- coding: utf-8 -*-
-from datetime import timedelta
-import operator
 
 import pytest
 import numpy as np
 
 import pandas as pd
 import pandas.util.testing as tm
-from pandas import (Timedelta,
-                    period_range, Period, PeriodIndex,
-                    _np_version_under1p10)
-import pandas.core.indexes.period as period
-from pandas.core import ops
-from pandas.errors import PerformanceWarning
-
-
-_common_mismatch = [pd.offsets.YearBegin(2),
-                    pd.offsets.MonthBegin(1),
-                    pd.offsets.Minute()]
-
-
-@pytest.fixture(params=[timedelta(minutes=30),
-                        np.timedelta64(30, 's'),
-                        Timedelta(seconds=30)] + _common_mismatch)
-def not_hourly(request):
-    """
-    Several timedelta-like and DateOffset instances that are _not_
-    compatible with Hourly frequencies.
-    """
-    return request.param
-
-
-@pytest.fixture(params=[np.timedelta64(4, 'h'),
-                        timedelta(hours=23),
-                        Timedelta('23:00:00')] + _common_mismatch)
-def not_daily(request):
-    """
-    Several timedelta-like and DateOffset instances that are _not_
-    compatible with Daily frequencies.
-    """
-    return request.param
-
-
-@pytest.fixture(params=[np.timedelta64(365, 'D'),
-                        timedelta(365),
-                        Timedelta(days=365)] + _common_mismatch)
-def mismatched(request):
-    """
-    Several timedelta-like and DateOffset instances that are _not_
-    compatible with Monthly or Annual frequencies.
-    """
-    return request.param
-
-
-@pytest.fixture(params=[pd.offsets.Day(3),
-                        timedelta(days=3),
-                        np.timedelta64(3, 'D'),
-                        pd.offsets.Hour(72),
-                        timedelta(minutes=60 * 24 * 3),
-                        np.timedelta64(72, 'h'),
-                        Timedelta('72:00:00')])
-def three_days(request):
-    """
-    Several timedelta-like and DateOffset objects that each represent
-    a 3-day timedelta
-    """
-    return request.param
-
-
-@pytest.fixture(params=[pd.offsets.Hour(2),
-                        timedelta(hours=2),
-                        np.timedelta64(2, 'h'),
-                        pd.offsets.Minute(120),
-                        timedelta(minutes=120),
-                        np.timedelta64(120, 'm')])
-def two_hours(request):
-    """
-    Several timedelta-like and DateOffset objects that each represent
-    a 2-hour timedelta
-    """
-    return request.param
-
-
-class TestPeriodIndexComparisons(object):
-    def test_pi_cmp_period(self):
-        idx = period_range('2007-01', periods=20, freq='M')
-
-        result = idx < idx[10]
-        exp = idx.values < idx.values[10]
-        tm.assert_numpy_array_equal(result, exp)
-
-    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
-    def test_pi_cmp_pi(self, freq):
-        base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
-                           freq=freq)
-        per = Period('2011-02', freq=freq)
-
-        exp = np.array([False, True, False, False])
-        tm.assert_numpy_array_equal(base == per, exp)
-        tm.assert_numpy_array_equal(per == base, exp)
-
-        exp = np.array([True, False, True, True])
-        tm.assert_numpy_array_equal(base != per, exp)
-        tm.assert_numpy_array_equal(per != base, exp)
-
-        exp = np.array([False, False, True, True])
-        tm.assert_numpy_array_equal(base > per, exp)
-        tm.assert_numpy_array_equal(per < base, exp)
-
-        exp = np.array([True, False, False, False])
-        tm.assert_numpy_array_equal(base < per, exp)
-        tm.assert_numpy_array_equal(per > base, exp)
-
-        exp = np.array([False, True, True, True])
-        tm.assert_numpy_array_equal(base >= per, exp)
-        tm.assert_numpy_array_equal(per <= base, exp)
-
-        exp = np.array([True, True, False, False])
-        tm.assert_numpy_array_equal(base <= per, exp)
-        tm.assert_numpy_array_equal(per >= base, exp)
-
-        idx = PeriodIndex(['2011-02', '2011-01', '2011-03', '2011-05'],
-                          freq=freq)
-
-        exp = np.array([False, False, True, False])
-        tm.assert_numpy_array_equal(base == idx, exp)
-
-        exp = np.array([True, True, False, True])
-        tm.assert_numpy_array_equal(base != idx, exp)
-
-        exp = np.array([False, True, False, False])
-        tm.assert_numpy_array_equal(base > idx, exp)
-
-        exp = np.array([True, False, False, True])
-        tm.assert_numpy_array_equal(base < idx, exp)
-
-        exp = np.array([False, True, True, False])
-        tm.assert_numpy_array_equal(base >= idx, exp)
-
-        exp = np.array([True, False, True, True])
-        tm.assert_numpy_array_equal(base <= idx, exp)
-
-    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
-    def test_pi_cmp_pi_mismatched_freq_raises(self, freq):
-        # different base freq
-        base = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
-                           freq=freq)
-
-        msg = "Input has different freq=A-DEC from PeriodIndex"
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            base <= Period('2011', freq='A')
-
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            Period('2011', freq='A') >= base
-
-        idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A')
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            base <= idx
-
-        # Different frequency
-        msg = "Input has different freq=4M from PeriodIndex"
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            base <= Period('2011', freq='4M')
-
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            Period('2011', freq='4M') >= base
-
-        idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M')
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            base <= idx
-
-    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
-    def test_pi_cmp_nat(self, freq):
-        idx1 = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq)
-
-        result = idx1 > Period('2011-02', freq=freq)
-        exp = np.array([False, False, False, True])
-        tm.assert_numpy_array_equal(result, exp)
-        result = Period('2011-02', freq=freq) < idx1
-        tm.assert_numpy_array_equal(result, exp)
-
-        result = idx1 == Period('NaT', freq=freq)
-        exp = np.array([False, False, False, False])
-        tm.assert_numpy_array_equal(result, exp)
-        result = Period('NaT', freq=freq) == idx1
-        tm.assert_numpy_array_equal(result, exp)
-
-        result = idx1 != Period('NaT', freq=freq)
-        exp = np.array([True, True, True, True])
-        tm.assert_numpy_array_equal(result, exp)
-        result = Period('NaT', freq=freq) != idx1
-        tm.assert_numpy_array_equal(result, exp)
-
-        idx2 = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq=freq)
-        result = idx1 < idx2
-        exp = np.array([True, False, False, False])
-        tm.assert_numpy_array_equal(result, exp)
-
-        result = idx1 == idx2
-        exp = np.array([False, False, False, False])
-        tm.assert_numpy_array_equal(result, exp)
-
-        result = idx1 != idx2
-        exp = np.array([True, True, True, True])
-        tm.assert_numpy_array_equal(result, exp)
-
-        result = idx1 == idx1
-        exp = np.array([True, True, False, True])
-        tm.assert_numpy_array_equal(result, exp)
-
-        result = idx1 != idx1
-        exp = np.array([False, False, True, False])
-        tm.assert_numpy_array_equal(result, exp)
-
-    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
-    def test_pi_cmp_nat_mismatched_freq_raises(self, freq):
-        idx1 = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-05'], freq=freq)
-
-        diff = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq='4M')
-        msg = "Input has different freq=4M from PeriodIndex"
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            idx1 > diff
-
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            idx1 == diff
-
-    # TODO: De-duplicate with test_pi_cmp_nat
-    @pytest.mark.parametrize('dtype', [object, None])
-    def test_comp_nat(self, dtype):
-        left = pd.PeriodIndex([pd.Period('2011-01-01'), pd.NaT,
-                               pd.Period('2011-01-03')])
-        right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period('2011-01-03')])
-
-        if dtype is not None:
-            left = left.astype(dtype)
-            right = right.astype(dtype)
-
-        result = left == right
-        expected = np.array([False, False, True])
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = left != right
-        expected = np.array([True, True, False])
-        tm.assert_numpy_array_equal(result, expected)
-
-        expected = np.array([False, False, False])
-        tm.assert_numpy_array_equal(left == pd.NaT, expected)
-        tm.assert_numpy_array_equal(pd.NaT == right, expected)
-
-        expected = np.array([True, True, True])
-        tm.assert_numpy_array_equal(left != pd.NaT, expected)
-        tm.assert_numpy_array_equal(pd.NaT != left, expected)
-
-        expected = np.array([False, False, False])
-        tm.assert_numpy_array_equal(left < pd.NaT, expected)
-        tm.assert_numpy_array_equal(pd.NaT > left, expected)
+from pandas import PeriodIndex
 
 
 class TestPeriodIndexArithmetic(object):
-    # ---------------------------------------------------------------
-    # __add__/__sub__ with PeriodIndex
-    # PeriodIndex + other is defined for integers and timedelta-like others
-    # PeriodIndex - other is defined for integers, timedelta-like others,
-    #   and PeriodIndex (with matching freq)
-
-    def test_pi_add_iadd_pi_raises(self):
-        rng = pd.period_range('1/1/2000', freq='D', periods=5)
-        other = pd.period_range('1/6/2000', freq='D', periods=5)
-
-        # An earlier implementation of PeriodIndex addition performed
-        # a set operation (union).  This has since been changed to
-        # raise a TypeError. See GH#14164 and GH#13077 for historical
-        # reference.
-        with pytest.raises(TypeError):
-            rng + other
-
-        with pytest.raises(TypeError):
-            rng += other
-
-    def test_pi_sub_isub_pi(self):
-        # GH#20049
-        # For historical reference see GH#14164, GH#13077.
-        # PeriodIndex subtraction originally performed set difference,
-        # then changed to raise TypeError before being implemented in GH#20049
-        rng = pd.period_range('1/1/2000', freq='D', periods=5)
-        other = pd.period_range('1/6/2000', freq='D', periods=5)
-
-        off = rng.freq
-        expected = pd.Index([-5 * off] * 5)
-        result = rng - other
-        tm.assert_index_equal(result, expected)
-
-        rng -= other
-        tm.assert_index_equal(rng, expected)
-
-    def test_pi_sub_pi_with_nat(self):
-        rng = pd.period_range('1/1/2000', freq='D', periods=5)
-        other = rng[1:].insert(0, pd.NaT)
-        assert other[1:].equals(rng[1:])
-
-        result = rng - other
-        off = rng.freq
-        expected = pd.Index([pd.NaT, 0 * off, 0 * off, 0 * off, 0 * off])
-        tm.assert_index_equal(result, expected)
-
-    def test_pi_sub_pi_mismatched_freq(self):
-        rng = pd.period_range('1/1/2000', freq='D', periods=5)
-        other = pd.period_range('1/6/2000', freq='H', periods=5)
-        with pytest.raises(period.IncompatibleFrequency):
-            rng - other
-
-    # -------------------------------------------------------------
-    # Invalid Operations
-
-    @pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])])
-    @pytest.mark.parametrize('op', [operator.add, ops.radd,
-                                    operator.sub, ops.rsub])
-    def test_pi_add_sub_float(self, op, other):
-        dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D')
-        pi = dti.to_period('D')
-        with pytest.raises(TypeError):
-            op(pi, other)
-
-    # -----------------------------------------------------------------
-    # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64]
-
-    def test_pi_add_sub_dt64_array_raises(self):
-        rng = pd.period_range('1/1/2000', freq='D', periods=3)
-        dti = pd.date_range('2016-01-01', periods=3)
-        dtarr = dti.values
-
-        with pytest.raises(TypeError):
-            rng + dtarr
-        with pytest.raises(TypeError):
-            dtarr + rng
-
-        with pytest.raises(TypeError):
-            rng - dtarr
-        with pytest.raises(TypeError):
-            dtarr - rng
-
-    def test_pi_add_sub_td64_array_non_tick_raises(self):
-        rng = pd.period_range('1/1/2000', freq='Q', periods=3)
-        dti = pd.date_range('2016-01-01', periods=3)
-        tdi = dti - dti.shift(1)
-        tdarr = tdi.values
-
-        with pytest.raises(period.IncompatibleFrequency):
-            rng + tdarr
-        with pytest.raises(period.IncompatibleFrequency):
-            tdarr + rng
-
-        with pytest.raises(period.IncompatibleFrequency):
-            rng - tdarr
-        with pytest.raises(period.IncompatibleFrequency):
-            tdarr - rng
-
-    @pytest.mark.xfail(reason='op with TimedeltaIndex raises, with ndarray OK',
-                       strict=True)
-    def test_pi_add_sub_td64_array_tick(self):
-        rng = pd.period_range('1/1/2000', freq='Q', periods=3)
-        dti = pd.date_range('2016-01-01', periods=3)
-        tdi = dti - dti.shift(1)
-        tdarr = tdi.values
-
-        expected = rng + tdi
-        result = rng + tdarr
-        tm.assert_index_equal(result, expected)
-        result = tdarr + rng
-        tm.assert_index_equal(result, expected)
-
-        expected = rng - tdi
-        result = rng - tdarr
-        tm.assert_index_equal(result, expected)
-
-        with pytest.raises(TypeError):
-            tdarr - rng
-
-    # -----------------------------------------------------------------
-    # operations with array/Index of DateOffset objects
-
-    @pytest.mark.parametrize('box', [np.array, pd.Index])
-    def test_pi_add_offset_array(self, box):
-        # GH#18849
-        pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')])
-        offs = box([pd.offsets.QuarterEnd(n=1, startingMonth=12),
-                    pd.offsets.QuarterEnd(n=-2, startingMonth=12)])
-        expected = pd.PeriodIndex([pd.Period('2015Q2'), pd.Period('2015Q4')])
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res = pi + offs
-        tm.assert_index_equal(res, expected)
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res2 = offs + pi
-        tm.assert_index_equal(res2, expected)
-
-        unanchored = np.array([pd.offsets.Hour(n=1),
-                               pd.offsets.Minute(n=-2)])
-        # addition/subtraction ops with incompatible offsets should issue
-        # a PerformanceWarning and _then_ raise a TypeError.
-        with pytest.raises(period.IncompatibleFrequency):
-            with tm.assert_produces_warning(PerformanceWarning):
-                pi + unanchored
-        with pytest.raises(period.IncompatibleFrequency):
-            with tm.assert_produces_warning(PerformanceWarning):
-                unanchored + pi
-
-    @pytest.mark.parametrize('box', [np.array, pd.Index])
-    def test_pi_sub_offset_array(self, box):
-        # GH#18824
-        pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('2016Q2')])
-        other = box([pd.offsets.QuarterEnd(n=1, startingMonth=12),
-                     pd.offsets.QuarterEnd(n=-2, startingMonth=12)])
-
-        expected = PeriodIndex([pi[n] - other[n] for n in range(len(pi))])
-
-        with tm.assert_produces_warning(PerformanceWarning):
-            res = pi - other
-        tm.assert_index_equal(res, expected)
-
-        anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)])
-
-        # addition/subtraction ops with anchored offsets should issue
-        # a PerformanceWarning and _then_ raise a TypeError.
-        with pytest.raises(period.IncompatibleFrequency):
-            with tm.assert_produces_warning(PerformanceWarning):
-                pi - anchored
-        with pytest.raises(period.IncompatibleFrequency):
-            with tm.assert_produces_warning(PerformanceWarning):
-                anchored - pi
-
-    def test_pi_add_iadd_int(self, one):
-        # Variants of `one` for #19012
-        rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
-        result = rng + one
-        expected = pd.period_range('2000-01-01 10:00', freq='H', periods=10)
-        tm.assert_index_equal(result, expected)
-        rng += one
-        tm.assert_index_equal(rng, expected)
-
-    def test_pi_sub_isub_int(self, one):
-        """
-        PeriodIndex.__sub__ and __isub__ with several representations of
-        the integer 1, e.g. int, long, np.int64, np.uint8, ...
-        """
-        rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
-        result = rng - one
-        expected = pd.period_range('2000-01-01 08:00', freq='H', periods=10)
-        tm.assert_index_equal(result, expected)
-        rng -= one
-        tm.assert_index_equal(rng, expected)
-
-    @pytest.mark.parametrize('five', [5, np.array(5, dtype=np.int64)])
-    def test_pi_sub_intlike(self, five):
-        rng = period_range('2007-01', periods=50)
-
-        result = rng - five
-        exp = rng + (-five)
-        tm.assert_index_equal(result, exp)
-
-    def test_pi_sub_isub_offset(self):
-        # offset
-        # DateOffset
-        rng = pd.period_range('2014', '2024', freq='A')
-        result = rng - pd.offsets.YearEnd(5)
-        expected = pd.period_range('2009', '2019', freq='A')
-        tm.assert_index_equal(result, expected)
-        rng -= pd.offsets.YearEnd(5)
-        tm.assert_index_equal(rng, expected)
-
-        rng = pd.period_range('2014-01', '2016-12', freq='M')
-        result = rng - pd.offsets.MonthEnd(5)
-        expected = pd.period_range('2013-08', '2016-07', freq='M')
-        tm.assert_index_equal(result, expected)
-
-        rng -= pd.offsets.MonthEnd(5)
-        tm.assert_index_equal(rng, expected)
-
-    # ---------------------------------------------------------------
-    # __add__/__sub__ with integer arrays
-
-    @pytest.mark.parametrize('box', [np.array, pd.Index])
-    @pytest.mark.parametrize('op', [operator.add, ops.radd])
-    def test_pi_add_intarray(self, box, op):
-        # GH#19959
-        pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('NaT')])
-        other = box([4, -1])
-        result = op(pi, other)
-        expected = pd.PeriodIndex([pd.Period('2016Q1'), pd.Period('NaT')])
-        tm.assert_index_equal(result, expected)
-
-    @pytest.mark.parametrize('box', [np.array, pd.Index])
-    def test_pi_sub_intarray(self, box):
-        # GH#19959
-        pi = pd.PeriodIndex([pd.Period('2015Q1'), pd.Period('NaT')])
-        other = box([4, -1])
-        result = pi - other
-        expected = pd.PeriodIndex([pd.Period('2014Q1'), pd.Period('NaT')])
-        tm.assert_index_equal(result, expected)
-
-        with pytest.raises(TypeError):
-            other - pi
-
-    # ---------------------------------------------------------------
-    # Timedelta-like (timedelta, timedelta64, Timedelta, Tick)
-    # TODO: Some of these are misnomers because of non-Tick DateOffsets
-
-    def test_pi_add_iadd_timedeltalike_daily(self, three_days):
-        # Tick
-        other = three_days
-        rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
-        expected = pd.period_range('2014-05-04', '2014-05-18', freq='D')
-
-        result = rng + other
-        tm.assert_index_equal(result, expected)
-
-        rng += other
-        tm.assert_index_equal(rng, expected)
-
-    def test_pi_sub_isub_timedeltalike_daily(self, three_days):
-        # Tick-like 3 Days
-        other = three_days
-        rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
-        expected = pd.period_range('2014-04-28', '2014-05-12', freq='D')
-
-        result = rng - other
-        tm.assert_index_equal(result, expected)
-
-        rng -= other
-        tm.assert_index_equal(rng, expected)
-
-    def test_pi_add_iadd_timedeltalike_freq_mismatch_daily(self, not_daily):
-        other = not_daily
-        rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
-        msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)'
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng + other
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng += other
-
-    def test_pi_sub_timedeltalike_freq_mismatch_daily(self, not_daily):
-        other = not_daily
-        rng = pd.period_range('2014-05-01', '2014-05-15', freq='D')
-        msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)'
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng - other
-
-    def test_pi_add_iadd_timedeltalike_hourly(self, two_hours):
-        other = two_hours
-        rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H')
-        expected = pd.period_range('2014-01-01 12:00', '2014-01-05 12:00',
-                                   freq='H')
-
-        result = rng + other
-        tm.assert_index_equal(result, expected)
-
-        rng += other
-        tm.assert_index_equal(rng, expected)
-
-    def test_pi_add_timedeltalike_mismatched_freq_hourly(self, not_hourly):
-        other = not_hourly
-        rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H')
-        msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=H\\)'
-
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng + other
-
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng += other
-
-    def test_pi_sub_isub_timedeltalike_hourly(self, two_hours):
-        other = two_hours
-        rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H')
-        expected = pd.period_range('2014-01-01 08:00', '2014-01-05 08:00',
-                                   freq='H')
-
-        result = rng - other
-        tm.assert_index_equal(result, expected)
-
-        rng -= other
-        tm.assert_index_equal(rng, expected)
-
-    def test_add_iadd_timedeltalike_annual(self):
-        # offset
-        # DateOffset
-        rng = pd.period_range('2014', '2024', freq='A')
-        result = rng + pd.offsets.YearEnd(5)
-        expected = pd.period_range('2019', '2029', freq='A')
-        tm.assert_index_equal(result, expected)
-        rng += pd.offsets.YearEnd(5)
-        tm.assert_index_equal(rng, expected)
-
-    def test_pi_add_iadd_timedeltalike_freq_mismatch_annual(self, mismatched):
-        other = mismatched
-        rng = pd.period_range('2014', '2024', freq='A')
-        msg = ('Input has different freq(=.+)? '
-               'from PeriodIndex\\(freq=A-DEC\\)')
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng + other
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng += other
-
-    def test_pi_sub_isub_timedeltalike_freq_mismatch_annual(self, mismatched):
-        other = mismatched
-        rng = pd.period_range('2014', '2024', freq='A')
-        msg = ('Input has different freq(=.+)? '
-               'from PeriodIndex\\(freq=A-DEC\\)')
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng - other
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng -= other
-
-    def test_pi_add_iadd_timedeltalike_M(self):
-        rng = pd.period_range('2014-01', '2016-12', freq='M')
-        expected = pd.period_range('2014-06', '2017-05', freq='M')
-
-        result = rng + pd.offsets.MonthEnd(5)
-        tm.assert_index_equal(result, expected)
-
-        rng += pd.offsets.MonthEnd(5)
-        tm.assert_index_equal(rng, expected)
-
-    def test_pi_add_iadd_timedeltalike_freq_mismatch_monthly(self, mismatched):
-        other = mismatched
-        rng = pd.period_range('2014-01', '2016-12', freq='M')
-        msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)'
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng + other
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng += other
-
-    def test_pi_sub_isub_timedeltalike_freq_mismatch_monthly(self, mismatched):
-        other = mismatched
-        rng = pd.period_range('2014-01', '2016-12', freq='M')
-        msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)'
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng - other
-        with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-            rng -= other
-
     # ---------------------------------------------------------------
     # PeriodIndex.shift is used by __add__ and __sub__
 
@@ -728,215 +97,3 @@ def test_shift_gh8083(self):
         expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04',
                                 '2013-01-05', '2013-01-06'], freq='D')
         tm.assert_index_equal(result, expected)
-
-
-class TestPeriodIndexSeriesMethods(object):
-    """ Test PeriodIndex and Period Series Ops consistency """
-
-    def _check(self, values, func, expected):
-        idx = pd.PeriodIndex(values)
-        result = func(idx)
-        if isinstance(expected, pd.Index):
-            tm.assert_index_equal(result, expected)
-        else:
-            # comp op results in bool
-            tm.assert_numpy_array_equal(result, expected)
-
-        ser = pd.Series(values)
-        result = func(ser)
-
-        exp = pd.Series(expected, name=values.name)
-        tm.assert_series_equal(result, exp)
-
-    def test_pi_ops(self):
-        idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
-                          freq='M', name='idx')
-
-        expected = PeriodIndex(['2011-03', '2011-04', '2011-05', '2011-06'],
-                               freq='M', name='idx')
-        self._check(idx, lambda x: x + 2, expected)
-        self._check(idx, lambda x: 2 + x, expected)
-
-        self._check(idx + 2, lambda x: x - 2, idx)
-        result = idx - Period('2011-01', freq='M')
-        off = idx.freq
-        exp = pd.Index([0 * off, 1 * off, 2 * off, 3 * off], name='idx')
-        tm.assert_index_equal(result, exp)
-
-        result = Period('2011-01', freq='M') - idx
-        exp = pd.Index([0 * off, -1 * off, -2 * off, -3 * off], name='idx')
-        tm.assert_index_equal(result, exp)
-
-    @pytest.mark.parametrize('ng', ["str", 1.5])
-    def test_pi_ops_errors(self, ng):
-        idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
-                          freq='M', name='idx')
-        ser = pd.Series(idx)
-
-        msg = r"unsupported operand type\(s\)"
-
-        for obj in [idx, ser]:
-            with tm.assert_raises_regex(TypeError, msg):
-                obj + ng
-
-            with pytest.raises(TypeError):
-                # error message differs between PY2 and 3
-                ng + obj
-
-            with tm.assert_raises_regex(TypeError, msg):
-                obj - ng
-
-            with pytest.raises(TypeError):
-                np.add(obj, ng)
-
-            if _np_version_under1p10:
-                assert np.add(ng, obj) is NotImplemented
-            else:
-                with pytest.raises(TypeError):
-                    np.add(ng, obj)
-
-            with pytest.raises(TypeError):
-                np.subtract(obj, ng)
-
-            if _np_version_under1p10:
-                assert np.subtract(ng, obj) is NotImplemented
-            else:
-                with pytest.raises(TypeError):
-                    np.subtract(ng, obj)
-
-    def test_pi_ops_nat(self):
-        idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
-                          freq='M', name='idx')
-        expected = PeriodIndex(['2011-03', '2011-04', 'NaT', '2011-06'],
-                               freq='M', name='idx')
-        self._check(idx, lambda x: x + 2, expected)
-        self._check(idx, lambda x: 2 + x, expected)
-        self._check(idx, lambda x: np.add(x, 2), expected)
-
-        self._check(idx + 2, lambda x: x - 2, idx)
-        self._check(idx + 2, lambda x: np.subtract(x, 2), idx)
-
-        # freq with mult
-        idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
-                          freq='2M', name='idx')
-        expected = PeriodIndex(['2011-07', '2011-08', 'NaT', '2011-10'],
-                               freq='2M', name='idx')
-        self._check(idx, lambda x: x + 3, expected)
-        self._check(idx, lambda x: 3 + x, expected)
-        self._check(idx, lambda x: np.add(x, 3), expected)
-
-        self._check(idx + 3, lambda x: x - 3, idx)
-        self._check(idx + 3, lambda x: np.subtract(x, 3), idx)
-
-    def test_pi_ops_array_int(self):
-        idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
-                          freq='M', name='idx')
-        f = lambda x: x + np.array([1, 2, 3, 4])
-        exp = PeriodIndex(['2011-02', '2011-04', 'NaT', '2011-08'],
-                          freq='M', name='idx')
-        self._check(idx, f, exp)
-
-        f = lambda x: np.add(x, np.array([4, -1, 1, 2]))
-        exp = PeriodIndex(['2011-05', '2011-01', 'NaT', '2011-06'],
-                          freq='M', name='idx')
-        self._check(idx, f, exp)
-
-        f = lambda x: x - np.array([1, 2, 3, 4])
-        exp = PeriodIndex(['2010-12', '2010-12', 'NaT', '2010-12'],
-                          freq='M', name='idx')
-        self._check(idx, f, exp)
-
-        f = lambda x: np.subtract(x, np.array([3, 2, 3, -2]))
-        exp = PeriodIndex(['2010-10', '2010-12', 'NaT', '2011-06'],
-                          freq='M', name='idx')
-        self._check(idx, f, exp)
-
-    def test_pi_ops_offset(self):
-        idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01',
-                           '2011-04-01'], freq='D', name='idx')
-        f = lambda x: x + pd.offsets.Day()
-        exp = PeriodIndex(['2011-01-02', '2011-02-02', '2011-03-02',
-                           '2011-04-02'], freq='D', name='idx')
-        self._check(idx, f, exp)
-
-        f = lambda x: x + pd.offsets.Day(2)
-        exp = PeriodIndex(['2011-01-03', '2011-02-03', '2011-03-03',
-                           '2011-04-03'], freq='D', name='idx')
-        self._check(idx, f, exp)
-
-        f = lambda x: x - pd.offsets.Day(2)
-        exp = PeriodIndex(['2010-12-30', '2011-01-30', '2011-02-27',
-                           '2011-03-30'], freq='D', name='idx')
-        self._check(idx, f, exp)
-
-    def test_pi_offset_errors(self):
-        idx = PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01',
-                           '2011-04-01'], freq='D', name='idx')
-        ser = pd.Series(idx)
-
-        # Series op is applied per Period instance, thus error is raised
-        # from Period
-        msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)"
-        msg_s = r"Input cannot be converted to Period\(freq=D\)"
-        for obj, msg in [(idx, msg_idx), (ser, msg_s)]:
-            with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-                obj + pd.offsets.Hour(2)
-
-            with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-                pd.offsets.Hour(2) + obj
-
-            with tm.assert_raises_regex(period.IncompatibleFrequency, msg):
-                obj - pd.offsets.Hour(2)
-
-    def test_pi_sub_period(self):
-        # GH 13071
-        idx = PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
-                          freq='M', name='idx')
-
-        result = idx - pd.Period('2012-01', freq='M')
-        off = idx.freq
-        exp = pd.Index([-12 * off, -11 * off, -10 * off, -9 * off], name='idx')
-        tm.assert_index_equal(result, exp)
-
-        result = np.subtract(idx, pd.Period('2012-01', freq='M'))
-        tm.assert_index_equal(result, exp)
-
-        result = pd.Period('2012-01', freq='M') - idx
-        exp = pd.Index([12 * off, 11 * off, 10 * off, 9 * off], name='idx')
-        tm.assert_index_equal(result, exp)
-
-        result = np.subtract(pd.Period('2012-01', freq='M'), idx)
-        if _np_version_under1p10:
-            assert result is NotImplemented
-        else:
-            tm.assert_index_equal(result, exp)
-
-        exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx')
-        tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp)
-        tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp)
-
-    def test_pi_sub_pdnat(self):
-        # GH 13071
-        idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'],
-                          freq='M', name='idx')
-        exp = pd.TimedeltaIndex([pd.NaT] * 4, name='idx')
-        tm.assert_index_equal(pd.NaT - idx, exp)
-        tm.assert_index_equal(idx - pd.NaT, exp)
-
-    def test_pi_sub_period_nat(self):
-        # GH 13071
-        idx = PeriodIndex(['2011-01', 'NaT', '2011-03', '2011-04'],
-                          freq='M', name='idx')
-
-        result = idx - pd.Period('2012-01', freq='M')
-        off = idx.freq
-        exp = pd.Index([-12 * off, pd.NaT, -10 * off, -9 * off], name='idx')
-        tm.assert_index_equal(result, exp)
-
-        result = pd.Period('2012-01', freq='M') - idx
-        exp = pd.Index([12 * off, pd.NaT, 10 * off, 9 * off], name='idx')
-        tm.assert_index_equal(result, exp)
-
-        exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx')
-        tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp)
-        tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp)
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
index 1413e01000ec5..9ec9cbd8e1b89 100644
--- a/pandas/tests/series/test_arithmetic.py
+++ b/pandas/tests/series/test_arithmetic.py
@@ -1,13 +1,12 @@
 # -*- coding: utf-8 -*-
-from datetime import datetime, timedelta
+from datetime import timedelta
 import operator
 from decimal import Decimal
 
 import numpy as np
 import pytest
 
-from pandas import Series, Timestamp, Period
-from pandas._libs.tslibs.period import IncompatibleFrequency
+from pandas import Series, Timestamp
 
 import pandas as pd
 import pandas.util.testing as tm
@@ -79,132 +78,6 @@ def test_ser_cmp_result_names(self, names, op):
             assert result.name == names[2]
 
 
-class TestTimestampSeriesComparison(object):
-    def test_dt64_ser_cmp_date_warning(self):
-        # https://github.com/pandas-dev/pandas/issues/21359
-        # Remove this test and enble invalid test below
-        ser = pd.Series(pd.date_range('20010101', periods=10), name='dates')
-        date = ser.iloc[0].to_pydatetime().date()
-
-        with tm.assert_produces_warning(FutureWarning) as m:
-            result = ser == date
-        expected = pd.Series([True] + [False] * 9, name='dates')
-        tm.assert_series_equal(result, expected)
-        assert "Comparing Series of datetimes " in str(m[0].message)
-        assert "will not compare equal" in str(m[0].message)
-
-        with tm.assert_produces_warning(FutureWarning) as m:
-            result = ser != date
-        tm.assert_series_equal(result, ~expected)
-        assert "will not compare equal" in str(m[0].message)
-
-        with tm.assert_produces_warning(FutureWarning) as m:
-            result = ser <= date
-        tm.assert_series_equal(result, expected)
-        assert "a TypeError will be raised" in str(m[0].message)
-
-        with tm.assert_produces_warning(FutureWarning) as m:
-            result = ser < date
-        tm.assert_series_equal(result, pd.Series([False] * 10, name='dates'))
-        assert "a TypeError will be raised" in str(m[0].message)
-
-        with tm.assert_produces_warning(FutureWarning) as m:
-            result = ser >= date
-        tm.assert_series_equal(result, pd.Series([True] * 10, name='dates'))
-        assert "a TypeError will be raised" in str(m[0].message)
-
-        with tm.assert_produces_warning(FutureWarning) as m:
-            result = ser > date
-        tm.assert_series_equal(result, pd.Series([False] + [True] * 9,
-                                                 name='dates'))
-        assert "a TypeError will be raised" in str(m[0].message)
-
-    @pytest.mark.skip(reason="GH-21359")
-    def test_dt64ser_cmp_date_invalid(self):
-        # GH#19800 datetime.date comparison raises to
-        # match DatetimeIndex/Timestamp.  This also matches the behavior
-        # of stdlib datetime.datetime
-        ser = pd.Series(pd.date_range('20010101', periods=10), name='dates')
-        date = ser.iloc[0].to_pydatetime().date()
-        assert not (ser == date).any()
-        assert (ser != date).all()
-        with pytest.raises(TypeError):
-            ser > date
-        with pytest.raises(TypeError):
-            ser < date
-        with pytest.raises(TypeError):
-            ser >= date
-        with pytest.raises(TypeError):
-            ser <= date
-
-    def test_dt64ser_cmp_period_scalar(self):
-        ser = Series(pd.period_range('2000-01-01', periods=10, freq='D'))
-        val = Period('2000-01-04', freq='D')
-        result = ser > val
-        expected = Series([x > val for x in ser])
-        tm.assert_series_equal(result, expected)
-
-        val = ser[5]
-        result = ser > val
-        expected = Series([x > val for x in ser])
-        tm.assert_series_equal(result, expected)
-
-    @pytest.mark.parametrize("left,right", [
-        ("lt", "gt"),
-        ("le", "ge"),
-        ("eq", "eq"),
-        ("ne", "ne"),
-    ])
-    def test_timestamp_compare_series(self, left, right):
-        # see gh-4982
-        # Make sure we can compare Timestamps on the right AND left hand side.
-        ser = pd.Series(pd.date_range("20010101", periods=10), name="dates")
-        s_nat = ser.copy(deep=True)
-
-        ser[0] = pd.Timestamp("nat")
-        ser[3] = pd.Timestamp("nat")
-
-        left_f = getattr(operator, left)
-        right_f = getattr(operator, right)
-
-        # No NaT
-        expected = left_f(ser, pd.Timestamp("20010109"))
-        result = right_f(pd.Timestamp("20010109"), ser)
-        tm.assert_series_equal(result, expected)
-
-        # NaT
-        expected = left_f(ser, pd.Timestamp("nat"))
-        result = right_f(pd.Timestamp("nat"), ser)
-        tm.assert_series_equal(result, expected)
-
-        # Compare to Timestamp with series containing NaT
-        expected = left_f(s_nat, pd.Timestamp("20010109"))
-        result = right_f(pd.Timestamp("20010109"), s_nat)
-        tm.assert_series_equal(result, expected)
-
-        # Compare to NaT with series containing NaT
-        expected = left_f(s_nat, pd.Timestamp("nat"))
-        result = right_f(pd.Timestamp("nat"), s_nat)
-        tm.assert_series_equal(result, expected)
-
-    def test_timestamp_equality(self):
-        # GH#11034
-        ser = pd.Series([pd.Timestamp('2000-01-29 01:59:00'), 'NaT'])
-        result = ser != ser
-        tm.assert_series_equal(result, pd.Series([False, True]))
-        result = ser != ser[0]
-        tm.assert_series_equal(result, pd.Series([False, True]))
-        result = ser != ser[1]
-        tm.assert_series_equal(result, pd.Series([True, True]))
-
-        result = ser == ser
-        tm.assert_series_equal(result, pd.Series([True, False]))
-        result = ser == ser[0]
-        tm.assert_series_equal(result, pd.Series([True, False]))
-        result = ser == ser[1]
-        tm.assert_series_equal(result, pd.Series([False, False]))
-
-
 class TestTimedeltaSeriesComparisons(object):
     def test_compare_timedelta_series(self):
         # regresssion test for GH5963
@@ -214,112 +87,6 @@ def test_compare_timedelta_series(self):
         tm.assert_series_equal(actual, expected)
 
 
-class TestPeriodSeriesComparisons(object):
-    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
-    def test_cmp_series_period_scalar(self, freq):
-        # GH 13200
-        base = Series([Period(x, freq=freq) for x in
-                       ['2011-01', '2011-02', '2011-03', '2011-04']])
-        p = Period('2011-02', freq=freq)
-
-        exp = Series([False, True, False, False])
-        tm.assert_series_equal(base == p, exp)
-        tm.assert_series_equal(p == base, exp)
-
-        exp = Series([True, False, True, True])
-        tm.assert_series_equal(base != p, exp)
-        tm.assert_series_equal(p != base, exp)
-
-        exp = Series([False, False, True, True])
-        tm.assert_series_equal(base > p, exp)
-        tm.assert_series_equal(p < base, exp)
-
-        exp = Series([True, False, False, False])
-        tm.assert_series_equal(base < p, exp)
-        tm.assert_series_equal(p > base, exp)
-
-        exp = Series([False, True, True, True])
-        tm.assert_series_equal(base >= p, exp)
-        tm.assert_series_equal(p <= base, exp)
-
-        exp = Series([True, True, False, False])
-        tm.assert_series_equal(base <= p, exp)
-        tm.assert_series_equal(p >= base, exp)
-
-        # different base freq
-        msg = "Input has different freq=A-DEC from Period"
-        with tm.assert_raises_regex(IncompatibleFrequency, msg):
-            base <= Period('2011', freq='A')
-
-        with tm.assert_raises_regex(IncompatibleFrequency, msg):
-            Period('2011', freq='A') >= base
-
-    @pytest.mark.parametrize('freq', ['M', '2M', '3M'])
-    def test_cmp_series_period_series(self, freq):
-        # GH#13200
-        base = Series([Period(x, freq=freq) for x in
-                       ['2011-01', '2011-02', '2011-03', '2011-04']])
-
-        ser = Series([Period(x, freq=freq) for x in
-                      ['2011-02', '2011-01', '2011-03', '2011-05']])
-
-        exp = Series([False, False, True, False])
-        tm.assert_series_equal(base == ser, exp)
-
-        exp = Series([True, True, False, True])
-        tm.assert_series_equal(base != ser, exp)
-
-        exp = Series([False, True, False, False])
-        tm.assert_series_equal(base > ser, exp)
-
-        exp = Series([True, False, False, True])
-        tm.assert_series_equal(base < ser, exp)
-
-        exp = Series([False, True, True, False])
-        tm.assert_series_equal(base >= ser, exp)
-
-        exp = Series([True, False, True, True])
-        tm.assert_series_equal(base <= ser, exp)
-
-        ser2 = Series([Period(x, freq='A') for x in
-                       ['2011', '2011', '2011', '2011']])
-
-        # different base freq
-        msg = "Input has different freq=A-DEC from Period"
-        with tm.assert_raises_regex(IncompatibleFrequency, msg):
-            base <= ser2
-
-    def test_cmp_series_period_series_mixed_freq(self):
-        # GH#13200
-        base = Series([Period('2011', freq='A'),
-                       Period('2011-02', freq='M'),
-                       Period('2013', freq='A'),
-                       Period('2011-04', freq='M')])
-
-        ser = Series([Period('2012', freq='A'),
-                      Period('2011-01', freq='M'),
-                      Period('2013', freq='A'),
-                      Period('2011-05', freq='M')])
-
-        exp = Series([False, False, True, False])
-        tm.assert_series_equal(base == ser, exp)
-
-        exp = Series([True, True, False, True])
-        tm.assert_series_equal(base != ser, exp)
-
-        exp = Series([False, True, False, False])
-        tm.assert_series_equal(base > ser, exp)
-
-        exp = Series([True, False, False, True])
-        tm.assert_series_equal(base < ser, exp)
-
-        exp = Series([False, True, True, False])
-        tm.assert_series_equal(base >= ser, exp)
-
-        exp = Series([True, False, True, True])
-        tm.assert_series_equal(base <= ser, exp)
-
-
 # ------------------------------------------------------------------
 # Arithmetic
 
@@ -423,66 +190,3 @@ def test_series_with_dtype_radd_timedelta(self, dtype):
 
         result = ser + pd.Timedelta('3 days')
         tm.assert_series_equal(result, expected)
-
-
-class TestPeriodSeriesArithmetic(object):
-    def test_ops_series_timedelta(self):
-        # GH 13043
-        ser = pd.Series([pd.Period('2015-01-01', freq='D'),
-                         pd.Period('2015-01-02', freq='D')], name='xxx')
-        assert ser.dtype == object
-
-        expected = pd.Series([pd.Period('2015-01-02', freq='D'),
-                              pd.Period('2015-01-03', freq='D')], name='xxx')
-
-        result = ser + pd.Timedelta('1 days')
-        tm.assert_series_equal(result, expected)
-
-        result = pd.Timedelta('1 days') + ser
-        tm.assert_series_equal(result, expected)
-
-        result = ser + pd.tseries.offsets.Day()
-        tm.assert_series_equal(result, expected)
-
-        result = pd.tseries.offsets.Day() + ser
-        tm.assert_series_equal(result, expected)
-
-    def test_ops_series_period(self):
-        # GH 13043
-        ser = pd.Series([pd.Period('2015-01-01', freq='D'),
-                         pd.Period('2015-01-02', freq='D')], name='xxx')
-        assert ser.dtype == object
-
-        per = pd.Period('2015-01-10', freq='D')
-        off = per.freq
-        # dtype will be object because of original dtype
-        expected = pd.Series([9 * off, 8 * off], name='xxx', dtype=object)
-        tm.assert_series_equal(per - ser, expected)
-        tm.assert_series_equal(ser - per, -1 * expected)
-
-        s2 = pd.Series([pd.Period('2015-01-05', freq='D'),
-                        pd.Period('2015-01-04', freq='D')], name='xxx')
-        assert s2.dtype == object
-
-        expected = pd.Series([4 * off, 2 * off], name='xxx', dtype=object)
-        tm.assert_series_equal(s2 - ser, expected)
-        tm.assert_series_equal(ser - s2, -1 * expected)
-
-
-class TestTimestampSeriesArithmetic(object):
-    def test_timestamp_sub_series(self):
-        ser = pd.Series(pd.date_range('2014-03-17', periods=2, freq='D',
-                                      tz='US/Eastern'))
-        ts = ser[0]
-
-        delta_series = pd.Series([np.timedelta64(0, 'D'),
-                                  np.timedelta64(1, 'D')])
-        tm.assert_series_equal(ser - ts, delta_series)
-        tm.assert_series_equal(ts - ser, -delta_series)
-
-    def test_dt64ser_sub_datetime_dtype(self):
-        ts = Timestamp(datetime(1993, 1, 7, 13, 30, 00))
-        dt = datetime(1993, 6, 22, 13, 30)
-        ser = Series([ts])
-        result = pd.to_timedelta(np.abs(ser - dt))
-        assert result.dtype == 'timedelta64[ns]'