diff --git a/pandas/tests/indexes/datetimes/methods/test_factorize.py b/pandas/tests/indexes/datetimes/methods/test_factorize.py
new file mode 100644
index 0000000000000..6e095e29e47cd
--- /dev/null
+++ b/pandas/tests/indexes/datetimes/methods/test_factorize.py
@@ -0,0 +1,102 @@
+import numpy as np
+
+from pandas import DatetimeIndex, Index, date_range, factorize
+import pandas._testing as tm
+
+
+class TestDatetimeIndexFactorize:
+    def test_factorize(self):
+        idx1 = DatetimeIndex(
+            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"]
+        )
+
+        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
+        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
+
+        arr, idx = idx1.factorize()
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
+
+        arr, idx = idx1.factorize(sort=True)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
+
+        # tz must be preserved
+        idx1 = idx1.tz_localize("Asia/Tokyo")
+        exp_idx = exp_idx.tz_localize("Asia/Tokyo")
+
+        arr, idx = idx1.factorize()
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
+
+        idx2 = DatetimeIndex(
+            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
+        )
+
+        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
+        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
+        arr, idx = idx2.factorize(sort=True)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
+
+        exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
+        exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
+        arr, idx = idx2.factorize()
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
+
+    def test_factorize_preserves_freq(self):
+        # GH#38120 freq should be preserved
+        idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo")
+        exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
+
+        arr, idx = idx3.factorize()
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
+
+        arr, idx = factorize(idx3)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
+
+    def test_factorize_tz(self, tz_naive_fixture, index_or_series):
+        tz = tz_naive_fixture
+        # GH#13750
+        base = date_range("2016-11-05", freq="H", periods=100, tz=tz)
+        idx = base.repeat(5)
+
+        exp_arr = np.arange(100, dtype=np.intp).repeat(5)
+
+        obj = index_or_series(idx)
+
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        expected = base._with_freq(None)
+        tm.assert_index_equal(res, expected)
+        assert res.freq == expected.freq
+
+    def test_factorize_dst(self, index_or_series):
+        # GH#13750
+        idx = date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern")
+        obj = index_or_series(idx)
+
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
+        tm.assert_index_equal(res, idx)
+        if index_or_series is Index:
+            assert res.freq == idx.freq
+
+        idx = date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern")
+        obj = index_or_series(idx)
+
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
+        tm.assert_index_equal(res, idx)
+        if index_or_series is Index:
+            assert res.freq == idx.freq
diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
index 077c3ebc56ac4..c4684e857f706 100644
--- a/pandas/tests/indexes/datetimes/test_datetime.py
+++ b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -200,102 +200,6 @@ def test_ns_index(self):
         new_index = date_range(start=index[0], end=index[-1], freq=index.freq)
         self.assert_index_parameters(new_index)
 
-    def test_factorize(self):
-        idx1 = DatetimeIndex(
-            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"]
-        )
-
-        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
-        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
-
-        arr, idx = idx1.factorize()
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, exp_idx)
-        assert idx.freq == exp_idx.freq
-
-        arr, idx = idx1.factorize(sort=True)
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, exp_idx)
-        assert idx.freq == exp_idx.freq
-
-        # tz must be preserved
-        idx1 = idx1.tz_localize("Asia/Tokyo")
-        exp_idx = exp_idx.tz_localize("Asia/Tokyo")
-
-        arr, idx = idx1.factorize()
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, exp_idx)
-        assert idx.freq == exp_idx.freq
-
-        idx2 = DatetimeIndex(
-            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
-        )
-
-        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
-        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
-        arr, idx = idx2.factorize(sort=True)
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, exp_idx)
-        assert idx.freq == exp_idx.freq
-
-        exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
-        exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
-        arr, idx = idx2.factorize()
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, exp_idx)
-        assert idx.freq == exp_idx.freq
-
-    def test_factorize_preserves_freq(self):
-        # GH#38120 freq should be preserved
-        idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo")
-        exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
-
-        arr, idx = idx3.factorize()
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, idx3)
-        assert idx.freq == idx3.freq
-
-        arr, idx = pd.factorize(idx3)
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, idx3)
-        assert idx.freq == idx3.freq
-
-    def test_factorize_tz(self, tz_naive_fixture, index_or_series):
-        tz = tz_naive_fixture
-        # GH#13750
-        base = date_range("2016-11-05", freq="H", periods=100, tz=tz)
-        idx = base.repeat(5)
-
-        exp_arr = np.arange(100, dtype=np.intp).repeat(5)
-
-        obj = index_or_series(idx)
-
-        arr, res = obj.factorize()
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        expected = base._with_freq(None)
-        tm.assert_index_equal(res, expected)
-        assert res.freq == expected.freq
-
-    def test_factorize_dst(self, index_or_series):
-        # GH 13750
-        idx = date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern")
-        obj = index_or_series(idx)
-
-        arr, res = obj.factorize()
-        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
-        tm.assert_index_equal(res, idx)
-        if index_or_series is Index:
-            assert res.freq == idx.freq
-
-        idx = date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern")
-        obj = index_or_series(idx)
-
-        arr, res = obj.factorize()
-        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
-        tm.assert_index_equal(res, idx)
-        if index_or_series is Index:
-            assert res.freq == idx.freq
-
     @pytest.mark.parametrize(
         "arr, expected",
         [
diff --git a/pandas/tests/indexes/period/test_factorize.py b/pandas/tests/indexes/period/methods/test_factorize.py
similarity index 100%
rename from pandas/tests/indexes/period/test_factorize.py
rename to pandas/tests/indexes/period/methods/test_factorize.py
diff --git a/pandas/tests/indexes/timedeltas/methods/test_factorize.py b/pandas/tests/indexes/timedeltas/methods/test_factorize.py
new file mode 100644
index 0000000000000..dcf8cefba70fd
--- /dev/null
+++ b/pandas/tests/indexes/timedeltas/methods/test_factorize.py
@@ -0,0 +1,36 @@
+import numpy as np
+
+from pandas import TimedeltaIndex, factorize, timedelta_range
+import pandas._testing as tm
+
+
+class TestTimedeltaIndexFactorize:
+    def test_factorize(self):
+        idx1 = TimedeltaIndex(["1 day", "1 day", "2 day", "2 day", "3 day", "3 day"])
+
+        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
+        exp_idx = TimedeltaIndex(["1 day", "2 day", "3 day"])
+
+        arr, idx = idx1.factorize()
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
+
+        arr, idx = idx1.factorize(sort=True)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
+
+    def test_factorize_preserves_freq(self):
+        # GH#38120 freq should be preserved
+        idx3 = timedelta_range("1 day", periods=4, freq="s")
+        exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
+        arr, idx = idx3.factorize()
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
+
+        arr, idx = factorize(idx3)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
index 9c9fa08dd2852..a86cd8dd11c59 100644
--- a/pandas/tests/indexes/timedeltas/test_timedelta.py
+++ b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -65,36 +65,6 @@ def test_isin(self):
             index.isin([index[2], 5]), np.array([False, False, True, False])
         )
 
-    def test_factorize(self):
-        idx1 = TimedeltaIndex(["1 day", "1 day", "2 day", "2 day", "3 day", "3 day"])
-
-        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
-        exp_idx = TimedeltaIndex(["1 day", "2 day", "3 day"])
-
-        arr, idx = idx1.factorize()
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, exp_idx)
-        assert idx.freq == exp_idx.freq
-
-        arr, idx = idx1.factorize(sort=True)
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, exp_idx)
-        assert idx.freq == exp_idx.freq
-
-    def test_factorize_preserves_freq(self):
-        # GH#38120 freq should be preserved
-        idx3 = timedelta_range("1 day", periods=4, freq="s")
-        exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
-        arr, idx = idx3.factorize()
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, idx3)
-        assert idx.freq == idx3.freq
-
-        arr, idx = pd.factorize(idx3)
-        tm.assert_numpy_array_equal(arr, exp_arr)
-        tm.assert_index_equal(idx, idx3)
-        assert idx.freq == idx3.freq
-
     def test_sort_values(self):
 
         idx = TimedeltaIndex(["4d", "1d", "2d"])