From a947f0f83b5e1e1bbe795d74c2882951a25ce008 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:33:38 -0800 Subject: [PATCH 1/8] parametrize --- .../tests/indexes/datetimes/test_indexing.py | 19 +-- .../tests/indexes/timedeltas/test_indexing.py | 131 +++++++++--------- 2 files changed, 79 insertions(+), 71 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 2f954117f48d7..1f07dcb931238 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -639,18 +639,19 @@ def test_get_value(self): result = dti.get_value(ser, key.to_datetime64()) assert result == 7 - def test_get_loc(self): + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + def test_get_loc_method_exact_match(self, method): idx = pd.date_range("2000-01-01", periods=3) - for method in [None, "pad", "backfill", "nearest"]: - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + if method is not None: + assert idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 - if method is not None: - assert ( - idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 - ) + def test_get_loc(self): + idx = pd.date_range("2000-01-01", periods=3) assert idx.get_loc("2000-01-01", method="nearest") == 0 assert idx.get_loc("2000-01-01T12", method="nearest") == 1 diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 14fff6f9c85b5..6a4be9e538cb8 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -180,6 +180,75 @@ def test_take_fill_value(self): idx.take(np.array([1, -5])) +class TestGetLoc: + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + def test_get_loc_method_exact_match(self, method): + idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) + + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + def test_get_loc(self): + idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) + + assert idx.get_loc(idx[1], "pad", tolerance=Timedelta(0)) == 1 + assert idx.get_loc(idx[1], "pad", tolerance=np.timedelta64(0, "s")) == 1 + assert idx.get_loc(idx[1], "pad", tolerance=timedelta(0)) == 1 + + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + idx.get_loc(idx[1], method="nearest", tolerance="foo") + + with pytest.raises(ValueError, match="tolerance size must match"): + idx.get_loc( + idx[1], + method="nearest", + tolerance=[ + Timedelta(0).to_timedelta64(), + Timedelta(0).to_timedelta64(), + ], + ) + + for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: + assert idx.get_loc("1 day 1 hour", method) == loc + + # GH 16909 + assert idx.get_loc(idx[1].to_timedelta64()) == 1 + + # GH 16896 + assert idx.get_loc("0 days") == 0 + + def test_get_loc_nat(self): + tidx = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"]) + + assert tidx.get_loc(pd.NaT) == 1 + assert tidx.get_loc(None) == 1 + assert tidx.get_loc(float("nan")) == 1 + assert tidx.get_loc(np.nan) == 1 + + +class TestGetIndexer: + def test_get_indexer(self): + idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) + tm.assert_numpy_array_equal( + idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + + res = idx.get_indexer(target, "nearest", tolerance=Timedelta("1 hour")) + tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) + + class TestTimedeltaIndex: def test_insert_empty(self): # Corner case inserting with length zero doesnt raise IndexError @@ -328,65 +397,3 @@ def test_delete_slice(self): tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq - - def test_get_loc(self): - idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) - - for method in [None, "pad", "backfill", "nearest"]: - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - assert idx.get_loc(idx[1], "pad", tolerance=Timedelta(0)) == 1 - assert idx.get_loc(idx[1], "pad", tolerance=np.timedelta64(0, "s")) == 1 - assert idx.get_loc(idx[1], "pad", tolerance=timedelta(0)) == 1 - - with pytest.raises(ValueError, match="unit abbreviation w/o a number"): - idx.get_loc(idx[1], method="nearest", tolerance="foo") - - with pytest.raises(ValueError, match="tolerance size must match"): - idx.get_loc( - idx[1], - method="nearest", - tolerance=[ - Timedelta(0).to_timedelta64(), - Timedelta(0).to_timedelta64(), - ], - ) - - for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: - assert idx.get_loc("1 day 1 hour", method) == loc - - # GH 16909 - assert idx.get_loc(idx[1].to_timedelta64()) == 1 - - # GH 16896 - assert idx.get_loc("0 days") == 0 - - def test_get_loc_nat(self): - tidx = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"]) - - assert tidx.get_loc(pd.NaT) == 1 - assert tidx.get_loc(None) == 1 - assert tidx.get_loc(float("nan")) == 1 - assert tidx.get_loc(np.nan) == 1 - - def test_get_indexer(self): - idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) - tm.assert_numpy_array_equal( - idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) - ) - - target = pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) - ) - tm.assert_numpy_array_equal( - idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) - ) - tm.assert_numpy_array_equal( - idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) - ) - - res = idx.get_indexer(target, "nearest", tolerance=Timedelta("1 hour")) - tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) From f778c88cb08a92d9d0bbfdac8ca1302ddeab3cea Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:36:33 -0800 Subject: [PATCH 2/8] implement test_map --- .../indexes/categorical/test_category.py | 17 ------------- pandas/tests/indexes/categorical/test_map.py | 24 +++++++++++++++++++ 2 files changed, 24 insertions(+), 17 deletions(-) create mode 100644 pandas/tests/indexes/categorical/test_map.py diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index d870259c2539b..d09dc586fe056 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -975,20 +975,3 @@ def test_engine_type(self, dtype, engine_type): ci.values._codes = ci.values._codes.astype("int64") assert np.issubdtype(ci.codes.dtype, dtype) assert isinstance(ci._engine, engine_type) - - @pytest.mark.parametrize( - "data, categories", - [ - (list("abcbca"), list("cab")), - (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), - ], - ids=["string", "interval"], - ) - def test_map_str(self, data, categories, ordered_fixture): - # GH 31202 - override base class since we want to maintain categorical/ordered - index = CategoricalIndex(data, categories=categories, ordered=ordered_fixture) - result = index.map(str) - expected = CategoricalIndex( - map(str, data), categories=map(str, categories), ordered=ordered_fixture - ) - tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_map.py b/pandas/tests/indexes/categorical/test_map.py new file mode 100644 index 0000000000000..eac78eeb234c7 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_map.py @@ -0,0 +1,24 @@ +import pytest + +import pandas as pd +from pandas import CategoricalIndex +import pandas._testing as tm + + +class TestMap: + @pytest.mark.parametrize( + "data, categories", + [ + (list("abcbca"), list("cab")), + (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), + ], + ids=["string", "interval"], + ) + def test_map_str(self, data, categories, ordered_fixture): + # GH 31202 - override base class since we want to maintain categorical/ordered + index = CategoricalIndex(data, categories=categories, ordered=ordered_fixture) + result = index.map(str) + expected = CategoricalIndex( + map(str, data), categories=map(str, categories), ordered=ordered_fixture + ) + tm.assert_index_equal(result, expected) From 49220d02ba427cb50ff02697a4940d475c01f37b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:39:05 -0800 Subject: [PATCH 3/8] move map tests --- .../indexes/categorical/test_category.py | 70 ------------------ .../indexes/categorical/test_indexing.py | 0 pandas/tests/indexes/categorical/test_map.py | 73 ++++++++++++++++++- .../tests/indexes/categorical/test_reindex.py | 0 4 files changed, 72 insertions(+), 71 deletions(-) create mode 100644 pandas/tests/indexes/categorical/test_indexing.py create mode 100644 pandas/tests/indexes/categorical/test_reindex.py diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index d09dc586fe056..f1be007cf9932 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -146,76 +146,6 @@ def test_contains_list(self): with pytest.raises(TypeError, match="unhashable type"): ["a", "b"] in idx - def test_map(self): - ci = pd.CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) - result = ci.map(lambda x: x.lower()) - exp = pd.CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) - tm.assert_index_equal(result, exp) - - ci = pd.CategoricalIndex( - list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" - ) - result = ci.map(lambda x: x.lower()) - exp = pd.CategoricalIndex( - list("ababc"), categories=list("bac"), ordered=False, name="XXX" - ) - tm.assert_index_equal(result, exp) - - # GH 12766: Return an index not an array - tm.assert_index_equal( - ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") - ) - - # change categories dtype - ci = pd.CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) - - def f(x): - return {"A": 10, "B": 20, "C": 30}.get(x) - - result = ci.map(f) - exp = pd.CategoricalIndex( - [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False - ) - tm.assert_index_equal(result, exp) - - result = ci.map(pd.Series([10, 20, 30], index=["A", "B", "C"])) - tm.assert_index_equal(result, exp) - - result = ci.map({"A": 10, "B": 20, "C": 30}) - tm.assert_index_equal(result, exp) - - def test_map_with_categorical_series(self): - # GH 12756 - a = pd.Index([1, 2, 3, 4]) - b = pd.Series(["even", "odd", "even", "odd"], dtype="category") - c = pd.Series(["even", "odd", "even", "odd"]) - - exp = CategoricalIndex(["odd", "even", "odd", np.nan]) - tm.assert_index_equal(a.map(b), exp) - exp = pd.Index(["odd", "even", "odd", np.nan]) - tm.assert_index_equal(a.map(c), exp) - - @pytest.mark.parametrize( - ("data", "f"), - ( - ([1, 1, np.nan], pd.isna), - ([1, 2, np.nan], pd.isna), - ([1, 1, np.nan], {1: False}), - ([1, 2, np.nan], {1: False, 2: False}), - ([1, 1, np.nan], pd.Series([False, False])), - ([1, 2, np.nan], pd.Series([False, False, False])), - ), - ) - def test_map_with_nan(self, data, f): # GH 24241 - values = pd.Categorical(data) - result = values.map(f) - if data[1] == 1: - expected = pd.Categorical([False, False, np.nan]) - tm.assert_categorical_equal(result, expected) - else: - expected = pd.Index([False, False, np.nan]) - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series]) def test_where(self, klass): i = self.create_index() diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/categorical/test_map.py b/pandas/tests/indexes/categorical/test_map.py index eac78eeb234c7..943359a72e971 100644 --- a/pandas/tests/indexes/categorical/test_map.py +++ b/pandas/tests/indexes/categorical/test_map.py @@ -1,7 +1,8 @@ +import numpy as np import pytest import pandas as pd -from pandas import CategoricalIndex +from pandas import CategoricalIndex, Index import pandas._testing as tm @@ -22,3 +23,73 @@ def test_map_str(self, data, categories, ordered_fixture): map(str, data), categories=map(str, categories), ordered=ordered_fixture ) tm.assert_index_equal(result, expected) + + def test_map(self): + ci = pd.CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) + result = ci.map(lambda x: x.lower()) + exp = pd.CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_index_equal(result, exp) + + ci = pd.CategoricalIndex( + list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" + ) + result = ci.map(lambda x: x.lower()) + exp = pd.CategoricalIndex( + list("ababc"), categories=list("bac"), ordered=False, name="XXX" + ) + tm.assert_index_equal(result, exp) + + # GH 12766: Return an index not an array + tm.assert_index_equal( + ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") + ) + + # change categories dtype + ci = pd.CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) + + def f(x): + return {"A": 10, "B": 20, "C": 30}.get(x) + + result = ci.map(f) + exp = pd.CategoricalIndex( + [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False + ) + tm.assert_index_equal(result, exp) + + result = ci.map(pd.Series([10, 20, 30], index=["A", "B", "C"])) + tm.assert_index_equal(result, exp) + + result = ci.map({"A": 10, "B": 20, "C": 30}) + tm.assert_index_equal(result, exp) + + def test_map_with_categorical_series(self): + # GH 12756 + a = pd.Index([1, 2, 3, 4]) + b = pd.Series(["even", "odd", "even", "odd"], dtype="category") + c = pd.Series(["even", "odd", "even", "odd"]) + + exp = CategoricalIndex(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(b), exp) + exp = pd.Index(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(c), exp) + + @pytest.mark.parametrize( + ("data", "f"), + ( + ([1, 1, np.nan], pd.isna), + ([1, 2, np.nan], pd.isna), + ([1, 1, np.nan], {1: False}), + ([1, 2, np.nan], {1: False, 2: False}), + ([1, 1, np.nan], pd.Series([False, False])), + ([1, 2, np.nan], pd.Series([False, False, False])), + ), + ) + def test_map_with_nan(self, data, f): # GH 24241 + values = pd.Categorical(data) + result = values.map(f) + if data[1] == 1: + expected = pd.Categorical([False, False, np.nan]) + tm.assert_categorical_equal(result, expected) + else: + expected = pd.Index([False, False, np.nan]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_reindex.py b/pandas/tests/indexes/categorical/test_reindex.py new file mode 100644 index 0000000000000..e69de29bb2d1d From a127ed9ba1ddb9b92700c1dbb8108c71f82b8cb8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:44:25 -0800 Subject: [PATCH 4/8] move tests --- .../indexes/categorical/test_category.py | 175 +----------------- .../indexes/categorical/test_indexing.py | 123 ++++++++++++ .../tests/indexes/categorical/test_reindex.py | 53 ++++++ 3 files changed, 186 insertions(+), 165 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index f1be007cf9932..2cd7167816011 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -314,7 +314,7 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - def test_reindex_base(self): + def test_get_indexer_base(self): # Determined by cat ordering. idx = CategoricalIndex(list("cab"), categories=list("cab")) expected = np.arange(len(idx), dtype=np.intp) @@ -325,7 +325,7 @@ def test_reindex_base(self): with pytest.raises(ValueError, match="Invalid fill method"): idx.get_indexer(idx, method="invalid") - def test_reindexing(self): + def test_get_indexer_non_unique(self): np.random.seed(123456789) ci = self.create_index() @@ -350,53 +350,6 @@ def test_reindexing(self): actual = ci.get_indexer(finder) tm.assert_numpy_array_equal(expected, actual) - def test_reindex_dtype(self): - c = CategoricalIndex(["a", "b", "c", "a"]) - res, indexer = c.reindex(["a", "c"]) - tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - - c = CategoricalIndex(["a", "b", "c", "a"]) - res, indexer = c.reindex(Categorical(["a", "c"])) - - exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - - c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - res, indexer = c.reindex(["a", "c"]) - exp = Index(["a", "a", "c"], dtype="object") - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - - c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - res, indexer = c.reindex(Categorical(["a", "c"])) - exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - - def test_reindex_duplicate_target(self): - # See GH25459 - cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"]) - res, indexer = cat.reindex(["a", "c", "c"]) - exp = Index(["a", "c", "c"], dtype="object") - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) - - res, indexer = cat.reindex( - CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) - ) - exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) - - def test_reindex_empty_index(self): - # See GH16770 - c = CategoricalIndex([]) - res, indexer = c.reindex(["a", "b"]) - tm.assert_index_equal(res, Index(["a", "b"]), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) - @pytest.mark.parametrize( "data, non_lexsorted_data", [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]], @@ -767,122 +720,6 @@ def test_fillna_categorical(self): with pytest.raises(ValueError, match=msg): idx.fillna(2.0) - def test_take_fill_value(self): - # GH 12631 - - # numeric category - idx = pd.CategoricalIndex([1, 2, 3], name="xxx") - result = idx.take(np.array([1, 0, -1])) - expected = pd.CategoricalIndex([2, 1, 3], name="xxx") - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx") - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) - expected = pd.CategoricalIndex([2, 1, 3], name="xxx") - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - # object category - idx = pd.CategoricalIndex( - list("CBA"), categories=list("ABC"), ordered=True, name="xxx" - ) - result = idx.take(np.array([1, 0, -1])) - expected = pd.CategoricalIndex( - list("BCA"), categories=list("ABC"), ordered=True, name="xxx" - ) - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.CategoricalIndex( - ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx" - ) - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) - expected = pd.CategoricalIndex( - list("BCA"), categories=list("ABC"), ordered=True, name="xxx" - ) - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" - ) - with pytest.raises(ValueError, match=msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with pytest.raises(ValueError, match=msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with pytest.raises(IndexError): - idx.take(np.array([1, -5])) - - def test_take_fill_value_datetime(self): - - # datetime category - idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx") - idx = pd.CategoricalIndex(idx) - result = idx.take(np.array([1, 0, -1])) - expected = pd.DatetimeIndex( - ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" - ) - expected = pd.CategoricalIndex(expected) - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx") - exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"]) - expected = pd.CategoricalIndex(expected, categories=exp_cats) - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) - expected = pd.DatetimeIndex( - ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" - ) - expected = pd.CategoricalIndex(expected) - tm.assert_index_equal(result, expected) - - msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" - ) - with pytest.raises(ValueError, match=msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with pytest.raises(ValueError, match=msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with pytest.raises(IndexError): - idx.take(np.array([1, -5])) - - def test_take_invalid_kwargs(self): - idx = pd.CategoricalIndex([1, 2, 3], name="foo") - indices = [1, 0, -1] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - with pytest.raises(TypeError, match=msg): - idx.take(indices, foo=2) - - msg = "the 'out' parameter is not supported" - with pytest.raises(ValueError, match=msg): - idx.take(indices, out=indices) - - msg = "the 'mode' parameter is not supported" - with pytest.raises(ValueError, match=msg): - idx.take(indices, mode="clip") - @pytest.mark.parametrize( "dtype, engine_type", [ @@ -905,3 +742,11 @@ def test_engine_type(self, dtype, engine_type): ci.values._codes = ci.values._codes.astype("int64") assert np.issubdtype(ci.codes.dtype, dtype) assert isinstance(ci._engine, engine_type) + + def test_reindex_base(self): + # See test_reindex.py + pass + + def test_map_str(self): + # See test_map.py + pass diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index e69de29bb2d1d..7e87425af2ad2 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -0,0 +1,123 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestTake: + def test_take_fill_value(self): + # GH 12631 + + # numeric category + idx = pd.CategoricalIndex([1, 2, 3], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = pd.CategoricalIndex([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.CategoricalIndex([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # object category + idx = pd.CategoricalIndex( + list("CBA"), categories=list("ABC"), ordered=True, name="xxx" + ) + result = idx.take(np.array([1, 0, -1])) + expected = pd.CategoricalIndex( + list("BCA"), categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.CategoricalIndex( + ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.CategoricalIndex( + list("BCA"), categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_take_fill_value_datetime(self): + + # datetime category + idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx") + idx = pd.CategoricalIndex(idx) + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" + ) + expected = pd.CategoricalIndex(expected) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx") + exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"]) + expected = pd.CategoricalIndex(expected, categories=exp_cats) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" + ) + expected = pd.CategoricalIndex(expected) + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_take_invalid_kwargs(self): + idx = pd.CategoricalIndex([1, 2, 3], name="foo") + indices = [1, 0, -1] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") diff --git a/pandas/tests/indexes/categorical/test_reindex.py b/pandas/tests/indexes/categorical/test_reindex.py index e69de29bb2d1d..f59ddc42ce4e4 100644 --- a/pandas/tests/indexes/categorical/test_reindex.py +++ b/pandas/tests/indexes/categorical/test_reindex.py @@ -0,0 +1,53 @@ +import numpy as np + +from pandas import Categorical, CategoricalIndex, Index +import pandas._testing as tm + + +class TestReindex: + def test_reindex_dtype(self): + c = CategoricalIndex(["a", "b", "c", "a"]) + res, indexer = c.reindex(["a", "c"]) + tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + c = CategoricalIndex(["a", "b", "c", "a"]) + res, indexer = c.reindex(Categorical(["a", "c"])) + + exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + res, indexer = c.reindex(["a", "c"]) + exp = Index(["a", "a", "c"], dtype="object") + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + res, indexer = c.reindex(Categorical(["a", "c"])) + exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_duplicate_target(self): + # See GH25459 + cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"]) + res, indexer = cat.reindex(["a", "c", "c"]) + exp = Index(["a", "c", "c"], dtype="object") + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) + + res, indexer = cat.reindex( + CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + ) + exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) + + def test_reindex_empty_index(self): + # See GH16770 + c = CategoricalIndex([]) + res, indexer = c.reindex(["a", "b"]) + tm.assert_index_equal(res, Index(["a", "b"]), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) From 22e060fedef69f56788d6db56e0b55f730e8b78f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:45:38 -0800 Subject: [PATCH 5/8] Separate out get_loc --- .../indexes/categorical/test_category.py | 47 ----------------- .../indexes/categorical/test_indexing.py | 50 +++++++++++++++++++ 2 files changed, 50 insertions(+), 47 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 2cd7167816011..489d2b5190e7c 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -423,53 +423,6 @@ def test_get_indexer(self): with pytest.raises(NotImplementedError, match=msg): idx2.get_indexer(idx1, method="nearest") - def test_get_loc(self): - # GH 12531 - cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc")) - idx1 = Index(list("abcde")) - assert cidx1.get_loc("a") == idx1.get_loc("a") - assert cidx1.get_loc("e") == idx1.get_loc("e") - - for i in [cidx1, idx1]: - with pytest.raises(KeyError, match="'NOT-EXIST'"): - i.get_loc("NOT-EXIST") - - # non-unique - cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc")) - idx2 = Index(list("aacded")) - - # results in bool array - res = cidx2.get_loc("d") - tm.assert_numpy_array_equal(res, idx2.get_loc("d")) - tm.assert_numpy_array_equal( - res, np.array([False, False, False, True, False, True]) - ) - # unique element results in scalar - res = cidx2.get_loc("e") - assert res == idx2.get_loc("e") - assert res == 4 - - for i in [cidx2, idx2]: - with pytest.raises(KeyError, match="'NOT-EXIST'"): - i.get_loc("NOT-EXIST") - - # non-unique, sliceable - cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc")) - idx3 = Index(list("aabbb")) - - # results in slice - res = cidx3.get_loc("a") - assert res == idx3.get_loc("a") - assert res == slice(0, 2, None) - - res = cidx3.get_loc("b") - assert res == idx3.get_loc("b") - assert res == slice(2, 5, None) - - for i in [cidx3, idx3]: - with pytest.raises(KeyError, match="'c'"): - i.get_loc("c") - def test_repr_roundtrip(self): ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index 7e87425af2ad2..194b716a33751 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -2,6 +2,7 @@ import pytest import pandas as pd +from pandas import CategoricalIndex, Index import pandas._testing as tm @@ -121,3 +122,52 @@ def test_take_invalid_kwargs(self): msg = "the 'mode' parameter is not supported" with pytest.raises(ValueError, match=msg): idx.take(indices, mode="clip") + + +class TestGetLoc: + def test_get_loc(self): + # GH 12531 + cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc")) + idx1 = Index(list("abcde")) + assert cidx1.get_loc("a") == idx1.get_loc("a") + assert cidx1.get_loc("e") == idx1.get_loc("e") + + for i in [cidx1, idx1]: + with pytest.raises(KeyError, match="'NOT-EXIST'"): + i.get_loc("NOT-EXIST") + + # non-unique + cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc")) + idx2 = Index(list("aacded")) + + # results in bool array + res = cidx2.get_loc("d") + tm.assert_numpy_array_equal(res, idx2.get_loc("d")) + tm.assert_numpy_array_equal( + res, np.array([False, False, False, True, False, True]) + ) + # unique element results in scalar + res = cidx2.get_loc("e") + assert res == idx2.get_loc("e") + assert res == 4 + + for i in [cidx2, idx2]: + with pytest.raises(KeyError, match="'NOT-EXIST'"): + i.get_loc("NOT-EXIST") + + # non-unique, sliceable + cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc")) + idx3 = Index(list("aabbb")) + + # results in slice + res = cidx3.get_loc("a") + assert res == idx3.get_loc("a") + assert res == slice(0, 2, None) + + res = cidx3.get_loc("b") + assert res == idx3.get_loc("b") + assert res == slice(2, 5, None) + + for i in [cidx3, idx3]: + with pytest.raises(KeyError, match="'c'"): + i.get_loc("c") From 6ed58697cd240b3ab55f5d8ddb051cd9bfe67eee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:48:35 -0800 Subject: [PATCH 6/8] move get_indexer tests --- .../indexes/categorical/test_category.py | 58 ------------------ .../indexes/categorical/test_indexing.py | 60 +++++++++++++++++++ 2 files changed, 60 insertions(+), 58 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 489d2b5190e7c..c18cd1f252c83 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -314,42 +314,6 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - def test_get_indexer_base(self): - # Determined by cat ordering. - idx = CategoricalIndex(list("cab"), categories=list("cab")) - expected = np.arange(len(idx), dtype=np.intp) - - actual = idx.get_indexer(idx) - tm.assert_numpy_array_equal(expected, actual) - - with pytest.raises(ValueError, match="Invalid fill method"): - idx.get_indexer(idx, method="invalid") - - def test_get_indexer_non_unique(self): - np.random.seed(123456789) - - ci = self.create_index() - oidx = Index(np.array(ci)) - - for n in [1, 2, 5, len(ci)]: - finder = oidx[np.random.randint(0, len(ci), size=n)] - expected = oidx.get_indexer_non_unique(finder)[0] - - actual = ci.get_indexer(finder) - tm.assert_numpy_array_equal(expected, actual) - - # see gh-17323 - # - # Even when indexer is equal to the - # members in the index, we should - # respect duplicates instead of taking - # the fast-track path. - for finder in [list("aabbca"), list("aababca")]: - expected = oidx.get_indexer_non_unique(finder)[0] - - actual = ci.get_indexer(finder) - tm.assert_numpy_array_equal(expected, actual) - @pytest.mark.parametrize( "data, non_lexsorted_data", [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]], @@ -401,28 +365,6 @@ def test_drop_duplicates(self): tm.assert_index_equal(idx.drop_duplicates(), expected) tm.assert_index_equal(idx.unique(), expected) - def test_get_indexer(self): - - idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) - idx2 = CategoricalIndex(list("abf")) - - for indexer in [idx2, list("abf"), Index(list("abf"))]: - r1 = idx1.get_indexer(idx2) - tm.assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp)) - - msg = ( - "method='pad' and method='backfill' not implemented yet for " - "CategoricalIndex" - ) - with pytest.raises(NotImplementedError, match=msg): - idx2.get_indexer(idx1, method="pad") - with pytest.raises(NotImplementedError, match=msg): - idx2.get_indexer(idx1, method="backfill") - - msg = "method='nearest' not implemented yet for CategoricalIndex" - with pytest.raises(NotImplementedError, match=msg): - idx2.get_indexer(idx1, method="nearest") - def test_repr_roundtrip(self): ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index 194b716a33751..6fce6542d228e 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -171,3 +171,63 @@ def test_get_loc(self): for i in [cidx3, idx3]: with pytest.raises(KeyError, match="'c'"): i.get_loc("c") + + +class TestGetIndexer: + def test_get_indexer_base(self): + # Determined by cat ordering. + idx = CategoricalIndex(list("cab"), categories=list("cab")) + expected = np.arange(len(idx), dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with pytest.raises(ValueError, match="Invalid fill method"): + idx.get_indexer(idx, method="invalid") + + def test_get_indexer_non_unique(self): + np.random.seed(123456789) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + oidx = Index(np.array(ci)) + + for n in [1, 2, 5, len(ci)]: + finder = oidx[np.random.randint(0, len(ci), size=n)] + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + + # see gh-17323 + # + # Even when indexer is equal to the + # members in the index, we should + # respect duplicates instead of taking + # the fast-track path. + for finder in [list("aabbca"), list("aababca")]: + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + + def test_get_indexer(self): + + idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) + idx2 = CategoricalIndex(list("abf")) + + for indexer in [idx2, list("abf"), Index(list("abf"))]: + r1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp)) + + msg = ( + "method='pad' and method='backfill' not implemented yet for " + "CategoricalIndex" + ) + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="pad") + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="backfill") + + msg = "method='nearest' not implemented yet for CategoricalIndex" + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="nearest") From 890ef881530507527c12b076370167cea91d675b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:53:16 -0800 Subject: [PATCH 7/8] implement TestGetLoc --- .../tests/indexes/datetimes/test_indexing.py | 218 +++++++++--------- 1 file changed, 110 insertions(+), 108 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 1f07dcb931238..de024d205e4aa 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -344,6 +344,116 @@ def test_take_fill_value_with_timezone(self): idx.take(np.array([1, -5])) +class TestGetLoc: + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + def test_get_loc_method_exact_match(self, method): + idx = pd.date_range("2000-01-01", periods=3) + + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + if method is not None: + assert idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 + + def test_get_loc(self): + idx = pd.date_range("2000-01-01", periods=3) + + assert idx.get_loc("2000-01-01", method="nearest") == 0 + assert idx.get_loc("2000-01-01T12", method="nearest") == 1 + + assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1 + assert ( + idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D")) + == 1 + ) + assert ( + idx.get_loc( + "2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D") + ) + == 1 + ) + assert ( + idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1 + ) + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo") + with pytest.raises(KeyError, match="'2000-01-01T03'"): + idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours") + with pytest.raises( + ValueError, match="tolerance size must match target index size" + ): + idx.get_loc( + "2000-01-01", + method="nearest", + tolerance=[ + pd.Timedelta("1day").to_timedelta64(), + pd.Timedelta("1day").to_timedelta64(), + ], + ) + + assert idx.get_loc("2000", method="nearest") == slice(0, 3) + assert idx.get_loc("2000-01", method="nearest") == slice(0, 3) + + assert idx.get_loc("1999", method="nearest") == 0 + assert idx.get_loc("2001", method="nearest") == 2 + + with pytest.raises(KeyError, match="'1999'"): + idx.get_loc("1999", method="pad") + with pytest.raises(KeyError, match="'2001'"): + idx.get_loc("2001", method="backfill") + + with pytest.raises(KeyError, match="'foobar'"): + idx.get_loc("foobar") + with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"): + idx.get_loc(slice(2)) + + idx = pd.to_datetime(["2000-01-01", "2000-01-04"]) + assert idx.get_loc("2000-01-02", method="nearest") == 0 + assert idx.get_loc("2000-01-03", method="nearest") == 1 + assert idx.get_loc("2000-01", method="nearest") == slice(0, 2) + + # time indexing + idx = pd.date_range("2000-01-01", periods=24, freq="H") + tm.assert_numpy_array_equal( + idx.get_loc(time(12)), np.array([12]), check_dtype=False + ) + tm.assert_numpy_array_equal( + idx.get_loc(time(12, 30)), np.array([]), check_dtype=False + ) + with pytest.raises(NotImplementedError): + idx.get_loc(time(12, 30), method="pad") + + def test_get_loc_reasonable_key_error(self): + # GH#1062 + index = DatetimeIndex(["1/3/2000"]) + with pytest.raises(KeyError, match="2000"): + index.get_loc("1/1/2000") + + @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)]) + def test_get_loc_timedelta_invalid_key(self, key): + # GH#20464 + dti = pd.date_range("1970-01-01", periods=10) + with pytest.raises(TypeError): + dti.get_loc(key) + + def test_get_loc_nat(self): + # GH#20464 + index = DatetimeIndex(["1/3/2000", "NaT"]) + assert index.get_loc(pd.NaT) == 1 + + assert index.get_loc(None) == 1 + + assert index.get_loc(np.nan) == 1 + + assert index.get_loc(pd.NA) == 1 + + assert index.get_loc(np.datetime64("NaT")) == 1 + + with pytest.raises(KeyError, match="NaT"): + index.get_loc(np.timedelta64("NaT")) + + class TestDatetimeIndex: @pytest.mark.parametrize( "null", [None, np.nan, np.datetime64("NaT"), pd.NaT, pd.NA] @@ -639,85 +749,6 @@ def test_get_value(self): result = dti.get_value(ser, key.to_datetime64()) assert result == 7 - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - def test_get_loc_method_exact_match(self, method): - idx = pd.date_range("2000-01-01", periods=3) - - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - if method is not None: - assert idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 - - def test_get_loc(self): - idx = pd.date_range("2000-01-01", periods=3) - - assert idx.get_loc("2000-01-01", method="nearest") == 0 - assert idx.get_loc("2000-01-01T12", method="nearest") == 1 - - assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1 - assert ( - idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D")) - == 1 - ) - assert ( - idx.get_loc( - "2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D") - ) - == 1 - ) - assert ( - idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1 - ) - with pytest.raises(ValueError, match="unit abbreviation w/o a number"): - idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo") - with pytest.raises(KeyError, match="'2000-01-01T03'"): - idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours") - with pytest.raises( - ValueError, match="tolerance size must match target index size" - ): - idx.get_loc( - "2000-01-01", - method="nearest", - tolerance=[ - pd.Timedelta("1day").to_timedelta64(), - pd.Timedelta("1day").to_timedelta64(), - ], - ) - - assert idx.get_loc("2000", method="nearest") == slice(0, 3) - assert idx.get_loc("2000-01", method="nearest") == slice(0, 3) - - assert idx.get_loc("1999", method="nearest") == 0 - assert idx.get_loc("2001", method="nearest") == 2 - - with pytest.raises(KeyError, match="'1999'"): - idx.get_loc("1999", method="pad") - with pytest.raises(KeyError, match="'2001'"): - idx.get_loc("2001", method="backfill") - - with pytest.raises(KeyError, match="'foobar'"): - idx.get_loc("foobar") - with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"): - idx.get_loc(slice(2)) - - idx = pd.to_datetime(["2000-01-01", "2000-01-04"]) - assert idx.get_loc("2000-01-02", method="nearest") == 0 - assert idx.get_loc("2000-01-03", method="nearest") == 1 - assert idx.get_loc("2000-01", method="nearest") == slice(0, 2) - - # time indexing - idx = pd.date_range("2000-01-01", periods=24, freq="H") - tm.assert_numpy_array_equal( - idx.get_loc(time(12)), np.array([12]), check_dtype=False - ) - tm.assert_numpy_array_equal( - idx.get_loc(time(12, 30)), np.array([]), check_dtype=False - ) - with pytest.raises(NotImplementedError): - idx.get_loc(time(12, 30), method="pad") - def test_get_indexer(self): idx = pd.date_range("2000-01-01", periods=3) exp = np.array([0, 1, 2], dtype=np.intp) @@ -757,32 +788,3 @@ def test_get_indexer(self): idx.get_indexer(target, "nearest", tolerance=tol_bad) with pytest.raises(ValueError): idx.get_indexer(idx[[0]], method="nearest", tolerance="foo") - - def test_reasonable_key_error(self): - # GH#1062 - index = DatetimeIndex(["1/3/2000"]) - with pytest.raises(KeyError, match="2000"): - index.get_loc("1/1/2000") - - @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)]) - def test_timedelta_invalid_key(self, key): - # GH#20464 - dti = pd.date_range("1970-01-01", periods=10) - with pytest.raises(TypeError): - dti.get_loc(key) - - def test_get_loc_nat(self): - # GH#20464 - index = DatetimeIndex(["1/3/2000", "NaT"]) - assert index.get_loc(pd.NaT) == 1 - - assert index.get_loc(None) == 1 - - assert index.get_loc(np.nan) == 1 - - assert index.get_loc(pd.NA) == 1 - - assert index.get_loc(np.datetime64("NaT")) == 1 - - with pytest.raises(KeyError, match="NaT"): - index.get_loc(np.timedelta64("NaT")) From 13bf93f748365e0bddafb41c3f48c9f3fc871b4c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 18:55:01 -0800 Subject: [PATCH 8/8] revert --- .../tests/indexes/datetimes/test_indexing.py | 217 +++++++++--------- .../tests/indexes/timedeltas/test_indexing.py | 131 +++++------ 2 files changed, 169 insertions(+), 179 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index de024d205e4aa..2f954117f48d7 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -344,116 +344,6 @@ def test_take_fill_value_with_timezone(self): idx.take(np.array([1, -5])) -class TestGetLoc: - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - def test_get_loc_method_exact_match(self, method): - idx = pd.date_range("2000-01-01", periods=3) - - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - if method is not None: - assert idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 - - def test_get_loc(self): - idx = pd.date_range("2000-01-01", periods=3) - - assert idx.get_loc("2000-01-01", method="nearest") == 0 - assert idx.get_loc("2000-01-01T12", method="nearest") == 1 - - assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1 - assert ( - idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D")) - == 1 - ) - assert ( - idx.get_loc( - "2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D") - ) - == 1 - ) - assert ( - idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1 - ) - with pytest.raises(ValueError, match="unit abbreviation w/o a number"): - idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo") - with pytest.raises(KeyError, match="'2000-01-01T03'"): - idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours") - with pytest.raises( - ValueError, match="tolerance size must match target index size" - ): - idx.get_loc( - "2000-01-01", - method="nearest", - tolerance=[ - pd.Timedelta("1day").to_timedelta64(), - pd.Timedelta("1day").to_timedelta64(), - ], - ) - - assert idx.get_loc("2000", method="nearest") == slice(0, 3) - assert idx.get_loc("2000-01", method="nearest") == slice(0, 3) - - assert idx.get_loc("1999", method="nearest") == 0 - assert idx.get_loc("2001", method="nearest") == 2 - - with pytest.raises(KeyError, match="'1999'"): - idx.get_loc("1999", method="pad") - with pytest.raises(KeyError, match="'2001'"): - idx.get_loc("2001", method="backfill") - - with pytest.raises(KeyError, match="'foobar'"): - idx.get_loc("foobar") - with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"): - idx.get_loc(slice(2)) - - idx = pd.to_datetime(["2000-01-01", "2000-01-04"]) - assert idx.get_loc("2000-01-02", method="nearest") == 0 - assert idx.get_loc("2000-01-03", method="nearest") == 1 - assert idx.get_loc("2000-01", method="nearest") == slice(0, 2) - - # time indexing - idx = pd.date_range("2000-01-01", periods=24, freq="H") - tm.assert_numpy_array_equal( - idx.get_loc(time(12)), np.array([12]), check_dtype=False - ) - tm.assert_numpy_array_equal( - idx.get_loc(time(12, 30)), np.array([]), check_dtype=False - ) - with pytest.raises(NotImplementedError): - idx.get_loc(time(12, 30), method="pad") - - def test_get_loc_reasonable_key_error(self): - # GH#1062 - index = DatetimeIndex(["1/3/2000"]) - with pytest.raises(KeyError, match="2000"): - index.get_loc("1/1/2000") - - @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)]) - def test_get_loc_timedelta_invalid_key(self, key): - # GH#20464 - dti = pd.date_range("1970-01-01", periods=10) - with pytest.raises(TypeError): - dti.get_loc(key) - - def test_get_loc_nat(self): - # GH#20464 - index = DatetimeIndex(["1/3/2000", "NaT"]) - assert index.get_loc(pd.NaT) == 1 - - assert index.get_loc(None) == 1 - - assert index.get_loc(np.nan) == 1 - - assert index.get_loc(pd.NA) == 1 - - assert index.get_loc(np.datetime64("NaT")) == 1 - - with pytest.raises(KeyError, match="NaT"): - index.get_loc(np.timedelta64("NaT")) - - class TestDatetimeIndex: @pytest.mark.parametrize( "null", [None, np.nan, np.datetime64("NaT"), pd.NaT, pd.NA] @@ -749,6 +639,84 @@ def test_get_value(self): result = dti.get_value(ser, key.to_datetime64()) assert result == 7 + def test_get_loc(self): + idx = pd.date_range("2000-01-01", periods=3) + + for method in [None, "pad", "backfill", "nearest"]: + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + if method is not None: + assert ( + idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 + ) + + assert idx.get_loc("2000-01-01", method="nearest") == 0 + assert idx.get_loc("2000-01-01T12", method="nearest") == 1 + + assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1 + assert ( + idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D")) + == 1 + ) + assert ( + idx.get_loc( + "2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D") + ) + == 1 + ) + assert ( + idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1 + ) + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo") + with pytest.raises(KeyError, match="'2000-01-01T03'"): + idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours") + with pytest.raises( + ValueError, match="tolerance size must match target index size" + ): + idx.get_loc( + "2000-01-01", + method="nearest", + tolerance=[ + pd.Timedelta("1day").to_timedelta64(), + pd.Timedelta("1day").to_timedelta64(), + ], + ) + + assert idx.get_loc("2000", method="nearest") == slice(0, 3) + assert idx.get_loc("2000-01", method="nearest") == slice(0, 3) + + assert idx.get_loc("1999", method="nearest") == 0 + assert idx.get_loc("2001", method="nearest") == 2 + + with pytest.raises(KeyError, match="'1999'"): + idx.get_loc("1999", method="pad") + with pytest.raises(KeyError, match="'2001'"): + idx.get_loc("2001", method="backfill") + + with pytest.raises(KeyError, match="'foobar'"): + idx.get_loc("foobar") + with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"): + idx.get_loc(slice(2)) + + idx = pd.to_datetime(["2000-01-01", "2000-01-04"]) + assert idx.get_loc("2000-01-02", method="nearest") == 0 + assert idx.get_loc("2000-01-03", method="nearest") == 1 + assert idx.get_loc("2000-01", method="nearest") == slice(0, 2) + + # time indexing + idx = pd.date_range("2000-01-01", periods=24, freq="H") + tm.assert_numpy_array_equal( + idx.get_loc(time(12)), np.array([12]), check_dtype=False + ) + tm.assert_numpy_array_equal( + idx.get_loc(time(12, 30)), np.array([]), check_dtype=False + ) + with pytest.raises(NotImplementedError): + idx.get_loc(time(12, 30), method="pad") + def test_get_indexer(self): idx = pd.date_range("2000-01-01", periods=3) exp = np.array([0, 1, 2], dtype=np.intp) @@ -788,3 +756,32 @@ def test_get_indexer(self): idx.get_indexer(target, "nearest", tolerance=tol_bad) with pytest.raises(ValueError): idx.get_indexer(idx[[0]], method="nearest", tolerance="foo") + + def test_reasonable_key_error(self): + # GH#1062 + index = DatetimeIndex(["1/3/2000"]) + with pytest.raises(KeyError, match="2000"): + index.get_loc("1/1/2000") + + @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)]) + def test_timedelta_invalid_key(self, key): + # GH#20464 + dti = pd.date_range("1970-01-01", periods=10) + with pytest.raises(TypeError): + dti.get_loc(key) + + def test_get_loc_nat(self): + # GH#20464 + index = DatetimeIndex(["1/3/2000", "NaT"]) + assert index.get_loc(pd.NaT) == 1 + + assert index.get_loc(None) == 1 + + assert index.get_loc(np.nan) == 1 + + assert index.get_loc(pd.NA) == 1 + + assert index.get_loc(np.datetime64("NaT")) == 1 + + with pytest.raises(KeyError, match="NaT"): + index.get_loc(np.timedelta64("NaT")) diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 6a4be9e538cb8..14fff6f9c85b5 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -180,75 +180,6 @@ def test_take_fill_value(self): idx.take(np.array([1, -5])) -class TestGetLoc: - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - def test_get_loc_method_exact_match(self, method): - idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) - - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - def test_get_loc(self): - idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) - - assert idx.get_loc(idx[1], "pad", tolerance=Timedelta(0)) == 1 - assert idx.get_loc(idx[1], "pad", tolerance=np.timedelta64(0, "s")) == 1 - assert idx.get_loc(idx[1], "pad", tolerance=timedelta(0)) == 1 - - with pytest.raises(ValueError, match="unit abbreviation w/o a number"): - idx.get_loc(idx[1], method="nearest", tolerance="foo") - - with pytest.raises(ValueError, match="tolerance size must match"): - idx.get_loc( - idx[1], - method="nearest", - tolerance=[ - Timedelta(0).to_timedelta64(), - Timedelta(0).to_timedelta64(), - ], - ) - - for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: - assert idx.get_loc("1 day 1 hour", method) == loc - - # GH 16909 - assert idx.get_loc(idx[1].to_timedelta64()) == 1 - - # GH 16896 - assert idx.get_loc("0 days") == 0 - - def test_get_loc_nat(self): - tidx = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"]) - - assert tidx.get_loc(pd.NaT) == 1 - assert tidx.get_loc(None) == 1 - assert tidx.get_loc(float("nan")) == 1 - assert tidx.get_loc(np.nan) == 1 - - -class TestGetIndexer: - def test_get_indexer(self): - idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) - tm.assert_numpy_array_equal( - idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) - ) - - target = pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) - ) - tm.assert_numpy_array_equal( - idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) - ) - tm.assert_numpy_array_equal( - idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) - ) - - res = idx.get_indexer(target, "nearest", tolerance=Timedelta("1 hour")) - tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) - - class TestTimedeltaIndex: def test_insert_empty(self): # Corner case inserting with length zero doesnt raise IndexError @@ -397,3 +328,65 @@ def test_delete_slice(self): tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq + + def test_get_loc(self): + idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) + + for method in [None, "pad", "backfill", "nearest"]: + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + assert idx.get_loc(idx[1], "pad", tolerance=Timedelta(0)) == 1 + assert idx.get_loc(idx[1], "pad", tolerance=np.timedelta64(0, "s")) == 1 + assert idx.get_loc(idx[1], "pad", tolerance=timedelta(0)) == 1 + + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + idx.get_loc(idx[1], method="nearest", tolerance="foo") + + with pytest.raises(ValueError, match="tolerance size must match"): + idx.get_loc( + idx[1], + method="nearest", + tolerance=[ + Timedelta(0).to_timedelta64(), + Timedelta(0).to_timedelta64(), + ], + ) + + for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: + assert idx.get_loc("1 day 1 hour", method) == loc + + # GH 16909 + assert idx.get_loc(idx[1].to_timedelta64()) == 1 + + # GH 16896 + assert idx.get_loc("0 days") == 0 + + def test_get_loc_nat(self): + tidx = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"]) + + assert tidx.get_loc(pd.NaT) == 1 + assert tidx.get_loc(None) == 1 + assert tidx.get_loc(float("nan")) == 1 + assert tidx.get_loc(np.nan) == 1 + + def test_get_indexer(self): + idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) + tm.assert_numpy_array_equal( + idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + + res = idx.get_indexer(target, "nearest", tolerance=Timedelta("1 hour")) + tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp))