diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0e2253aed1c88..730011ffddc1d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2073,6 +2073,67 @@ def __setstate__(self, state): self._item_cache = {} + def slice(self, *args, closed="left", axis=0, **kwds): + """A minimalist, pythonic, function for slicing a Pandas-Object + + slice(stop) + slice(start, stop[, step] + slice(start, stop[, step], closed="left") + slice(start, stop[, step], axis=1) + + Parameters + ---------- + start : optional, default None + stop : optional, default None + step : integer, optional, default 1 + closed : str, default 'left' + 'left'/'right'/'both'/'neither' + axis : int, default 0 + + Returns + ------- + Series or DataFrame + """ + assert axis in (0, 1) + step = None + if len(args) < 1: + raise TypeError("TypeError: slice expected at least 1 arguments, got 0") + elif len(args) == 1: + start = None + stop = args[0] + elif len(args) < 4: + values = [None] * 3 + values[: len(args)] = args + start, stop, step = values + else: + msg = "TypeError: slice expected 3 arguments, got %d" + raise TypeError(msg % len(args)) + + obj = self + if axis == 1: + obj = obj.T + + if closed == "both": + obj = obj.loc[slice(start, stop, step)] + elif closed == "left": + obj = obj.loc_left[slice(start, stop, step)] + elif closed == "right": + raise NotImplementedError + elif closed == "neither": + raise NotImplementedError + else: + raise ValueError( + ( + "closed='%s' is Invalid. " + " Valid values for 'closed' are 'left', " + "'right', 'both', or 'neither'" + ) + % closed + ) + if axis == 1: + obj = obj.T + return obj + # ---------------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index abe20ee0a91ce..f4f3fbcceec62 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3101,7 +3101,7 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance): @Appender(_index_shared_docs["_convert_scalar_indexer"]) def _convert_scalar_indexer(self, key, kind=None): - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["ix", "loc", "loc_left", "getitem", "iloc", None] if kind == "iloc": return self._validate_indexer("positional", key, kind) @@ -3131,7 +3131,7 @@ def _convert_scalar_indexer(self, key, kind=None): ]: return self._invalid_indexer("label", key) - elif kind in ["loc"] and is_integer(key): + elif kind in ["loc", "loc_left"] and is_integer(key): if not self.holds_integer(): return self._invalid_indexer("label", key) @@ -3153,7 +3153,7 @@ def _convert_scalar_indexer(self, key, kind=None): @Appender(_index_shared_docs["_convert_slice_indexer"]) def _convert_slice_indexer(self, key, kind=None): - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["ix", "loc", "loc_left", "getitem", "iloc", None] # if we are not a slice, then we are done if not isinstance(key, slice): @@ -5094,7 +5094,7 @@ def _validate_indexer(self, form, key, kind): @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind): - assert kind in ["ix", "loc", "getitem", None] + assert kind in ["ix", "loc", "loc_left", "getitem", None] # We are a plain index here (sub-class override this method if they # wish to have special treatment for floats/ints, e.g. Float64Index and @@ -5143,7 +5143,7 @@ def get_slice_bound(self, label, side, kind): int Index of label. """ - assert kind in ["ix", "loc", "getitem", None] + assert kind in ["ix", "loc", "loc_left", "getitem", None] if side not in ("left", "right"): raise ValueError( @@ -5184,10 +5184,16 @@ def get_slice_bound(self, label, side, kind): if side == "left": return slc.start else: - return slc.stop + if kind == "loc_left": + return slc.start + else: + return slc.stop else: if side == "right": - return slc + 1 + if kind == "loc_left": + return slc + else: + return slc + 1 else: return slc diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 731ab9c416345..4b3da82b97cfd 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -484,7 +484,7 @@ def _convert_scalar_indexer(self, key, kind=None): kind : {'ix', 'loc', 'getitem', 'iloc'} or None """ - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["ix", "loc", "loc_left", "getitem", "iloc", None] # we don't allow integer/float indexing for loc # we don't allow float indexing for ix/getitem diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5024eebe03bb4..12abd43656f34 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -6,7 +6,7 @@ from pandas._libs import Timestamp, index as libindex, lib, tslib as libts import pandas._libs.join as libjoin -from pandas._libs.tslibs import ccalendar, fields, parsing, timezones +from pandas._libs.tslibs import Timedelta, ccalendar, fields, parsing, timezones from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes.common import ( @@ -1094,7 +1094,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): Value of `side` parameter should be validated in caller. """ - assert kind in ["ix", "loc", "getitem", None] + assert kind in ["ix", "loc", "loc_left", "getitem", None] if is_float(label) or isinstance(label, time) or is_integer(label): self._invalid_indexer("slice", label) @@ -1111,7 +1111,12 @@ def _maybe_cast_slice_bound(self, label, side, kind): # and length 1 index) if self._is_strictly_monotonic_decreasing and len(self) > 1: return upper if side == "left" else lower - return lower if side == "left" else upper + if side == "left": + return lower + if kind == "loc_left": + return lower - Timedelta(1, "ns") + else: + return upper else: return label diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index daf26d53aa6e2..34a9052dd73be 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -74,7 +74,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=None): @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind): - assert kind in ["ix", "loc", "getitem", None] + assert kind in ["ix", "loc", "loc_left", "getitem", None] # we will try to coerce to integers return self._maybe_cast_indexer(label) @@ -237,7 +237,7 @@ def asi8(self): @Appender(_index_shared_docs["_convert_scalar_indexer"]) def _convert_scalar_indexer(self, key, kind=None): - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["ix", "loc", "loc_left", "getitem", "iloc", None] # don't coerce ilocs to integers if kind != "iloc": @@ -292,7 +292,7 @@ def asi8(self): @Appender(_index_shared_docs["_convert_scalar_indexer"]) def _convert_scalar_indexer(self, key, kind=None): - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["ix", "loc", "loc_left", "getitem", "iloc", None] # don't coerce ilocs to integers if kind != "iloc": @@ -377,7 +377,7 @@ def astype(self, dtype, copy=True): @Appender(_index_shared_docs["_convert_scalar_indexer"]) def _convert_scalar_indexer(self, key, kind=None): - assert kind in ["ix", "loc", "getitem", "iloc", None] + assert kind in ["ix", "loc", "loc_left", "getitem", "iloc", None] if kind == "iloc": return self._validate_indexer("positional", key, kind) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 47cf0f26f9ca5..639a40a837c1f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -746,7 +746,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): Value of `side` parameter should be validated in caller. """ - assert kind in ["ix", "loc", "getitem"] + assert kind in ["ix", "loc", "loc_left", "getitem"] if isinstance(label, datetime): return Period(label, freq=self.freq) @@ -754,7 +754,10 @@ def _maybe_cast_slice_bound(self, label, side, kind): try: _, parsed, reso = parse_time_string(label, self.freq) bounds = self._parsed_string_to_bounds(reso, parsed) - return bounds[0 if side == "left" else 1] + if kind == "loc_left": + return bounds[0] + else: + return bounds[0 if side == "left" else 1] except Exception: raise KeyError(label) elif is_integer(label) or is_float(label): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ecadd11894bfb..2b0d83c8bba3e 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -584,7 +584,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): label : object """ - assert kind in ["ix", "loc", "getitem", None] + assert kind in ["ix", "loc", "loc_left", "getitem", None] if isinstance(label, str): parsed = Timedelta(label) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 01f338a021cec..4134b99a18b06 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -38,6 +38,7 @@ def get_indexers_list(): ("loc", _LocIndexer), ("at", _AtIndexer), ("iat", _iAtIndexer), + ("loc_left", _LocIndexer), ] @@ -1190,7 +1191,7 @@ def _validate_read_indexer( # We (temporarily) allow for some missing keys with .loc, except in # some cases (e.g. setting) in which "raise_missing" will be False - if not (self.name == "loc" and not raise_missing): + if not (self.name in ("loc", "loc_left") and not raise_missing): not_found = list(set(key) - set(ax)) raise KeyError("{} not in index".format(not_found)) @@ -1269,7 +1270,7 @@ def _convert_to_indexer( if is_setter: # always valid - if self.name == "loc": + if self.name in ("loc", "loc_left"): return {"key": obj} # a positional diff --git a/pandas/tests/indexing/test_loc_left.py b/pandas/tests/indexing/test_loc_left.py new file mode 100644 index 0000000000000..8c462be00c246 --- /dev/null +++ b/pandas/tests/indexing/test_loc_left.py @@ -0,0 +1,1199 @@ +""" test label based indexing with loc """ +from io import StringIO +from warnings import catch_warnings, filterwarnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, Timedelta, Timestamp, date_range +from pandas.api.types import is_scalar +from pandas.tests.indexing.common import Base +from pandas.util import testing as tm + + +class Test_loc_left_new(Base): + def test_loc_left_PeriodIndex(self): + pix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") + ser = pix.to_series() + result = ser.loc_left[:"2018"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_left_TimedeltaIndex(self): + tdix = pd.timedelta_range(start="1 day", end="2 days", freq="1H") + ser = tdix.to_series() + result = ser.loc_left[:"1 days"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_left_non_dupe_label_slice_step(self): + ser = Series(range(5), index=[10, 20, 30, 40, 50]) + result = ser.loc_left[:40:2] + expected = ser.iloc[:3:2] + + tm.assert_series_equal(result, expected) + + def test_loc_left_dupe_label_slice_step(self): + ser = Series(range(6), index=[10, 10, 20, 20, 30, 30]) + result = ser.loc_left[:30:2] + expected = ser.iloc[:4:2] + + tm.assert_series_equal(result, expected) + + def test_loc_left_non_dupe_label_slice(self): + ser = Series(range(3), index=[10, 20, 30]) + result = ser.loc_left[:20] + expected = ser.iloc[:1] + + tm.assert_series_equal(result, expected) + + def test_loc_left_dupe_label_slice(self): + ser = Series(range(4), index=[10, 20, 20, 30]) + result = ser.loc_left[:20] + expected = ser.iloc[:1] + + tm.assert_series_equal(result, expected) + + def test_non_dupe_match_freq_string_slicing(self): + dates = ["2001-01-01", "2001-01-02", "2001-01-03"] + + ser = Series(range(len(dates)), pd.DatetimeIndex(dates)) + result = ser.loc_left[:"2001-01-03"] + expected = ser.loc[: Timestamp("2001-01-03") - Timedelta(1, "ns")] + + tm.assert_series_equal(result, expected) + + def test_loc_left_time_string_slice(self): + dates = [ + "2001-01-01 23:50", + "2001-01-02 00:00", + "2001-01-03 00:07", + "2001-01-03 00:08", + "2001-01-03 00:09", + ] + + ser = Series(range(len(dates)), pd.DatetimeIndex(dates)) + result = ser.loc_left[:"2001-01-03"] + expected = ser.loc[: pd.Timestamp("2001-01-03") - Timedelta(1, "ns")] + + tm.assert_series_equal(result, expected) + + +class Test_loc_left(Base): + def test_loc_left_getitem_dups(self): + # GH 5678 + # repeated gettitems on a dup index returning a ndarray + df = DataFrame( + np.random.random_sample((20, 5)), index=["ABCDE"[x % 5] for x in range(20)] + ) + expected = df.loc_left["A", 0] + result = df.loc_left[:, 0].loc_left["A"] + tm.assert_series_equal(result, expected) + + def test_loc_left_getitem_dups2(self): + + # GH4726 dup indexing with iloc/loc (originally) + df = DataFrame( + [[1, 2, "foo", "bar", Timestamp("20130101")]], + columns=["a", "a", "a", "a", "a"], + index=[1], + ) + expected = Series( + [1, 2, "foo", "bar", Timestamp("20130101")], + index=["a", "a", "a", "a", "a"], + name=1, + ) + + result = df.loc_left[1] + tm.assert_series_equal(result, expected) + + def test_loc_left_setitem_dups(self): + + # GH 6541 + df_orig = DataFrame( + { + "me": list("rttti"), + "foo": list("aaade"), + "bar": np.arange(5, dtype="float64") * 1.34 + 2, + "bar2": np.arange(5, dtype="float64") * -0.34 + 2, + } + ).set_index("me") + + indexer = tuple(["r", ["bar", "bar2"]]) + df = df_orig.copy() + df.loc_left[indexer] *= 2.0 + tm.assert_series_equal(df.loc_left[indexer], 2.0 * df_orig.loc_left[indexer]) + + indexer = tuple(["r", "bar"]) + df = df_orig.copy() + df.loc_left[indexer] *= 2.0 + assert df.loc_left[indexer] == 2.0 * df_orig.loc_left[indexer] + + indexer = tuple(["t", ["bar", "bar2"]]) + df = df_orig.copy() + df.loc_left[indexer] *= 2.0 + tm.assert_frame_equal(df.loc_left[indexer], 2.0 * df_orig.loc_left[indexer]) + + def test_loc_left_setitem_slice(self): + # GH10503 + + # assigning the same type should not change the type + df1 = DataFrame({"a": [0, 1, 1], "b": Series([100, 200, 300], dtype="uint32")}) + ix = df1["a"] == 1 + newb1 = df1.loc_left[ix, "b"] + 1 + df1.loc_left[ix, "b"] = newb1 + expected = DataFrame( + {"a": [0, 1, 1], "b": Series([100, 201, 301], dtype="uint32")} + ) + tm.assert_frame_equal(df1, expected) + + # assigning a new type should get the inferred type + df2 = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") + ix = df1["a"] == 1 + newb2 = df2.loc_left[ix, "b"] + df1.loc_left[ix, "b"] = newb2 + expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") + tm.assert_frame_equal(df2, expected) + + def test_loc_left_getitem_int(self): + + # int label + self.check_result( + "int label", "loc_left", 2, "ix", 1, typs=["ints", "uints"], axes=0 + ) + self.check_result( + "int label", "loc_left", 3, "ix", 2, typs=["ints", "uints"], axes=1 + ) + self.check_result( + "int label", "loc_left", 2, "ix", 1, typs=["label"], fails=KeyError + ) + + def test_loc_left_getitem_label(self): + # label + self.check_result("label", "loc_left", "c", "ix", "c", typs=["labels"], axes=0) + self.check_result( + "label", "loc_left", "null", "ix", "null", typs=["mixed"], axes=0 + ) + self.check_result("label", "loc_left", 8, "ix", 8, typs=["mixed"], axes=0) + self.check_result( + "label", "loc_left", Timestamp("20130102"), "ix", 1, typs=["ts"], axes=0 + ) + self.check_result( + "label", "loc_left", "c", "ix", "c", typs=["empty"], fails=KeyError + ) + + def test_loc_left_getitem_label_out_of_range(self): + + # out of range label + self.check_result( + "label range", + "loc_left", + "f", + "ix", + "f", + typs=["ints", "uints", "labels", "mixed", "ts"], + fails=KeyError, + ) + self.check_result( + "label range", "loc_left", "f", "ix", "f", typs=["floats"], fails=KeyError + ) + self.check_result( + "label range", + "loc_left", + 20, + "ix", + 20, + typs=["ints", "uints", "mixed"], + fails=KeyError, + ) + self.check_result( + "label range", "loc_left", 20, "ix", 20, typs=["labels"], fails=TypeError + ) + self.check_result( + "label range", + "loc_left", + 20, + "ix", + 20, + typs=["ts"], + axes=0, + fails=TypeError, + ) + self.check_result( + "label range", + "loc_left", + 20, + "ix", + 20, + typs=["floats"], + axes=0, + fails=KeyError, + ) + + def test_loc_left_getitem_label_list(self): + + # list of labels + self.check_result( + "list lbl", + "loc_left", + [0, 2, 4], + "ix", + [0, 2, 4], + typs=["ints", "uints"], + axes=0, + ) + self.check_result( + "list lbl", + "loc_left", + [3, 6, 9], + "ix", + [3, 6, 9], + typs=["ints", "uints"], + axes=1, + ) + self.check_result( + "list lbl", + "loc_left", + ["a", "b", "d"], + "ix", + ["a", "b", "d"], + typs=["labels"], + axes=0, + ) + self.check_result( + "list lbl", + "loc_left", + ["A", "B", "C"], + "ix", + ["A", "B", "C"], + typs=["labels"], + axes=1, + ) + self.check_result( + "list lbl", + "loc_left", + [2, 8, "null"], + "ix", + [2, 8, "null"], + typs=["mixed"], + axes=0, + ) + self.check_result( + "list lbl", + "loc_left", + [Timestamp("20130102"), Timestamp("20130103")], + "ix", + [Timestamp("20130102"), Timestamp("20130103")], + typs=["ts"], + axes=0, + ) + + def test_loc_left_getitem_label_list_with_missing(self): + self.check_result( + "list lbl", + "loc_left", + [0, 1, 2], + "indexer", + [0, 1, 2], + typs=["empty"], + fails=KeyError, + ) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result( + "list lbl", + "loc_left", + [0, 2, 10], + "ix", + [0, 2, 10], + typs=["ints", "uints", "floats"], + axes=0, + fails=KeyError, + ) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result( + "list lbl", + "loc_left", + [3, 6, 7], + "ix", + [3, 6, 7], + typs=["ints", "uints", "floats"], + axes=1, + fails=KeyError, + ) + + # GH 17758 - MultiIndex and missing keys + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result( + "list lbl", + "loc_left", + [(1, 3), (1, 4), (2, 5)], + "ix", + [(1, 3), (1, 4), (2, 5)], + typs=["multi"], + axes=0, + ) + + # def test_getitem_label_list_with_missing(self): + # REMOVED, does not reference loc_left + + def test_loc_left_getitem_label_list_fails(self): + # fails + self.check_result( + "list lbl", + "loc_left", + [20, 30, 40], + "ix", + [20, 30, 40], + typs=["ints", "uints"], + axes=1, + fails=KeyError, + ) + + def test_loc_left_getitem_label_array_like(self): + # array like + self.check_result( + "array like", + "loc_left", + Series(index=[0, 2, 4]).index, + "ix", + [0, 2, 4], + typs=["ints", "uints"], + axes=0, + ) + self.check_result( + "array like", + "loc_left", + Series(index=[3, 6, 9]).index, + "ix", + [3, 6, 9], + typs=["ints", "uints"], + axes=1, + ) + + def test_loc_getitem_bool(self): + # boolean indexers + b = [True, False, True, False] + self.check_result( + "bool", + "loc_left", + b, + "ix", + b, + typs=["ints", "uints", "labels", "mixed", "ts", "floats"], + ) + self.check_result("bool", "loc", b, "ix", b, typs=["empty"], fails=IndexError) + + def test_loc_left_getitem_int_slice(self): + + # ok + self.check_result( + "int slice2", + "loc_left", + slice(2, 4), + "ix", + [2, 3], + typs=["ints", "uints"], + axes=0, + ) + self.check_result( + "int slice2", + "loc_left", + slice(3, 6), + "ix", + [3, 5], + typs=["ints", "uints"], + axes=1, + ) + + def test_loc_left_to_fail(self): + + # GH3449 + df = DataFrame( + np.random.random((3, 3)), index=["a", "b", "c"], columns=["e", "f", "g"] + ) + + # raise a KeyError? + msg = ( + r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are" + r" in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + df.loc_left[[1, 2], [1, 2]] + + # GH 7496 (originally) loc should not fallback + + s = Series() + s.loc_left[1] = 1 + s.loc_left["a"] = 2 + + with pytest.raises(KeyError, match=r"^-1$"): + s.loc_left[-1] + + msg = ( + r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are" + r" in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + s.loc_left[[-1, -2]] + + msg = ( + r"\"None of \[Index\(\['4'\], dtype='object'\)\] are" r" in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + s.loc_left[["4"]] + + s.loc_left[-1] = 3 + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc_left[[-1, -2]] + expected = Series([3, np.nan], index=[-1, -2]) + tm.assert_series_equal(result, expected) + + s["a"] = 2 + msg = ( + r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are" + r" in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + s.loc_left[[-2]] + + del s["a"] + + with pytest.raises(KeyError, match=msg): + s.loc_left[[-2]] = 0 + + # inconsistency between .loc_left[values] and .loc_left[values,:] + # GH 7999 + df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"]) + + msg = ( + r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are" + r" in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + df.loc_left[[3], :] + + with pytest.raises(KeyError, match=msg): + df.loc_left[[3]] + + def test_loc_left_getitem_list_with_fail(self): + # 15747 + # should KeyError if *any* missing labels + + s = Series([1, 2, 3]) + + s.loc_left[[2]] + + with pytest.raises(KeyError): + s.loc_left[[3]] + + # a non-match and a match + with tm.assert_produces_warning(FutureWarning): + expected = s.loc_left[[2, 3]] + result = s.reindex([2, 3]) + tm.assert_series_equal(result, expected) + + def test_loc_left_getitem_label_slice(self): + + # label slices (with ints) + self.check_result( + "lab slice", + "loc_left", + slice(1, 3), + "ix", + slice(1, 2), + typs=["labels", "mixed", "empty", "ts", "floats"], + fails=TypeError, + ) + + # real label slices + self.check_result( + "lab slice", + "loc_left", + slice("a", "c"), + "ix", + slice("a", "b"), + typs=["labels"], + axes=0, + ) + self.check_result( + "lab slice", + "loc_left", + slice("A", "C"), + "ix", + slice("A", "B"), + typs=["labels"], + axes=1, + ) + + self.check_result( + "ts slice", + "loc_left", + slice("20130102", "20130104"), + "ix", + slice("20130102", "20130103"), + typs=["ts"], + axes=0, + ) + self.check_result( + "ts slice", + "loc_left", + slice("20130102", "20130104"), + "ix", + slice("20130102", "20130103"), + typs=["ts"], + axes=1, + fails=TypeError, + ) + + # GH 14316 + self.check_result( + "ts slice rev", + "loc_left", + slice("20130104", "20130102"), + "indexer", + [0, 1], + typs=["ts_rev"], + axes=0, + ) + + self.check_result( + "mixed slice", + "loc_left", + slice(2, 8), + "ix", + slice(2, 7), + typs=["mixed"], + axes=0, + fails=TypeError, + ) + self.check_result( + "mixed slice", + "loc_left", + slice(2, 8), + "ix", + slice(2, 7), + typs=["mixed"], + axes=1, + fails=KeyError, + ) + + self.check_result( + "mixed slice", + "loc_left", + slice(2, 4, 2), + "ix", + slice(2, 4, 2), + typs=["mixed"], + axes=0, + fails=TypeError, + ) + + def test_loc_left_index(self): + # gh-17131 + # a boolean index should index like a boolean numpy array + + df = DataFrame( + np.random.random(size=(5, 10)), + index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"], + ) + + mask = df.index.map(lambda x: "alpha" in x) + expected = df.loc_left[np.array(mask)] + + result = df.loc_left[mask] + tm.assert_frame_equal(result, expected) + + result = df.loc_left[mask.values] + tm.assert_frame_equal(result, expected) + + def test_loc_left_general(self): + + df = DataFrame( + np.random.rand(4, 4), + columns=["A", "B", "C", "D"], + index=["A", "B", "C", "D"], + ) + + # want this to work + result = df.loc_left[:, "A":"C"].iloc[0:2, :] + assert (result.columns == ["A", "B"]).all() + assert (result.index == ["A", "B"]).all() + + # mixed type + result = DataFrame({"a": [Timestamp("20130101")], "b": [1]}).iloc[0] + expected = Series([Timestamp("20130101"), 1], index=["a", "b"], name=0) + tm.assert_series_equal(result, expected) + assert result.dtype == object + + def test_loc_left_setitem_consistency(self): + # GH 6149 + # coerce similarly for setitem and loc when rows have a null-slice + expected = DataFrame( + { + "date": Series(0, index=range(5), dtype=np.int64), + "val": Series(range(5), dtype=np.int64), + } + ) + + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + df.loc_left[:, "date"] = 0 + tm.assert_frame_equal(df, expected) + + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + df.loc_left[:, "date"] = np.array(0, dtype=np.int64) + tm.assert_frame_equal(df, expected) + + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + df.loc_left[:, "date"] = np.array([0, 0, 0, 0, 0], dtype=np.int64) + tm.assert_frame_equal(df, expected) + + expected = DataFrame( + { + "date": Series("foo", index=range(5)), + "val": Series(range(5), dtype=np.int64), + } + ) + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + df.loc_left[:, "date"] = "foo" + tm.assert_frame_equal(df, expected) + + expected = DataFrame( + { + "date": Series(1.0, index=range(5)), + "val": Series(range(5), dtype=np.int64), + } + ) + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + df.loc_left[:, "date"] = 1.0 + tm.assert_frame_equal(df, expected) + + # GH 15494 + # setting on frame with single row + df = DataFrame({"date": Series([Timestamp("20180101")])}) + df.loc_left[:, "date"] = "string" + expected = DataFrame({"date": Series(["string"])}) + tm.assert_frame_equal(df, expected) + + def test_loc_left_setitem_consistency_empty(self): + # empty (essentially noops) + expected = DataFrame(columns=["x", "y"]) + expected["x"] = expected["x"].astype(np.int64) + df = DataFrame(columns=["x", "y"]) + df.loc_left[:, "x"] = 1 + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=["x", "y"]) + df["x"] = 1 + tm.assert_frame_equal(df, expected) + + def test_loc_left_setitem_consistency_slice_column_len(self): + # .loc_left[:,column] setting with slice == len of the column + # GH10408 + data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat +Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse +Region,Site,RespondentID,,,,, +Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, +Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes +Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, +Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" + + df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) + df.loc_left[:, ("Respondent", "StartDate")] = pd.to_datetime( + df.loc_left[:, ("Respondent", "StartDate")] + ) + df.loc_left[:, ("Respondent", "EndDate")] = pd.to_datetime( + df.loc_left[:, ("Respondent", "EndDate")] + ) + df.loc_left[:, ("Respondent", "Duration")] = ( + df.loc_left[:, ("Respondent", "EndDate")] + - df.loc_left[:, ("Respondent", "StartDate")] + ) + + df.loc_left[:, ("Respondent", "Duration")] = df.loc_left[ + :, ("Respondent", "Duration") + ].astype("timedelta64[s]") + expected = Series( + [1380, 720, 840, 2160.0], index=df.index, name=("Respondent", "Duration") + ) + tm.assert_series_equal(df[("Respondent", "Duration")], expected) + + def test_loc_left_setitem_frame(self): + df = self.frame_labels + + result = df.iloc[0, 0] + + df.loc_left["a", "A"] = 1 + result = df.loc_left["a", "A"] + assert result == 1 + + result = df.iloc[0, 0] + assert result == 1 + + df.loc_left[:, "B":"D"] = 0 + expected = df.loc_left[:, "B":"D"] + result = df.iloc[:, 1:-1] + tm.assert_frame_equal(result, expected) + + # GH 6254 + # setting issue + df = DataFrame(index=[3, 5, 4], columns=["A"]) + df.loc_left[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") + expected = DataFrame(dict(A=Series([1, 2, 3], index=[4, 3, 5]))).reindex( + index=[3, 5, 4] + ) + tm.assert_frame_equal(df, expected) + + # GH 6252 + # setting with an empty frame + keys1 = ["@" + str(i) for i in range(5)] + val1 = np.arange(5, dtype="int64") + + keys2 = ["@" + str(i) for i in range(4)] + val2 = np.arange(4, dtype="int64") + + index = list(set(keys1).union(keys2)) + df = DataFrame(index=index) + df["A"] = np.nan + df.loc_left[keys1, "A"] = val1 + + df["B"] = np.nan + df.loc_left[keys2, "B"] = val2 + + expected = DataFrame( + dict(A=Series(val1, index=keys1), B=Series(val2, index=keys2)) + ).reindex(index=index) + tm.assert_frame_equal(df, expected) + + # GH 8669 + # invalid coercion of nan -> int + df = DataFrame({"A": [1, 2, 3], "B": np.nan}) + df.loc_left[df.B > df.A, "B"] = df.A + expected = DataFrame({"A": [1, 2, 3], "B": np.nan}) + tm.assert_frame_equal(df, expected) + + # GH 6546 + # setting with mixed labels + df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]}) + + result = df.loc_left[0, [1, 2]] + expected = Series([1, 3], index=[1, 2], dtype=object, name=0) + tm.assert_series_equal(result, expected) + + expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]}) + df.loc_left[0, [1, 2]] = [5, 6] + tm.assert_frame_equal(df, expected) + + def test_loc_left_setitem_frame_multiples(self): + # multiple setting + df = DataFrame( + {"A": ["foo", "bar", "baz"], "B": Series(range(3), dtype=np.int64)} + ) + rhs = df.loc_left[1:3] + rhs.index = df.index[0:2] + df.loc_left[0:2] = rhs + expected = DataFrame( + {"A": ["bar", "baz", "baz"], "B": Series([1, 2, 2], dtype=np.int64)} + ) + tm.assert_frame_equal(df, expected) + + # multiple setting with frame on rhs (with M8) + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + expected = DataFrame( + { + "date": [ + Timestamp("20000101"), + Timestamp("20000102"), + Timestamp("20000101"), + Timestamp("20000102"), + Timestamp("20000103"), + ], + "val": Series([0, 1, 0, 1, 2], dtype=np.int64), + } + ) + rhs = df.loc_left[0:3] + rhs.index = df.index[2:5] + df.loc_left[2:5] = rhs + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "indexer", [["A"], slice(None, "B", None), np.array(["A"])] + ) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_loc_left_setitem_with_scalar_index(self, indexer, value): + # GH #19474 + # assigning like "df.loc_left[0, ['B']] = ['Z']" should be evaluated + # elementwisely, not using "setter('A', ['Z'])". + + df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df.loc_left[0, indexer] = value + result = df.loc_left[0, "A"] + + assert is_scalar(result) and result == "Z" + + # def test_loc_left_coerceion(self): + # REMOVED, does not reference loc_left + + def test_loc_left_non_unique(self): + # GH3659 + # non-unique indexer with loc slice + # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs + + # these are going to raise because the we are non monotonic + df = DataFrame( + {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] + ) + msg = "'Cannot get left slice bound for non-unique label: 1'" + with pytest.raises(KeyError, match=msg): + df.loc_left[1:] + msg = "'Cannot get left slice bound for non-unique label: 0'" + with pytest.raises(KeyError, match=msg): + df.loc_left[0:] + msg = "'Cannot get left slice bound for non-unique label: 1'" + with pytest.raises(KeyError, match=msg): + df.loc_left[1:2] + + # monotonic are ok + df = DataFrame( + {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] + ).sort_index(axis=0) + result = df.loc_left[1:] + expected = DataFrame({"A": [2, 4, 5, 6], "B": [4, 6, 7, 8]}, index=[1, 1, 2, 3]) + tm.assert_frame_equal(result, expected) + + result = df.loc_left[0:] + tm.assert_frame_equal(result, df) + + result = df.loc_left[1:3] + expected = DataFrame({"A": [2, 4, 5], "B": [4, 6, 7]}, index=[1, 1, 2]) + tm.assert_frame_equal(result, expected) + + def test_loc_left_non_unique_memory_error(self): + + # GH 4280 + # non_unique index with a large selection triggers a memory error + + columns = list("ABCDEFG") + + def gen_test(l, l2): + return pd.concat( + [ + DataFrame( + np.random.randn(l, len(columns)), + index=np.arange(l), + columns=columns, + ), + DataFrame( + np.ones((l2, len(columns))), index=[0] * l2, columns=columns + ), + ] + ) + + def gen_expected(df, mask): + len_mask = len(mask) + return pd.concat( + [ + df.take([0]), + DataFrame( + np.ones((len_mask, len(columns))), + index=[0] * len_mask, + columns=columns, + ), + df.take(mask[1:]), + ] + ) + + df = gen_test(900, 100) + assert df.index.is_unique is False + + mask = np.arange(100) + result = df.loc_left[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + df = gen_test(900000, 100000) + assert df.index.is_unique is False + + mask = np.arange(100000) + result = df.loc_left[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + def test_loc_left_name(self): + # GH 3880 + df = DataFrame([[1, 1], [1, 1]]) + df.index.name = "index_name" + result = df.iloc[[0, 1]].index.name + assert result == "index_name" + + with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", FutureWarning) + result = df.ix[[0, 1]].index.name + assert result == "index_name" + + result = df.loc_left[[0, 1]].index.name + assert result == "index_name" + + def test_loc_left_empty_list_indexer_is_ok(self): + from pandas.util.testing import makeCustomDataframe as mkdf + + df = mkdf(5, 2) + # vertical empty + tm.assert_frame_equal( + df.loc_left[:, []], + df.iloc[:, :0], + check_index_type=True, + check_column_type=True, + ) + # horizontal empty + tm.assert_frame_equal( + df.loc_left[[], :], + df.iloc[:0, :], + check_index_type=True, + check_column_type=True, + ) + # horizontal empty + tm.assert_frame_equal( + df.loc_left[[]], + df.iloc[:0, :], + check_index_type=True, + check_column_type=True, + ) + + def test_identity_slice_returns_new_object(self): + # GH13873 + original_df = DataFrame({"a": [1, 2, 3]}) + sliced_df = original_df.loc_left[:] + assert sliced_df is not original_df + assert original_df[:] is not original_df + + # should be a shallow copy + original_df["a"] = [4, 4, 4] + assert (sliced_df["a"] == 4).all() + + # These should not return copies + assert original_df is original_df.loc_left[:, :] + df = DataFrame(np.random.randn(10, 4)) + assert df[0] is df.loc_left[:, 0] + + # Same tests for Series + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.loc_left[:] + assert sliced_series is not original_series + assert original_series[:] is not original_series + + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) + + def test_loc_left_uint64(self): + # GH20722 + # Test whether loc accept uint64 max value as index. + s = pd.Series( + [1, 2], index=[np.iinfo("uint64").max - 1, np.iinfo("uint64").max] + ) + + result = s.loc_left[np.iinfo("uint64").max - 1] + expected = s.iloc[0] + assert result == expected + + result = s.loc_left[[np.iinfo("uint64").max - 1]] + expected = s.iloc[[0]] + tm.assert_series_equal(result, expected) + + result = s.loc_left[[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]] + tm.assert_series_equal(result, s) + + def test_loc_left_setitem_empty_append(self): + # GH6173, various appends to an empty dataframe + + data = [1, 2, 3] + expected = DataFrame({"x": data, "y": [None] * len(data)}) + + # appends to fit length of data + df = DataFrame(columns=["x", "y"]) + df.loc_left[:, "x"] = data + tm.assert_frame_equal(df, expected) + + # only appends one value + expected = DataFrame({"x": [1.0], "y": [np.nan]}) + df = DataFrame(columns=["x", "y"], dtype=np.float) + df.loc_left[0, "x"] = expected.loc_left[0, "x"] + tm.assert_frame_equal(df, expected) + + def test_loc_left_setitem_empty_append_raises(self): + # GH6173, various appends to an empty dataframe + + data = [1, 2] + df = DataFrame(columns=["x", "y"]) + msg = ( + r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] " + r"are in the \[index\]" + ) + with pytest.raises(KeyError, match=msg): + df.loc_left[[0, 1], "x"] = data + + msg = "cannot copy sequence with size 2 to array axis with dimension 0" + with pytest.raises(ValueError, match=msg): + df.loc_left[0:2, "x"] = data + + def test_indexing_zerodim_np_array(self): + # GH24924 + df = DataFrame([[1, 2], [3, 4]]) + result = df.loc_left[np.array(0)] + s = pd.Series([1, 2], name=0) + tm.assert_series_equal(result, s) + + def test_series_indexing_zerodim_np_array(self): + # GH24924 + s = Series([1, 2]) + result = s.loc_left[np.array(0)] + assert result == 1 + + +class Test_slice(Base): + def test_slice_both(self): + dti = pd.date_range(start="2017-01-01", end="2018-02", freq="M") + ser = pd.Series(range(len(dti)), index=dti) + + start = "2017-02" + + result = ser.slice("2018", closed="both") + expected = ser.loc[:"2018"] + tm.assert_series_equal(result, expected) + + result = ser.slice(start, "2018", closed="both") + expected = ser.loc[start:"2018"] + tm.assert_series_equal(result, expected) + + df = ser.to_frame() + result = df.slice("2018", closed="both") + expected = df.loc[:"2018"] + tm.assert_frame_equal(result, expected) + result = df.slice(start, "2018", closed="both") + expected = df.loc[start:"2018"] + tm.assert_frame_equal(result, expected) + + # duplicates + rdti = np.repeat(dti, 2) + ser = rdti.to_series() + result = ser.slice("2018", closed="both") + expected = ser.loc[:"2018"] + tm.assert_series_equal(result, expected) + + result = ser.slice(start, "2018", closed="both") + expected = ser.loc[start:"2018"] + tm.assert_series_equal(result, expected) + + df = pd.DataFrame( + dict(a=range(len(rdti)), b=range(100, 100 + len(rdti))), index=rdti + ) + result = df.slice("2018", closed="both") + expected = df.loc[:"2018"] + tm.assert_frame_equal(result, expected) + result = df.slice(start, "2018", closed="both") + expected = df.loc[start:"2018"] + tm.assert_frame_equal(result, expected) + + # axis + result = df.slice("2018", closed="both", axis=1) + expected = df.T.loc[:"2018"].T + tm.assert_frame_equal(result, expected) + + result = df.slice(start, "2018", closed="both", axis=1) + expected = df.T.loc[start:"2018"].T + tm.assert_frame_equal(result, expected) + + def test_slice_left(self): + dti = pd.date_range(start="2017-01-01", end="2018-02", freq="M") + ser = pd.Series(range(len(dti)), index=dti) + result1 = ser.slice("2018") + result2 = ser.slice("2018", closed="left") + + expected = ser.loc_left[:"2018"] + tm.assert_series_equal(result1, expected) + tm.assert_series_equal(result2, expected) + + start = "2017-02" + + result = ser.slice("2018", closed="left") + expected = ser.loc_left[:"2018"] + tm.assert_series_equal(result, expected) + + result = ser.slice(start, "2018", closed="left") + expected = ser.loc_left[start:"2018"] + tm.assert_series_equal(result, expected) + + df = ser.to_frame() + result = df.slice("2018", closed="left") + expected = df.loc_left[:"2018"] + tm.assert_frame_equal(result, expected) + result = df.slice(start, "2018", closed="left") + expected = df.loc_left[start:"2018"] + tm.assert_frame_equal(result, expected) + + # duplicates + rdti = np.repeat(dti, 2) + ser = pd.Series(range(len(rdti)), index=rdti) + result = ser.slice("2018", closed="left") + expected = ser.loc_left[:"2018"] + tm.assert_series_equal(result, expected) + + result = ser.slice(start, "2018", closed="left") + expected = ser.loc_left[start:"2018"] + tm.assert_series_equal(result, expected) + + df = pd.DataFrame( + dict(a=range(len(rdti)), b=range(100, 100 + len(rdti))), index=rdti + ) + result = df.slice("2018", closed="left") + expected = df.loc_left[:"2018"] + tm.assert_frame_equal(result, expected) + result = df.slice(start, "2018", closed="left") + expected = df.loc_left[start:"2018"] + tm.assert_frame_equal(result, expected) + + # axis + df = pd.DataFrame( + dict(a=range(len(rdti)), b=range(100, 100 + len(rdti))), index=rdti + ) + df = df.T + result = df.slice("2018", closed="left", axis=1) + expected = df.T.loc_left[:"2018"].T + tm.assert_frame_equal(result, expected) + + result = df.slice(start, "2018", closed="left", axis=1) + expected = df.T.loc_left[start:"2018"].T + tm.assert_frame_equal(result, expected)