diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 67523facb7b7d..de03ed4ebcfd1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2555,7 +2555,7 @@ def _ixs(self, i: int, axis: int = 0): label = self.columns[i] values = self._mgr.iget(i) - result = self._box_col_values(values, label) + result = self._box_col_values(values, i) # this is a cached value, mark it so result._set_as_cached(label, self) @@ -2645,7 +2645,7 @@ def _getitem_bool_array(self, key): def _getitem_multilevel(self, key): # self.columns is a MultiIndex loc = self.columns.get_loc(key) - if isinstance(loc, (slice, Series, np.ndarray, Index)): + if isinstance(loc, (slice, np.ndarray)): new_columns = self.columns[loc] result_columns = maybe_droplevels(new_columns, key) if self._is_mixed_type: @@ -2678,7 +2678,8 @@ def _getitem_multilevel(self, key): result._set_is_copy(self) return result else: - return self._get_item_cache(key) + # loc is neither a slice nor ndarray, so must be an int + return self._ixs(loc, axis=1) def _get_value(self, index, col, takeable: bool = False): """ @@ -2869,19 +2870,15 @@ def _ensure_valid_index(self, value): value.index.copy(), axis=1, fill_value=np.nan ) - def _box_item_values(self, key, values): - items = self.columns[self.columns.get_loc(key)] - if values.ndim == 2: - return self._constructor(values.T, columns=items, index=self.index) - else: - return self._box_col_values(values, items) - - def _box_col_values(self, values, items): + def _box_col_values(self, values, loc: int) -> Series: """ Provide boxed values for a column. """ + # Lookup in columns so that if e.g. a str datetime was passed + # we attach the Timestamp object as the name. + name = self.columns[loc] klass = self._constructor_sliced - return klass(values, index=self.index, name=items, fastpath=True) + return klass(values, index=self.index, name=name, fastpath=True) # ---------------------------------------------------------------------- # Unsorted diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fac4ca6768ece..e1ec80858e7b2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3547,8 +3547,13 @@ def _get_item_cache(self, item): cache = self._item_cache res = cache.get(item) if res is None: - values = self._mgr.get(item) - res = self._box_item_values(item, values) + # All places that call _get_item_cache have unique columns, + # pending resolution of GH#33047 + + loc = self.columns.get_loc(item) + values = self._mgr.iget(loc) + res = self._box_col_values(values, loc) + cache[item] = res res._set_as_cached(item, self) @@ -3556,9 +3561,6 @@ def _get_item_cache(self, item): res._is_copy = self._is_copy return res - def _box_item_values(self, key, values): - raise AbstractMethodError(self) - def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries: """ Construct a slice of this container. diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c6efd6a2ac6a7..7a080ea155ac2 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -963,35 +963,6 @@ def _consolidate_inplace(self) -> None: self._known_consolidated = True self._rebuild_blknos_and_blklocs() - def get(self, item): - """ - Return values for selected item (ndarray or BlockManager). - """ - if self.items.is_unique: - - if not isna(item): - loc = self.items.get_loc(item) - else: - indexer = np.arange(len(self.items))[isna(self.items)] - - # allow a single nan location indexer - if not is_scalar(indexer): - if len(indexer) == 1: - loc = indexer.item() - else: - raise ValueError("cannot label index with a null key") - - return self.iget(loc) - else: - - if isna(item): - raise TypeError("cannot label index with a null key") - - indexer = self.items.get_indexer_for([item]) - return self.reindex_indexer( - new_axis=self.items[indexer], indexer=indexer, axis=0, allow_dups=True - ) - def iget(self, i: int) -> "SingleBlockManager": """ Return the data as a SingleBlockManager. diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 58a84f5a267bc..f1d4c865a0ced 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -329,45 +329,48 @@ def test_categorical_block_pickle(self): smgr2 = tm.round_trip_pickle(smgr) tm.assert_series_equal(Series(smgr), Series(smgr2)) - def test_get(self): + def test_iget(self): cols = Index(list("abc")) values = np.random.rand(3, 3) block = make_block(values=values.copy(), placement=np.arange(3)) mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)]) - tm.assert_almost_equal(mgr.get("a").internal_values(), values[0]) - tm.assert_almost_equal(mgr.get("b").internal_values(), values[1]) - tm.assert_almost_equal(mgr.get("c").internal_values(), values[2]) + tm.assert_almost_equal(mgr.iget(0).internal_values(), values[0]) + tm.assert_almost_equal(mgr.iget(1).internal_values(), values[1]) + tm.assert_almost_equal(mgr.iget(2).internal_values(), values[2]) def test_set(self): mgr = create_mgr("a,b,c: int", item_shape=(3,)) mgr.set("d", np.array(["foo"] * 3)) mgr.set("b", np.array(["bar"] * 3)) - tm.assert_numpy_array_equal(mgr.get("a").internal_values(), np.array([0] * 3)) + tm.assert_numpy_array_equal(mgr.iget(0).internal_values(), np.array([0] * 3)) tm.assert_numpy_array_equal( - mgr.get("b").internal_values(), np.array(["bar"] * 3, dtype=np.object_) + mgr.iget(1).internal_values(), np.array(["bar"] * 3, dtype=np.object_) ) - tm.assert_numpy_array_equal(mgr.get("c").internal_values(), np.array([2] * 3)) + tm.assert_numpy_array_equal(mgr.iget(2).internal_values(), np.array([2] * 3)) tm.assert_numpy_array_equal( - mgr.get("d").internal_values(), np.array(["foo"] * 3, dtype=np.object_) + mgr.iget(3).internal_values(), np.array(["foo"] * 3, dtype=np.object_) ) def test_set_change_dtype(self, mgr): mgr.set("baz", np.zeros(N, dtype=bool)) mgr.set("baz", np.repeat("foo", N)) - assert mgr.get("baz").dtype == np.object_ + idx = mgr.items.get_loc("baz") + assert mgr.iget(idx).dtype == np.object_ mgr2 = mgr.consolidate() mgr2.set("baz", np.repeat("foo", N)) - assert mgr2.get("baz").dtype == np.object_ + idx = mgr2.items.get_loc("baz") + assert mgr2.iget(idx).dtype == np.object_ mgr2.set("quux", tm.randn(N).astype(int)) - assert mgr2.get("quux").dtype == np.int_ + idx = mgr2.items.get_loc("quux") + assert mgr2.iget(idx).dtype == np.int_ mgr2.set("quux", tm.randn(N)) - assert mgr2.get("quux").dtype == np.float_ + assert mgr2.iget(idx).dtype == np.float_ def test_copy(self, mgr): cp = mgr.copy(deep=False) @@ -430,8 +433,8 @@ def test_as_array_datetime(self): def test_as_array_datetime_tz(self): mgr = create_mgr("h: M8[ns, US/Eastern]; g: M8[ns, CET]") - assert mgr.get("h").dtype == "datetime64[ns, US/Eastern]" - assert mgr.get("g").dtype == "datetime64[ns, CET]" + assert mgr.iget(0).dtype == "datetime64[ns, US/Eastern]" + assert mgr.iget(1).dtype == "datetime64[ns, CET]" assert mgr.as_array().dtype == "object" @pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"]) @@ -441,26 +444,26 @@ def test_astype(self, t): t = np.dtype(t) tmgr = mgr.astype(t) - assert tmgr.get("c").dtype.type == t - assert tmgr.get("d").dtype.type == t - assert tmgr.get("e").dtype.type == t + assert tmgr.iget(0).dtype.type == t + assert tmgr.iget(1).dtype.type == t + assert tmgr.iget(2).dtype.type == t # mixed mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8") t = np.dtype(t) tmgr = mgr.astype(t, errors="ignore") - assert tmgr.get("c").dtype.type == t - assert tmgr.get("e").dtype.type == t - assert tmgr.get("f").dtype.type == t - assert tmgr.get("g").dtype.type == t + assert tmgr.iget(2).dtype.type == t + assert tmgr.iget(4).dtype.type == t + assert tmgr.iget(5).dtype.type == t + assert tmgr.iget(6).dtype.type == t - assert tmgr.get("a").dtype.type == np.object_ - assert tmgr.get("b").dtype.type == np.object_ + assert tmgr.iget(0).dtype.type == np.object_ + assert tmgr.iget(1).dtype.type == np.object_ if t != np.int64: - assert tmgr.get("d").dtype.type == np.datetime64 + assert tmgr.iget(3).dtype.type == np.datetime64 else: - assert tmgr.get("d").dtype.type == t + assert tmgr.iget(3).dtype.type == t def test_convert(self): def _compare(old_mgr, new_mgr): @@ -497,11 +500,11 @@ def _compare(old_mgr, new_mgr): mgr.set("b", np.array(["2."] * N, dtype=np.object_)) mgr.set("foo", np.array(["foo."] * N, dtype=np.object_)) new_mgr = mgr.convert(numeric=True) - assert new_mgr.get("a").dtype == np.int64 - assert new_mgr.get("b").dtype == np.float64 - assert new_mgr.get("foo").dtype == np.object_ - assert new_mgr.get("f").dtype == np.int64 - assert new_mgr.get("g").dtype == np.float64 + assert new_mgr.iget(0).dtype == np.int64 + assert new_mgr.iget(1).dtype == np.float64 + assert new_mgr.iget(2).dtype == np.object_ + assert new_mgr.iget(3).dtype == np.int64 + assert new_mgr.iget(4).dtype == np.float64 mgr = create_mgr( "a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2" @@ -510,15 +513,15 @@ def _compare(old_mgr, new_mgr): mgr.set("b", np.array(["2."] * N, dtype=np.object_)) mgr.set("foo", np.array(["foo."] * N, dtype=np.object_)) new_mgr = mgr.convert(numeric=True) - assert new_mgr.get("a").dtype == np.int64 - assert new_mgr.get("b").dtype == np.float64 - assert new_mgr.get("foo").dtype == np.object_ - assert new_mgr.get("f").dtype == np.int32 - assert new_mgr.get("bool").dtype == np.bool_ - assert new_mgr.get("dt").dtype.type, np.datetime64 - assert new_mgr.get("i").dtype == np.int64 - assert new_mgr.get("g").dtype == np.float64 - assert new_mgr.get("h").dtype == np.float16 + assert new_mgr.iget(0).dtype == np.int64 + assert new_mgr.iget(1).dtype == np.float64 + assert new_mgr.iget(2).dtype == np.object_ + assert new_mgr.iget(3).dtype == np.int32 + assert new_mgr.iget(4).dtype == np.bool_ + assert new_mgr.iget(5).dtype.type, np.datetime64 + assert new_mgr.iget(6).dtype == np.int64 + assert new_mgr.iget(7).dtype == np.float64 + assert new_mgr.iget(8).dtype == np.float16 def test_invalid_ea_block(self): with pytest.raises(AssertionError, match="block.size != values.size"): @@ -620,16 +623,16 @@ def test_reindex_items(self): assert reindexed.nblocks == 2 tm.assert_index_equal(reindexed.items, pd.Index(["g", "c", "a", "d"])) tm.assert_almost_equal( - mgr.get("g").internal_values(), reindexed.get("g").internal_values() + mgr.iget(6).internal_values(), reindexed.iget(0).internal_values() ) tm.assert_almost_equal( - mgr.get("c").internal_values(), reindexed.get("c").internal_values() + mgr.iget(2).internal_values(), reindexed.iget(1).internal_values() ) tm.assert_almost_equal( - mgr.get("a").internal_values(), reindexed.get("a").internal_values() + mgr.iget(0).internal_values(), reindexed.iget(2).internal_values() ) tm.assert_almost_equal( - mgr.get("d").internal_values(), reindexed.get("d").internal_values() + mgr.iget(3).internal_values(), reindexed.iget(3).internal_values() ) def test_get_numeric_data(self): @@ -645,13 +648,15 @@ def test_get_numeric_data(self): numeric.items, pd.Index(["int", "float", "complex", "bool"]) ) tm.assert_almost_equal( - mgr.get("float").internal_values(), numeric.get("float").internal_values() + mgr.iget(mgr.items.get_loc("float")).internal_values(), + numeric.iget(numeric.items.get_loc("float")).internal_values(), ) # Check sharing numeric.set("float", np.array([100.0, 200.0, 300.0])) tm.assert_almost_equal( - mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0]) + mgr.iget(mgr.items.get_loc("float")).internal_values(), + np.array([100.0, 200.0, 300.0]), ) numeric2 = mgr.get_numeric_data(copy=True) @@ -660,7 +665,8 @@ def test_get_numeric_data(self): ) numeric2.set("float", np.array([1000.0, 2000.0, 3000.0])) tm.assert_almost_equal( - mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0]) + mgr.iget(mgr.items.get_loc("float")).internal_values(), + np.array([100.0, 200.0, 300.0]), ) def test_get_bool_data(self): @@ -674,19 +680,22 @@ def test_get_bool_data(self): bools = mgr.get_bool_data() tm.assert_index_equal(bools.items, pd.Index(["bool"])) tm.assert_almost_equal( - mgr.get("bool").internal_values(), bools.get("bool").internal_values() + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + bools.iget(bools.items.get_loc("bool")).internal_values(), ) bools.set("bool", np.array([True, False, True])) tm.assert_numpy_array_equal( - mgr.get("bool").internal_values(), np.array([True, False, True]) + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + np.array([True, False, True]), ) # Check sharing bools2 = mgr.get_bool_data(copy=True) bools2.set("bool", np.array([False, True, False])) tm.assert_numpy_array_equal( - mgr.get("bool").internal_values(), np.array([True, False, True]) + mgr.iget(mgr.items.get_loc("bool")).internal_values(), + np.array([True, False, True]), ) def test_unicode_repr_doesnt_raise(self):