Skip to content

Commit ac0458d

Browse files
jbrockmendel authored and CloseChoice committed
CLN: remove BlockManager.get (pandas-dev#33052)
* CLN: remove BlockManager.get
* Remove assertions
1 parent 7625c05 commit ac0458d

File tree

4 files changed: +74 -95 lines changed

pandas/core/frame.py

+9 -12
Original file line number | Diff line number | Diff line change
@@ -2586,7 +2586,7 @@ def _ixs(self, i: int, axis: int = 0):
25862586
label = self.columns[i]
25872587

25882588
values = self._mgr.iget(i)
2589-
result = self._box_col_values(values, label)
2589+
result = self._box_col_values(values, i)
25902590

25912591
# this is a cached value, mark it so
25922592
result._set_as_cached(label, self)
@@ -2691,7 +2691,7 @@ def _getitem_bool_array(self, key):
26912691
def _getitem_multilevel(self, key):
26922692
# self.columns is a MultiIndex
26932693
loc = self.columns.get_loc(key)
2694-
if isinstance(loc, (slice, Series, np.ndarray, Index)):
2694+
if isinstance(loc, (slice, np.ndarray)):
26952695
new_columns = self.columns[loc]
26962696
result_columns = maybe_droplevels(new_columns, key)
26972697
if self._is_mixed_type:
@@ -2724,7 +2724,8 @@ def _getitem_multilevel(self, key):
27242724
result._set_is_copy(self)
27252725
return result
27262726
else:
2727-
return self._get_item_cache(key)
2727+
# loc is neither a slice nor ndarray, so must be an int
2728+
return self._ixs(loc, axis=1)
27282729

27292730
def _get_value(self, index, col, takeable: bool = False):
27302731
"""
@@ -2915,19 +2916,15 @@ def _ensure_valid_index(self, value):
29152916
value.index.copy(), axis=1, fill_value=np.nan
29162917
)
29172918

2918-
def _box_item_values(self, key, values):
2919-
items = self.columns[self.columns.get_loc(key)]
2920-
if values.ndim == 2:
2921-
return self._constructor(values.T, columns=items, index=self.index)
2922-
else:
2923-
return self._box_col_values(values, items)
2924-
2925-
def _box_col_values(self, values, items):
2919+
def _box_col_values(self, values, loc: int) -> Series:
29262920
"""
29272921
Provide boxed values for a column.
29282922
"""
2923+
# Lookup in columns so that if e.g. a str datetime was passed
2924+
# we attach the Timestamp object as the name.
2925+
name = self.columns[loc]
29292926
klass = self._constructor_sliced
2930-
return klass(values, index=self.index, name=items, fastpath=True)
2927+
return klass(values, index=self.index, name=name, fastpath=True)
29312928

29322929
# ----------------------------------------------------------------------
29332930
# Unsorted

pandas/core/generic.py

+7 -5
Original file line number | Diff line number | Diff line change
@@ -3558,18 +3558,20 @@ def _get_item_cache(self, item):
35583558
cache = self._item_cache
35593559
res = cache.get(item)
35603560
if res is None:
3561-
values = self._mgr.get(item)
3562-
res = self._box_item_values(item, values)
3561+
# All places that call _get_item_cache have unique columns,
3562+
# pending resolution of GH#33047
3563+
3564+
loc = self.columns.get_loc(item)
3565+
values = self._mgr.iget(loc)
3566+
res = self._box_col_values(values, loc)
3567+
35633568
cache[item] = res
35643569
res._set_as_cached(item, self)
35653570

35663571
# for a chain
35673572
res._is_copy = self._is_copy
35683573
return res
35693574

3570-
def _box_item_values(self, key, values):
3571-
raise AbstractMethodError(self)
3572-
35733575
def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries:
35743576
"""
35753577
Construct a slice of this container.

pandas/core/internals/managers.py

-29
Original file line number | Diff line number | Diff line change
@@ -935,35 +935,6 @@ def _consolidate_inplace(self) -> None:
935935
self._known_consolidated = True
936936
self._rebuild_blknos_and_blklocs()
937937

938-
def get(self, item):
939-
"""
940-
Return values for selected item (ndarray or BlockManager).
941-
"""
942-
if self.items.is_unique:
943-
944-
if not isna(item):
945-
loc = self.items.get_loc(item)
946-
else:
947-
indexer = np.arange(len(self.items))[isna(self.items)]
948-
949-
# allow a single nan location indexer
950-
if not is_scalar(indexer):
951-
if len(indexer) == 1:
952-
loc = indexer.item()
953-
else:
954-
raise ValueError("cannot label index with a null key")
955-
956-
return self.iget(loc)
957-
else:
958-
959-
if isna(item):
960-
raise TypeError("cannot label index with a null key")
961-
962-
indexer = self.items.get_indexer_for([item])
963-
return self.reindex_indexer(
964-
new_axis=self.items[indexer], indexer=indexer, axis=0, allow_dups=True
965-
)
966-
967938
def iget(self, i: int) -> "SingleBlockManager":
968939
"""
969940
Return the data as a SingleBlockManager.

pandas/tests/internals/test_internals.py

+58 -49
Original file line number | Diff line number | Diff line change
@@ -329,45 +329,48 @@ def test_categorical_block_pickle(self):
329329
smgr2 = tm.round_trip_pickle(smgr)
330330
tm.assert_series_equal(Series(smgr), Series(smgr2))
331331

332-
def test_get(self):
332+
def test_iget(self):
333333
cols = Index(list("abc"))
334334
values = np.random.rand(3, 3)
335335
block = make_block(values=values.copy(), placement=np.arange(3))
336336
mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)])
337337

338-
tm.assert_almost_equal(mgr.get("a").internal_values(), values[0])
339-
tm.assert_almost_equal(mgr.get("b").internal_values(), values[1])
340-
tm.assert_almost_equal(mgr.get("c").internal_values(), values[2])
338+
tm.assert_almost_equal(mgr.iget(0).internal_values(), values[0])
339+
tm.assert_almost_equal(mgr.iget(1).internal_values(), values[1])
340+
tm.assert_almost_equal(mgr.iget(2).internal_values(), values[2])
341341

342342
def test_set(self):
343343
mgr = create_mgr("a,b,c: int", item_shape=(3,))
344344

345345
mgr.set("d", np.array(["foo"] * 3))
346346
mgr.set("b", np.array(["bar"] * 3))
347-
tm.assert_numpy_array_equal(mgr.get("a").internal_values(), np.array([0] * 3))
347+
tm.assert_numpy_array_equal(mgr.iget(0).internal_values(), np.array([0] * 3))
348348
tm.assert_numpy_array_equal(
349-
mgr.get("b").internal_values(), np.array(["bar"] * 3, dtype=np.object_)
349+
mgr.iget(1).internal_values(), np.array(["bar"] * 3, dtype=np.object_)
350350
)
351-
tm.assert_numpy_array_equal(mgr.get("c").internal_values(), np.array([2] * 3))
351+
tm.assert_numpy_array_equal(mgr.iget(2).internal_values(), np.array([2] * 3))
352352
tm.assert_numpy_array_equal(
353-
mgr.get("d").internal_values(), np.array(["foo"] * 3, dtype=np.object_)
353+
mgr.iget(3).internal_values(), np.array(["foo"] * 3, dtype=np.object_)
354354
)
355355

356356
def test_set_change_dtype(self, mgr):
357357
mgr.set("baz", np.zeros(N, dtype=bool))
358358

359359
mgr.set("baz", np.repeat("foo", N))
360-
assert mgr.get("baz").dtype == np.object_
360+
idx = mgr.items.get_loc("baz")
361+
assert mgr.iget(idx).dtype == np.object_
361362

362363
mgr2 = mgr.consolidate()
363364
mgr2.set("baz", np.repeat("foo", N))
364-
assert mgr2.get("baz").dtype == np.object_
365+
idx = mgr2.items.get_loc("baz")
366+
assert mgr2.iget(idx).dtype == np.object_
365367

366368
mgr2.set("quux", tm.randn(N).astype(int))
367-
assert mgr2.get("quux").dtype == np.int_
369+
idx = mgr2.items.get_loc("quux")
370+
assert mgr2.iget(idx).dtype == np.int_
368371

369372
mgr2.set("quux", tm.randn(N))
370-
assert mgr2.get("quux").dtype == np.float_
373+
assert mgr2.iget(idx).dtype == np.float_
371374

372375
def test_copy(self, mgr):
373376
cp = mgr.copy(deep=False)
@@ -430,8 +433,8 @@ def test_as_array_datetime(self):
430433

431434
def test_as_array_datetime_tz(self):
432435
mgr = create_mgr("h: M8[ns, US/Eastern]; g: M8[ns, CET]")
433-
assert mgr.get("h").dtype == "datetime64[ns, US/Eastern]"
434-
assert mgr.get("g").dtype == "datetime64[ns, CET]"
436+
assert mgr.iget(0).dtype == "datetime64[ns, US/Eastern]"
437+
assert mgr.iget(1).dtype == "datetime64[ns, CET]"
435438
assert mgr.as_array().dtype == "object"
436439

437440
@pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"])
@@ -441,26 +444,26 @@ def test_astype(self, t):
441444

442445
t = np.dtype(t)
443446
tmgr = mgr.astype(t)
444-
assert tmgr.get("c").dtype.type == t
445-
assert tmgr.get("d").dtype.type == t
446-
assert tmgr.get("e").dtype.type == t
447+
assert tmgr.iget(0).dtype.type == t
448+
assert tmgr.iget(1).dtype.type == t
449+
assert tmgr.iget(2).dtype.type == t
447450

448451
# mixed
449452
mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8")
450453

451454
t = np.dtype(t)
452455
tmgr = mgr.astype(t, errors="ignore")
453-
assert tmgr.get("c").dtype.type == t
454-
assert tmgr.get("e").dtype.type == t
455-
assert tmgr.get("f").dtype.type == t
456-
assert tmgr.get("g").dtype.type == t
456+
assert tmgr.iget(2).dtype.type == t
457+
assert tmgr.iget(4).dtype.type == t
458+
assert tmgr.iget(5).dtype.type == t
459+
assert tmgr.iget(6).dtype.type == t
457460

458-
assert tmgr.get("a").dtype.type == np.object_
459-
assert tmgr.get("b").dtype.type == np.object_
461+
assert tmgr.iget(0).dtype.type == np.object_
462+
assert tmgr.iget(1).dtype.type == np.object_
460463
if t != np.int64:
461-
assert tmgr.get("d").dtype.type == np.datetime64
464+
assert tmgr.iget(3).dtype.type == np.datetime64
462465
else:
463-
assert tmgr.get("d").dtype.type == t
466+
assert tmgr.iget(3).dtype.type == t
464467

465468
def test_convert(self):
466469
def _compare(old_mgr, new_mgr):
@@ -497,11 +500,11 @@ def _compare(old_mgr, new_mgr):
497500
mgr.set("b", np.array(["2."] * N, dtype=np.object_))
498501
mgr.set("foo", np.array(["foo."] * N, dtype=np.object_))
499502
new_mgr = mgr.convert(numeric=True)
500-
assert new_mgr.get("a").dtype == np.int64
501-
assert new_mgr.get("b").dtype == np.float64
502-
assert new_mgr.get("foo").dtype == np.object_
503-
assert new_mgr.get("f").dtype == np.int64
504-
assert new_mgr.get("g").dtype == np.float64
503+
assert new_mgr.iget(0).dtype == np.int64
504+
assert new_mgr.iget(1).dtype == np.float64
505+
assert new_mgr.iget(2).dtype == np.object_
506+
assert new_mgr.iget(3).dtype == np.int64
507+
assert new_mgr.iget(4).dtype == np.float64
505508

506509
mgr = create_mgr(
507510
"a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2"
@@ -510,15 +513,15 @@ def _compare(old_mgr, new_mgr):
510513
mgr.set("b", np.array(["2."] * N, dtype=np.object_))
511514
mgr.set("foo", np.array(["foo."] * N, dtype=np.object_))
512515
new_mgr = mgr.convert(numeric=True)
513-
assert new_mgr.get("a").dtype == np.int64
514-
assert new_mgr.get("b").dtype == np.float64
515-
assert new_mgr.get("foo").dtype == np.object_
516-
assert new_mgr.get("f").dtype == np.int32
517-
assert new_mgr.get("bool").dtype == np.bool_
518-
assert new_mgr.get("dt").dtype.type, np.datetime64
519-
assert new_mgr.get("i").dtype == np.int64
520-
assert new_mgr.get("g").dtype == np.float64
521-
assert new_mgr.get("h").dtype == np.float16
516+
assert new_mgr.iget(0).dtype == np.int64
517+
assert new_mgr.iget(1).dtype == np.float64
518+
assert new_mgr.iget(2).dtype == np.object_
519+
assert new_mgr.iget(3).dtype == np.int32
520+
assert new_mgr.iget(4).dtype == np.bool_
521+
assert new_mgr.iget(5).dtype.type, np.datetime64
522+
assert new_mgr.iget(6).dtype == np.int64
523+
assert new_mgr.iget(7).dtype == np.float64
524+
assert new_mgr.iget(8).dtype == np.float16
522525

523526
def test_invalid_ea_block(self):
524527
with pytest.raises(AssertionError, match="block.size != values.size"):
@@ -620,16 +623,16 @@ def test_reindex_items(self):
620623
assert reindexed.nblocks == 2
621624
tm.assert_index_equal(reindexed.items, pd.Index(["g", "c", "a", "d"]))
622625
tm.assert_almost_equal(
623-
mgr.get("g").internal_values(), reindexed.get("g").internal_values()
626+
mgr.iget(6).internal_values(), reindexed.iget(0).internal_values()
624627
)
625628
tm.assert_almost_equal(
626-
mgr.get("c").internal_values(), reindexed.get("c").internal_values()
629+
mgr.iget(2).internal_values(), reindexed.iget(1).internal_values()
627630
)
628631
tm.assert_almost_equal(
629-
mgr.get("a").internal_values(), reindexed.get("a").internal_values()
632+
mgr.iget(0).internal_values(), reindexed.iget(2).internal_values()
630633
)
631634
tm.assert_almost_equal(
632-
mgr.get("d").internal_values(), reindexed.get("d").internal_values()
635+
mgr.iget(3).internal_values(), reindexed.iget(3).internal_values()
633636
)
634637

635638
def test_get_numeric_data(self):
@@ -645,13 +648,15 @@ def test_get_numeric_data(self):
645648
numeric.items, pd.Index(["int", "float", "complex", "bool"])
646649
)
647650
tm.assert_almost_equal(
648-
mgr.get("float").internal_values(), numeric.get("float").internal_values()
651+
mgr.iget(mgr.items.get_loc("float")).internal_values(),
652+
numeric.iget(numeric.items.get_loc("float")).internal_values(),
649653
)
650654

651655
# Check sharing
652656
numeric.set("float", np.array([100.0, 200.0, 300.0]))
653657
tm.assert_almost_equal(
654-
mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0])
658+
mgr.iget(mgr.items.get_loc("float")).internal_values(),
659+
np.array([100.0, 200.0, 300.0]),
655660
)
656661

657662
numeric2 = mgr.get_numeric_data(copy=True)
@@ -660,7 +665,8 @@ def test_get_numeric_data(self):
660665
)
661666
numeric2.set("float", np.array([1000.0, 2000.0, 3000.0]))
662667
tm.assert_almost_equal(
663-
mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0])
668+
mgr.iget(mgr.items.get_loc("float")).internal_values(),
669+
np.array([100.0, 200.0, 300.0]),
664670
)
665671

666672
def test_get_bool_data(self):
@@ -674,19 +680,22 @@ def test_get_bool_data(self):
674680
bools = mgr.get_bool_data()
675681
tm.assert_index_equal(bools.items, pd.Index(["bool"]))
676682
tm.assert_almost_equal(
677-
mgr.get("bool").internal_values(), bools.get("bool").internal_values()
683+
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
684+
bools.iget(bools.items.get_loc("bool")).internal_values(),
678685
)
679686

680687
bools.set("bool", np.array([True, False, True]))
681688
tm.assert_numpy_array_equal(
682-
mgr.get("bool").internal_values(), np.array([True, False, True])
689+
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
690+
np.array([True, False, True]),
683691
)
684692

685693
# Check sharing
686694
bools2 = mgr.get_bool_data(copy=True)
687695
bools2.set("bool", np.array([False, True, False]))
688696
tm.assert_numpy_array_equal(
689-
mgr.get("bool").internal_values(), np.array([True, False, True])
697+
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
698+
np.array([True, False, True]),
690699
)
691700

692701
def test_unicode_repr_doesnt_raise(self):

0 commit comments

Comments
 (0)