Skip to content

CLN: remove BlockManager.get #33052

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 9 additions & 12 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2555,7 +2555,7 @@ def _ixs(self, i: int, axis: int = 0):
label = self.columns[i]

values = self._mgr.iget(i)
result = self._box_col_values(values, label)
result = self._box_col_values(values, i)

# this is a cached value, mark it so
result._set_as_cached(label, self)
Expand Down Expand Up @@ -2645,7 +2645,7 @@ def _getitem_bool_array(self, key):
def _getitem_multilevel(self, key):
# self.columns is a MultiIndex
loc = self.columns.get_loc(key)
if isinstance(loc, (slice, Series, np.ndarray, Index)):
if isinstance(loc, (slice, np.ndarray)):
new_columns = self.columns[loc]
result_columns = maybe_droplevels(new_columns, key)
if self._is_mixed_type:
Expand Down Expand Up @@ -2678,7 +2678,8 @@ def _getitem_multilevel(self, key):
result._set_is_copy(self)
return result
else:
return self._get_item_cache(key)
# loc is neither a slice nor ndarray, so must be an int
return self._ixs(loc, axis=1)

def _get_value(self, index, col, takeable: bool = False):
"""
Expand Down Expand Up @@ -2869,19 +2870,15 @@ def _ensure_valid_index(self, value):
value.index.copy(), axis=1, fill_value=np.nan
)

def _box_item_values(self, key, values):
items = self.columns[self.columns.get_loc(key)]
if values.ndim == 2:
return self._constructor(values.T, columns=items, index=self.index)
else:
return self._box_col_values(values, items)

def _box_col_values(self, values, items):
def _box_col_values(self, values, loc: int) -> Series:
"""
Provide boxed values for a column.
"""
# Lookup in columns so that if e.g. a str datetime was passed
# we attach the Timestamp object as the name.
name = self.columns[loc]
klass = self._constructor_sliced
return klass(values, index=self.index, name=items, fastpath=True)
return klass(values, index=self.index, name=name, fastpath=True)

# ----------------------------------------------------------------------
# Unsorted
Expand Down
12 changes: 7 additions & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3547,18 +3547,20 @@ def _get_item_cache(self, item):
cache = self._item_cache
res = cache.get(item)
if res is None:
values = self._mgr.get(item)
res = self._box_item_values(item, values)
# All places that call _get_item_cache have unique columns,
# pending resolution of GH#33047

loc = self.columns.get_loc(item)
values = self._mgr.iget(loc)
res = self._box_col_values(values, loc)

cache[item] = res
res._set_as_cached(item, self)

# for a chain
res._is_copy = self._is_copy
return res

def _box_item_values(self, key, values):
raise AbstractMethodError(self)

def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries:
"""
Construct a slice of this container.
Expand Down
29 changes: 0 additions & 29 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,35 +963,6 @@ def _consolidate_inplace(self) -> None:
self._known_consolidated = True
self._rebuild_blknos_and_blklocs()

def get(self, item):
    """
    Return values for selected item.

    Parameters
    ----------
    item : label
        Label to look up in ``self.items``.

    Returns
    -------
    The result of ``self.iget(loc)`` when ``self.items`` is unique,
    otherwise the result of ``self.reindex_indexer`` over every position
    matching ``item``.

    Raises
    ------
    ValueError
        If ``self.items`` is unique, ``item`` is null, and ``self.items``
        does not contain exactly one null entry.
    TypeError
        If ``self.items`` is not unique and ``item`` is null.
    """
    if not self.items.is_unique:
        if isna(item):
            raise TypeError("cannot label index with a null key")

        indexer = self.items.get_indexer_for([item])
        return self.reindex_indexer(
            new_axis=self.items[indexer], indexer=indexer, axis=0, allow_dups=True
        )

    if not isna(item):
        return self.iget(self.items.get_loc(item))

    # Null key: allow it only when it identifies a single null location.
    # The positions are always an ndarray, so no scalar check is needed and
    # the location can never be left unbound.
    null_locs = np.flatnonzero(isna(self.items))
    if len(null_locs) != 1:
        raise ValueError("cannot label index with a null key")
    return self.iget(null_locs.item())

def iget(self, i: int) -> "SingleBlockManager":
"""
Return the data as a SingleBlockManager.
Expand Down
107 changes: 58 additions & 49 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,45 +329,48 @@ def test_categorical_block_pickle(self):
smgr2 = tm.round_trip_pickle(smgr)
tm.assert_series_equal(Series(smgr), Series(smgr2))

def test_get(self):
def test_iget(self):
cols = Index(list("abc"))
values = np.random.rand(3, 3)
block = make_block(values=values.copy(), placement=np.arange(3))
mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)])

tm.assert_almost_equal(mgr.get("a").internal_values(), values[0])
tm.assert_almost_equal(mgr.get("b").internal_values(), values[1])
tm.assert_almost_equal(mgr.get("c").internal_values(), values[2])
tm.assert_almost_equal(mgr.iget(0).internal_values(), values[0])
tm.assert_almost_equal(mgr.iget(1).internal_values(), values[1])
tm.assert_almost_equal(mgr.iget(2).internal_values(), values[2])

def test_set(self):
mgr = create_mgr("a,b,c: int", item_shape=(3,))

mgr.set("d", np.array(["foo"] * 3))
mgr.set("b", np.array(["bar"] * 3))
tm.assert_numpy_array_equal(mgr.get("a").internal_values(), np.array([0] * 3))
tm.assert_numpy_array_equal(mgr.iget(0).internal_values(), np.array([0] * 3))
tm.assert_numpy_array_equal(
mgr.get("b").internal_values(), np.array(["bar"] * 3, dtype=np.object_)
mgr.iget(1).internal_values(), np.array(["bar"] * 3, dtype=np.object_)
)
tm.assert_numpy_array_equal(mgr.get("c").internal_values(), np.array([2] * 3))
tm.assert_numpy_array_equal(mgr.iget(2).internal_values(), np.array([2] * 3))
tm.assert_numpy_array_equal(
mgr.get("d").internal_values(), np.array(["foo"] * 3, dtype=np.object_)
mgr.iget(3).internal_values(), np.array(["foo"] * 3, dtype=np.object_)
)

def test_set_change_dtype(self, mgr):
mgr.set("baz", np.zeros(N, dtype=bool))

mgr.set("baz", np.repeat("foo", N))
assert mgr.get("baz").dtype == np.object_
idx = mgr.items.get_loc("baz")
assert mgr.iget(idx).dtype == np.object_

mgr2 = mgr.consolidate()
mgr2.set("baz", np.repeat("foo", N))
assert mgr2.get("baz").dtype == np.object_
idx = mgr2.items.get_loc("baz")
assert mgr2.iget(idx).dtype == np.object_

mgr2.set("quux", tm.randn(N).astype(int))
assert mgr2.get("quux").dtype == np.int_
idx = mgr2.items.get_loc("quux")
assert mgr2.iget(idx).dtype == np.int_

mgr2.set("quux", tm.randn(N))
assert mgr2.get("quux").dtype == np.float_
assert mgr2.iget(idx).dtype == np.float_

def test_copy(self, mgr):
cp = mgr.copy(deep=False)
Expand Down Expand Up @@ -430,8 +433,8 @@ def test_as_array_datetime(self):

def test_as_array_datetime_tz(self):
mgr = create_mgr("h: M8[ns, US/Eastern]; g: M8[ns, CET]")
assert mgr.get("h").dtype == "datetime64[ns, US/Eastern]"
assert mgr.get("g").dtype == "datetime64[ns, CET]"
assert mgr.iget(0).dtype == "datetime64[ns, US/Eastern]"
assert mgr.iget(1).dtype == "datetime64[ns, CET]"
assert mgr.as_array().dtype == "object"

@pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"])
Expand All @@ -441,26 +444,26 @@ def test_astype(self, t):

t = np.dtype(t)
tmgr = mgr.astype(t)
assert tmgr.get("c").dtype.type == t
assert tmgr.get("d").dtype.type == t
assert tmgr.get("e").dtype.type == t
assert tmgr.iget(0).dtype.type == t
assert tmgr.iget(1).dtype.type == t
assert tmgr.iget(2).dtype.type == t

# mixed
mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8")

t = np.dtype(t)
tmgr = mgr.astype(t, errors="ignore")
assert tmgr.get("c").dtype.type == t
assert tmgr.get("e").dtype.type == t
assert tmgr.get("f").dtype.type == t
assert tmgr.get("g").dtype.type == t
assert tmgr.iget(2).dtype.type == t
assert tmgr.iget(4).dtype.type == t
assert tmgr.iget(5).dtype.type == t
assert tmgr.iget(6).dtype.type == t

assert tmgr.get("a").dtype.type == np.object_
assert tmgr.get("b").dtype.type == np.object_
assert tmgr.iget(0).dtype.type == np.object_
assert tmgr.iget(1).dtype.type == np.object_
if t != np.int64:
assert tmgr.get("d").dtype.type == np.datetime64
assert tmgr.iget(3).dtype.type == np.datetime64
else:
assert tmgr.get("d").dtype.type == t
assert tmgr.iget(3).dtype.type == t

def test_convert(self):
def _compare(old_mgr, new_mgr):
Expand Down Expand Up @@ -497,11 +500,11 @@ def _compare(old_mgr, new_mgr):
mgr.set("b", np.array(["2."] * N, dtype=np.object_))
mgr.set("foo", np.array(["foo."] * N, dtype=np.object_))
new_mgr = mgr.convert(numeric=True)
assert new_mgr.get("a").dtype == np.int64
assert new_mgr.get("b").dtype == np.float64
assert new_mgr.get("foo").dtype == np.object_
assert new_mgr.get("f").dtype == np.int64
assert new_mgr.get("g").dtype == np.float64
assert new_mgr.iget(0).dtype == np.int64
assert new_mgr.iget(1).dtype == np.float64
assert new_mgr.iget(2).dtype == np.object_
assert new_mgr.iget(3).dtype == np.int64
assert new_mgr.iget(4).dtype == np.float64

mgr = create_mgr(
"a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2"
Expand All @@ -510,15 +513,15 @@ def _compare(old_mgr, new_mgr):
mgr.set("b", np.array(["2."] * N, dtype=np.object_))
mgr.set("foo", np.array(["foo."] * N, dtype=np.object_))
new_mgr = mgr.convert(numeric=True)
assert new_mgr.get("a").dtype == np.int64
assert new_mgr.get("b").dtype == np.float64
assert new_mgr.get("foo").dtype == np.object_
assert new_mgr.get("f").dtype == np.int32
assert new_mgr.get("bool").dtype == np.bool_
assert new_mgr.get("dt").dtype.type, np.datetime64
assert new_mgr.get("i").dtype == np.int64
assert new_mgr.get("g").dtype == np.float64
assert new_mgr.get("h").dtype == np.float16
assert new_mgr.iget(0).dtype == np.int64
assert new_mgr.iget(1).dtype == np.float64
assert new_mgr.iget(2).dtype == np.object_
assert new_mgr.iget(3).dtype == np.int32
assert new_mgr.iget(4).dtype == np.bool_
assert new_mgr.iget(5).dtype.type, np.datetime64
assert new_mgr.iget(6).dtype == np.int64
assert new_mgr.iget(7).dtype == np.float64
assert new_mgr.iget(8).dtype == np.float16

def test_invalid_ea_block(self):
with pytest.raises(AssertionError, match="block.size != values.size"):
Expand Down Expand Up @@ -620,16 +623,16 @@ def test_reindex_items(self):
assert reindexed.nblocks == 2
tm.assert_index_equal(reindexed.items, pd.Index(["g", "c", "a", "d"]))
tm.assert_almost_equal(
mgr.get("g").internal_values(), reindexed.get("g").internal_values()
mgr.iget(6).internal_values(), reindexed.iget(0).internal_values()
)
tm.assert_almost_equal(
mgr.get("c").internal_values(), reindexed.get("c").internal_values()
mgr.iget(2).internal_values(), reindexed.iget(1).internal_values()
)
tm.assert_almost_equal(
mgr.get("a").internal_values(), reindexed.get("a").internal_values()
mgr.iget(0).internal_values(), reindexed.iget(2).internal_values()
)
tm.assert_almost_equal(
mgr.get("d").internal_values(), reindexed.get("d").internal_values()
mgr.iget(3).internal_values(), reindexed.iget(3).internal_values()
)

def test_get_numeric_data(self):
Expand All @@ -645,13 +648,15 @@ def test_get_numeric_data(self):
numeric.items, pd.Index(["int", "float", "complex", "bool"])
)
tm.assert_almost_equal(
mgr.get("float").internal_values(), numeric.get("float").internal_values()
mgr.iget(mgr.items.get_loc("float")).internal_values(),
numeric.iget(numeric.items.get_loc("float")).internal_values(),
)

# Check sharing
numeric.set("float", np.array([100.0, 200.0, 300.0]))
tm.assert_almost_equal(
mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0])
mgr.iget(mgr.items.get_loc("float")).internal_values(),
np.array([100.0, 200.0, 300.0]),
)

numeric2 = mgr.get_numeric_data(copy=True)
Expand All @@ -660,7 +665,8 @@ def test_get_numeric_data(self):
)
numeric2.set("float", np.array([1000.0, 2000.0, 3000.0]))
tm.assert_almost_equal(
mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0])
mgr.iget(mgr.items.get_loc("float")).internal_values(),
np.array([100.0, 200.0, 300.0]),
)

def test_get_bool_data(self):
Expand All @@ -674,19 +680,22 @@ def test_get_bool_data(self):
bools = mgr.get_bool_data()
tm.assert_index_equal(bools.items, pd.Index(["bool"]))
tm.assert_almost_equal(
mgr.get("bool").internal_values(), bools.get("bool").internal_values()
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
bools.iget(bools.items.get_loc("bool")).internal_values(),
)

bools.set("bool", np.array([True, False, True]))
tm.assert_numpy_array_equal(
mgr.get("bool").internal_values(), np.array([True, False, True])
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
np.array([True, False, True]),
)

# Check sharing
bools2 = mgr.get_bool_data(copy=True)
bools2.set("bool", np.array([False, True, False]))
tm.assert_numpy_array_equal(
mgr.get("bool").internal_values(), np.array([True, False, True])
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
np.array([True, False, True]),
)

def test_unicode_repr_doesnt_raise(self):
Expand Down