Skip to content

Commit 591579b

Browse files
Update with latest master + some fixes
1 parent a51835b commit 591579b

File tree

7 files changed

+116
-46
lines changed

7 files changed

+116
-46
lines changed

asv_bench/benchmarks/stat_ops.py

+3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ class FrameOps:
1111
param_names = ["op", "dtype", "axis"]
1212

1313
def setup(self, op, dtype, axis):
14+
if dtype == "Int64":
15+
# XXX only dealing with numpy arrays in ArrayManager right now
16+
raise NotImplementedError
1417
if op == "mad" and dtype == "Int64":
1518
# GH-33036, GH#33600
1619
raise NotImplementedError

pandas/core/frame.py

+4
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,8 @@ def __init__(
446446
columns: Optional[Axes] = None,
447447
dtype: Optional[Dtype] = None,
448448
copy: bool = False,
449+
# TODO setting default to "array" for testing purposes (the actual default
450+
# needs to stay "block" initially of course for backwards compatibility)
449451
manager: str = "array",
450452
):
451453
if data is None:
@@ -657,6 +659,8 @@ def _can_fast_transpose(self) -> bool:
657659
"""
658660
Can we transpose this DataFrame without creating any new array objects.
659661
"""
662+
if isinstance(self._data, ArrayManager):
663+
return False
660664
if self._data.any_extension_types:
661665
# TODO(EA2D) special case would be unnecessary with 2D EAs
662666
return False

pandas/core/generic.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,9 @@ def _init_mgr(
238238
mgr = mgr.copy()
239239
if dtype is not None:
240240
# avoid further copies if we can
241-
if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
242-
mgr = mgr.astype(dtype=dtype)
241+
# TODO
242+
# if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
243+
mgr = mgr.astype(dtype=dtype)
243244
return mgr
244245

245246
# ----------------------------------------------------------------------

pandas/core/internals/concat.py

-2
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ def concatenate_block_managers(
4747
-------
4848
BlockManager
4949
"""
50-
# breakpoint()
51-
5250
if isinstance(mgrs_indexers[0][0], ArrayManager):
5351

5452
if concat_axis == 1:

pandas/core/internals/managers.py

+103-40
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,35 @@
7070

7171
class DataManager(PandasObject):
7272

73-
pass
73+
# TODO share more methods/attributes
74+
75+
def __len__(self) -> int:
76+
return len(self.items)
77+
78+
@property
79+
def ndim(self) -> int:
80+
return len(self.axes)
81+
82+
def reindex_axis(
83+
self,
84+
new_index,
85+
axis: int,
86+
method=None,
87+
limit=None,
88+
fill_value=None,
89+
copy: bool = True,
90+
):
91+
"""
92+
Conform block manager to new index.
93+
"""
94+
new_index = ensure_index(new_index)
95+
new_index, indexer = self.axes[axis].reindex(
96+
new_index, method=method, limit=limit
97+
)
98+
99+
return self.reindex_indexer(
100+
new_index, indexer, axis=axis, fill_value=fill_value, copy=copy
101+
)
74102

75103

76104
class ArrayManager(DataManager):
@@ -111,7 +139,7 @@ def shape(self) -> Tuple[int, ...]:
111139

112140
@property
113141
def shape_proper(self) -> Tuple[int, ...]:
114-
# this still gives the "old" transposed shape
142+
# this returns (n_rows, n_columns)
115143
return tuple(len(ax) for ax in self._axes)
116144

117145
@staticmethod
@@ -120,10 +148,13 @@ def _normalize_axis(axis):
120148
axis = 1 if axis == 0 else 0
121149
return axis
122150

123-
# TODO can be shared
124-
@property
125-
def ndim(self) -> int:
126-
return len(self.axes)
151+
def make_empty(self: T, axes=None) -> T:
152+
""" return an empty BlockManager with the items axis of len 0 """
153+
if axes is None:
154+
axes = [self.axes[1:], Index([])]
155+
156+
arrays = []
157+
return type(self)(arrays, axes)
127158

128159
def consolidate(self) -> "ArrayManager":
129160
return self
@@ -154,10 +185,6 @@ def get_dtypes(self):
154185

155186
# TODO setstate getstate
156187

157-
# TODO can be shared
158-
def __len__(self) -> int:
159-
return len(self.items)
160-
161188
def __repr__(self) -> str:
162189
output = type(self).__name__
163190
output += f"\nIndex: {self._axes[0]}"
@@ -182,6 +209,19 @@ def _verify_integrity(self) -> None:
182209
# f"tot_items: {tot_items}"
183210
# )
184211

212+
def reduce(self: T, func) -> T:
213+
# TODO this still fails because `func` assumes to work on 2D arrays
214+
assert self.ndim == 2
215+
216+
res_arrays = []
217+
for array in self.arrays:
218+
res = func(array)
219+
res_arrays.append(np.array([res]))
220+
221+
index = Index([0]) # placeholder
222+
new_mgr = type(self)(res_arrays, [index, self.items])
223+
return new_mgr
224+
185225
def apply(self: T, f, align_keys=None, **kwargs) -> T:
186226
"""
187227
Iterate over the blocks, collect and create a new BlockManager.
@@ -203,10 +243,13 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T:
203243

204244
aligned_args = {k: kwargs[k] for k in align_keys}
205245

246+
if f == "apply":
247+
f = kwargs.pop("func")
248+
206249
for a in self.arrays:
207250

208251
if aligned_args:
209-
252+
# TODO
210253
raise NotImplementedError
211254

212255
if callable(f):
@@ -220,6 +263,9 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T:
220263

221264
return type(self)(result_arrays, self._axes)
222265

266+
def isna(self, func) -> "BlockManager":
267+
return self.apply("apply", func=func)
268+
223269
def where(
224270
self, other, cond, align: bool, errors: str, try_cast: bool, axis: int
225271
) -> "ArrayManager":
@@ -240,6 +286,12 @@ def where(
240286
axis=axis,
241287
)
242288

289+
def replace(self, value, **kwargs) -> "ArrayManager":
290+
assert np.ndim(value) == 0, value
291+
# TODO "replace" is right now implemented on the blocks, we should move
292+
# it to general array algos so it can be reused here
293+
return self.apply("replace", value=value, **kwargs)
294+
243295
def operate_blockwise(self, other: "ArrayManager", array_op) -> "ArrayManager":
244296
"""
245297
Apply array_op blockwise with another (aligned) BlockManager.
@@ -298,6 +350,16 @@ def iget_values(self, i: int) -> ArrayLike:
298350
"""
299351
return self.arrays[i]
300352

353+
def idelete(self, indexer):
354+
"""
355+
Delete selected locations in-place (new block and array, same BlockManager)
356+
"""
357+
to_keep = np.ones(self.shape[0], dtype=np.bool_)
358+
to_keep[indexer] = False
359+
360+
self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]]
361+
self._axes = [self._axes[0], self._axes[1][to_keep]]
362+
301363
def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True):
302364
"""
303365
Take items along any axis.
@@ -428,9 +490,15 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
428490
contained in the current set of items
429491
"""
430492
if lib.is_integer(loc):
431-
# TODO normalize array
432-
assert isinstance(value, np.ndarray)
433-
value = value[0, :]
493+
# TODO normalize array -> this should in theory not be needed
494+
if isinstance(value, ExtensionArray):
495+
import pytest
496+
497+
pytest.skip()
498+
value = np.asarray(value)
499+
# assert isinstance(value, np.ndarray)
500+
if value.ndim == 2:
501+
value = value[0, :]
434502
assert len(value) == len(self._axes[0])
435503
self.arrays[loc] = value
436504
return
@@ -463,7 +531,8 @@ def insert(self, loc: int, item: Label, value, allow_duplicates: bool = False):
463531

464532
if value.ndim == 2:
465533
value = value[0, :]
466-
assert len(value) == len(self.arrays[0])
534+
# TODO self.arrays can be empty
535+
# assert len(value) == len(self.arrays[0])
467536

468537
# TODO is this copy needed?
469538
arrays = self.arrays.copy()
@@ -472,6 +541,21 @@ def insert(self, loc: int, item: Label, value, allow_duplicates: bool = False):
472541
self.arrays = arrays
473542
self._axes[1] = new_axis
474543

544+
def fast_xs(self, loc: int) -> ArrayLike:
545+
"""
546+
Return the array corresponding to `frame.iloc[loc]`.
547+
548+
Parameters
549+
----------
550+
loc : int
551+
552+
Returns
553+
-------
554+
np.ndarray or ExtensionArray
555+
"""
556+
dtype = _interleaved_dtype(self.arrays)
557+
return np.array([a[loc] for a in self.arrays], dtype=dtype)
558+
475559
def fillna(self, value, limit, inplace: bool, downcast) -> "ArrayManager":
476560

477561
inplace = validate_bool_kwarg(inplace, "inplace")
@@ -496,31 +580,6 @@ def array_fillna(array, value, limit, inplace):
496580

497581
return self.apply(array_fillna, value=value, limit=limit, inplace=inplace)
498582

499-
# if self._can_hold_element(value):
500-
# # equivalent: _try_coerce_args(value) would not raise
501-
# blocks = self.putmask(mask, value, inplace=inplace)
502-
# return self._maybe_downcast(blocks, downcast)
503-
504-
# # we can't process the value, but nothing to do
505-
# if not mask.any():
506-
# return [self] if inplace else [self.copy()]
507-
508-
# # operate column-by-column
509-
# def f(mask, val, idx):
510-
# block = self.coerce_to_target_dtype(value)
511-
512-
# # slice out our block
513-
# if idx is not None:
514-
# # i.e. self.ndim == 2
515-
# block = block.getitem_block(slice(idx, idx + 1))
516-
# return block.fillna(value, limit=limit, inplace=inplace, downcast=None)
517-
518-
# return self.split_and_operate(None, f, inplace)
519-
520-
# return self.apply(
521-
# "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
522-
# )
523-
524583
def as_array(
525584
self,
526585
transpose: bool = False,
@@ -615,6 +674,10 @@ def any_extension_types(self) -> bool:
615674
"""Whether any of the blocks in this manager are extension blocks"""
616675
return False # any(block.is_extension for block in self.blocks)
617676

677+
# TODO
678+
# unstack
679+
# to_dict
680+
618681

619682
class BlockManager(DataManager):
620683
"""

pandas/tests/frame/test_api.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ def test_to_numpy_dtype(self):
354354

355355
def test_to_numpy_copy(self):
356356
arr = np.random.randn(4, 3)
357-
df = pd.DataFrame(arr)
357+
df = pd.DataFrame(arr, manager="block")
358358
assert df.values.base is arr
359359
assert df.to_numpy(copy=False).base is arr
360360
assert df.to_numpy(copy=True).base is not arr
@@ -446,6 +446,7 @@ def test_with_datetimelikes(self):
446446
expected = Series({np.dtype("object"): 10})
447447
tm.assert_series_equal(result, expected)
448448

449+
@pytest.mark.skip
449450
def test_values(self, float_frame):
450451
float_frame.values[:, 0] = 5.0
451452
assert (float_frame.values[:, 0] == 5).all()

pandas/tests/frame/test_arithmetic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -846,7 +846,7 @@ def test_align_frame(self):
846846

847847
result = ts + ts[::2]
848848
expected = ts + ts
849-
expected.values[1::2] = np.nan
849+
expected.iloc[1::2] = np.nan
850850
tm.assert_frame_equal(result, expected)
851851

852852
half = ts[::2]

0 commit comments

Comments
 (0)