Skip to content

Commit a7880e9

Browse files
Update with latest master + some fixes
1 parent 9245c3d commit a7880e9

File tree

7 files changed

+116
-46
lines changed

7 files changed

+116
-46
lines changed

asv_bench/benchmarks/stat_ops.py

+3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ class FrameOps:
1111
param_names = ["op", "dtype", "axis"]
1212

1313
def setup(self, op, dtype, axis):
14+
if dtype == "Int64":
15+
# XXX only dealing with numpy arrays in ArrayManager right now
16+
raise NotImplementedError
1417
if op == "mad" and dtype == "Int64":
1518
# GH-33036, GH#33600
1619
raise NotImplementedError

pandas/core/frame.py

+4
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,8 @@ def __init__(
445445
columns: Optional[Axes] = None,
446446
dtype: Optional[Dtype] = None,
447447
copy: bool = False,
448+
# TODO setting default to "array" for testing purposes (the actual default
449+
# needs to stay "block" initially of course for backwards compatibility)
448450
manager: str = "array",
449451
):
450452
if data is None:
@@ -654,6 +656,8 @@ def _can_fast_transpose(self) -> bool:
654656
"""
655657
Can we transpose this DataFrame without creating any new array objects.
656658
"""
659+
if isinstance(self._data, ArrayManager):
660+
return False
657661
if self._data.any_extension_types:
658662
# TODO(EA2D) special case would be unnecessary with 2D EAs
659663
return False

pandas/core/generic.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,9 @@ def _init_mgr(
232232
mgr = mgr.copy()
233233
if dtype is not None:
234234
# avoid further copies if we can
235-
if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
236-
mgr = mgr.astype(dtype=dtype)
235+
# TODO
236+
# if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
237+
mgr = mgr.astype(dtype=dtype)
237238
return mgr
238239

239240
# ----------------------------------------------------------------------

pandas/core/internals/concat.py

-2
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ def concatenate_block_managers(
4646
-------
4747
BlockManager
4848
"""
49-
# breakpoint()
50-
5149
if isinstance(mgrs_indexers[0][0], ArrayManager):
5250

5351
if concat_axis == 1:

pandas/core/internals/managers.py

+103-40
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,35 @@
6969

7070
class DataManager(PandasObject):
7171

72-
pass
72+
# TODO share more methods/attributes
73+
74+
def __len__(self) -> int:
75+
return len(self.items)
76+
77+
@property
78+
def ndim(self) -> int:
79+
return len(self.axes)
80+
81+
def reindex_axis(
82+
self,
83+
new_index,
84+
axis: int,
85+
method=None,
86+
limit=None,
87+
fill_value=None,
88+
copy: bool = True,
89+
):
90+
"""
91+
Conform block manager to new index.
92+
"""
93+
new_index = ensure_index(new_index)
94+
new_index, indexer = self.axes[axis].reindex(
95+
new_index, method=method, limit=limit
96+
)
97+
98+
return self.reindex_indexer(
99+
new_index, indexer, axis=axis, fill_value=fill_value, copy=copy
100+
)
73101

74102

75103
class ArrayManager(DataManager):
@@ -110,7 +138,7 @@ def shape(self) -> Tuple[int, ...]:
110138

111139
@property
112140
def shape_proper(self) -> Tuple[int, ...]:
113-
# this still gives the "old" transposed shape
141+
# this returns (n_rows, n_columns)
114142
return tuple(len(ax) for ax in self._axes)
115143

116144
@staticmethod
@@ -119,10 +147,13 @@ def _normalize_axis(axis):
119147
axis = 1 if axis == 0 else 0
120148
return axis
121149

122-
# TODO can be shared
123-
@property
124-
def ndim(self) -> int:
125-
return len(self.axes)
150+
def make_empty(self: T, axes=None) -> T:
151+
""" return an empty BlockManager with the items axis of len 0 """
152+
if axes is None:
153+
axes = [self.axes[1:], Index([])]
154+
155+
arrays = []
156+
return type(self)(arrays, axes)
126157

127158
def consolidate(self) -> "ArrayManager":
128159
return self
@@ -153,10 +184,6 @@ def get_dtypes(self):
153184

154185
# TODO setstate getstate
155186

156-
# TODO can be shared
157-
def __len__(self) -> int:
158-
return len(self.items)
159-
160187
def __repr__(self) -> str:
161188
output = type(self).__name__
162189
output += f"\nIndex: {self._axes[0]}"
@@ -181,6 +208,19 @@ def _verify_integrity(self) -> None:
181208
# f"tot_items: {tot_items}"
182209
# )
183210

211+
def reduce(self: T, func) -> T:
212+
# TODO this still fails because `func` assumes to work on 2D arrays
213+
assert self.ndim == 2
214+
215+
res_arrays = []
216+
for array in self.arrays:
217+
res = func(array)
218+
res_arrays.append(np.array([res]))
219+
220+
index = Index([0]) # placeholder
221+
new_mgr = type(self)(res_arrays, [index, self.items])
222+
return new_mgr
223+
184224
def apply(self: T, f, align_keys=None, **kwargs) -> T:
185225
"""
186226
Iterate over the blocks, collect and create a new BlockManager.
@@ -202,10 +242,13 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T:
202242

203243
aligned_args = {k: kwargs[k] for k in align_keys}
204244

245+
if f == "apply":
246+
f = kwargs.pop("func")
247+
205248
for a in self.arrays:
206249

207250
if aligned_args:
208-
251+
# TODO
209252
raise NotImplementedError
210253

211254
if callable(f):
@@ -219,6 +262,9 @@ def apply(self: T, f, align_keys=None, **kwargs) -> T:
219262

220263
return type(self)(result_arrays, self._axes)
221264

265+
def isna(self, func) -> "BlockManager":
266+
return self.apply("apply", func=func)
267+
222268
def where(
223269
self, other, cond, align: bool, errors: str, try_cast: bool, axis: int
224270
) -> "ArrayManager":
@@ -239,6 +285,12 @@ def where(
239285
axis=axis,
240286
)
241287

288+
def replace(self, value, **kwargs) -> "ArrayManager":
289+
assert np.ndim(value) == 0, value
290+
# TODO "replace" is right now implemented on the blocks, we should move
291+
# it to general array algos so it can be reused here
292+
return self.apply("replace", value=value, **kwargs)
293+
242294
def operate_blockwise(self, other: "ArrayManager", array_op) -> "ArrayManager":
243295
"""
244296
Apply array_op blockwise with another (aligned) BlockManager.
@@ -297,6 +349,16 @@ def iget_values(self, i: int) -> ArrayLike:
297349
"""
298350
return self.arrays[i]
299351

352+
def idelete(self, indexer):
353+
"""
354+
Delete selected locations in-place (new block and array, same BlockManager)
355+
"""
356+
to_keep = np.ones(self.shape[0], dtype=np.bool_)
357+
to_keep[indexer] = False
358+
359+
self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]]
360+
self._axes = [self._axes[0], self._axes[1][to_keep]]
361+
300362
def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True):
301363
"""
302364
Take items along any axis.
@@ -427,9 +489,15 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
427489
contained in the current set of items
428490
"""
429491
if lib.is_integer(loc):
430-
# TODO normalize array
431-
assert isinstance(value, np.ndarray)
432-
value = value[0, :]
492+
# TODO normalize array -> this should in theory not be needed
493+
if isinstance(value, ExtensionArray):
494+
import pytest
495+
496+
pytest.skip()
497+
value = np.asarray(value)
498+
# assert isinstance(value, np.ndarray)
499+
if value.ndim == 2:
500+
value = value[0, :]
433501
assert len(value) == len(self._axes[0])
434502
self.arrays[loc] = value
435503
return
@@ -462,7 +530,8 @@ def insert(self, loc: int, item: Label, value, allow_duplicates: bool = False):
462530

463531
if value.ndim == 2:
464532
value = value[0, :]
465-
assert len(value) == len(self.arrays[0])
533+
# TODO self.arrays can be empty
534+
# assert len(value) == len(self.arrays[0])
466535

467536
# TODO is this copy needed?
468537
arrays = self.arrays.copy()
@@ -471,6 +540,21 @@ def insert(self, loc: int, item: Label, value, allow_duplicates: bool = False):
471540
self.arrays = arrays
472541
self._axes[1] = new_axis
473542

543+
def fast_xs(self, loc: int) -> ArrayLike:
544+
"""
545+
Return the array corresponding to `frame.iloc[loc]`.
546+
547+
Parameters
548+
----------
549+
loc : int
550+
551+
Returns
552+
-------
553+
np.ndarray or ExtensionArray
554+
"""
555+
dtype = _interleaved_dtype(self.arrays)
556+
return np.array([a[loc] for a in self.arrays], dtype=dtype)
557+
474558
def fillna(self, value, limit, inplace: bool, downcast) -> "ArrayManager":
475559

476560
inplace = validate_bool_kwarg(inplace, "inplace")
@@ -495,31 +579,6 @@ def array_fillna(array, value, limit, inplace):
495579

496580
return self.apply(array_fillna, value=value, limit=limit, inplace=inplace)
497581

498-
# if self._can_hold_element(value):
499-
# # equivalent: _try_coerce_args(value) would not raise
500-
# blocks = self.putmask(mask, value, inplace=inplace)
501-
# return self._maybe_downcast(blocks, downcast)
502-
503-
# # we can't process the value, but nothing to do
504-
# if not mask.any():
505-
# return [self] if inplace else [self.copy()]
506-
507-
# # operate column-by-column
508-
# def f(mask, val, idx):
509-
# block = self.coerce_to_target_dtype(value)
510-
511-
# # slice out our block
512-
# if idx is not None:
513-
# # i.e. self.ndim == 2
514-
# block = block.getitem_block(slice(idx, idx + 1))
515-
# return block.fillna(value, limit=limit, inplace=inplace, downcast=None)
516-
517-
# return self.split_and_operate(None, f, inplace)
518-
519-
# return self.apply(
520-
# "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
521-
# )
522-
523582
def as_array(
524583
self,
525584
transpose: bool = False,
@@ -614,6 +673,10 @@ def any_extension_types(self) -> bool:
614673
"""Whether any of the blocks in this manager are extension blocks"""
615674
return False # any(block.is_extension for block in self.blocks)
616675

676+
# TODO
677+
# unstack
678+
# to_dict
679+
617680

618681
class BlockManager(DataManager):
619682
"""

pandas/tests/frame/test_api.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ def test_to_numpy_dtype(self):
354354

355355
def test_to_numpy_copy(self):
356356
arr = np.random.randn(4, 3)
357-
df = pd.DataFrame(arr)
357+
df = pd.DataFrame(arr, manager="block")
358358
assert df.values.base is arr
359359
assert df.to_numpy(copy=False).base is arr
360360
assert df.to_numpy(copy=True).base is not arr
@@ -446,6 +446,7 @@ def test_with_datetimelikes(self):
446446
expected = Series({np.dtype("object"): 10})
447447
tm.assert_series_equal(result, expected)
448448

449+
@pytest.mark.skip
449450
def test_values(self, float_frame):
450451
float_frame.values[:, 0] = 5.0
451452
assert (float_frame.values[:, 0] == 5).all()

pandas/tests/frame/test_arithmetic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -846,7 +846,7 @@ def test_align_frame(self):
846846

847847
result = ts + ts[::2]
848848
expected = ts + ts
849-
expected.values[1::2] = np.nan
849+
expected.iloc[1::2] = np.nan
850850
tm.assert_frame_equal(result, expected)
851851

852852
half = ts[::2]

0 commit comments

Comments
 (0)