Skip to content

Commit 874023e

Browse files
committed
Merge branch 'master' into ref-hybrid-3
2 parents e8d5ebd + b176dcd commit 874023e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+567
-458
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ repos:
3535
exclude: ^pandas/_libs/src/(klib|headers)/
3636
args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
3737
- repo: https://gitlab.com/pycqa/flake8
38-
rev: 3.9.0
38+
rev: 3.9.1
3939
hooks:
4040
- id: flake8
4141
additional_dependencies:
@@ -75,7 +75,7 @@ repos:
7575
hooks:
7676
- id: yesqa
7777
additional_dependencies:
78-
- flake8==3.9.0
78+
- flake8==3.9.1
7979
- flake8-comprehensions==3.1.0
8080
- flake8-bugbear==21.3.2
8181
- pandas-dev-flaker==0.2.0

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ dependencies:
2020
# code checks
2121
- black=20.8b1
2222
- cpplint
23-
- flake8=3.9.0
23+
- flake8=3.9.1
2424
- flake8-bugbear=21.3.2 # used by flake8, find likely bugs
2525
- flake8-comprehensions=3.1.0 # used by flake8, linting of unnecessary comprehensions
2626
- isort>=5.2.1 # check that imports are in the right order

pandas/_libs/internals.pyi

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ from pandas._typing import (
1111
T,
1212
)
1313

14+
from pandas import Index
15+
from pandas.core.internals.blocks import Block as B
16+
1417
def slice_len(slc: slice, objlen: int = ...) -> int: ...
1518

1619

@@ -66,3 +69,15 @@ class NumpyBlock(SharedBlock):
6669

6770
class Block(SharedBlock):
6871
...
72+
73+
class BlockManager:
74+
blocks: tuple[B, ...]
75+
axes: list[Index]
76+
_known_consolidated: bool
77+
_is_consolidated: bool
78+
_blknos: np.ndarray
79+
_blklocs: np.ndarray
80+
81+
def __init__(self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=True): ...
82+
83+
def get_slice(self: T, slobj: slice, axis: int=...) -> T: ...

pandas/_libs/internals.pyx

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ cdef class NumpyBlock(SharedBlock):
515515
self.values = values
516516

517517
# @final # not useful in cython, but we _would_ annotate with @final
518-
def getitem_block_index(self, slicer: slice) -> NumpyBlock:
518+
cpdef NumpyBlock getitem_block_index(self, slice slicer):
519519
"""
520520
Perform __getitem__-like specialized to slicing along index.
521521
@@ -533,3 +533,107 @@ cdef class Block(SharedBlock):
533533
# set values here the (implicit) call to SharedBlock.__cinit__ will
534534
# set placement and ndim
535535
self.values = values
536+
537+
538+
@cython.freelist(64)
539+
cdef class BlockManager:
540+
cdef:
541+
public tuple blocks
542+
public list axes
543+
public bint _known_consolidated, _is_consolidated
544+
public ndarray _blknos, _blklocs
545+
546+
def __cinit__(self, blocks, axes, verify_integrity=True):
547+
if isinstance(blocks, list):
548+
# Backward compat for e.g. pyarrow
549+
blocks = tuple(blocks)
550+
551+
self.blocks = blocks
552+
self.axes = axes.copy() # copy to make sure we are not remotely-mutable
553+
554+
# Populate known_consolidated, blknos, and blklocs lazily
555+
self._known_consolidated = False
556+
self._is_consolidated = False
557+
# error: Incompatible types in assignment (expression has type "None",
558+
# variable has type "ndarray")
559+
self._blknos = None # type: ignore[assignment]
560+
# error: Incompatible types in assignment (expression has type "None",
561+
# variable has type "ndarray")
562+
self._blklocs = None # type: ignore[assignment]
563+
564+
# -------------------------------------------------------------------
565+
# Pickle
566+
567+
cpdef __reduce__(self):
568+
if len(self.axes) == 1:
569+
# SingleBlockManager, __init__ expects Block, axis
570+
args = (self.blocks[0], self.axes[0])
571+
else:
572+
args = (self.blocks, self.axes)
573+
return type(self), args
574+
575+
cpdef __setstate__(self, state):
576+
from pandas.core.construction import extract_array
577+
from pandas.core.internals.blocks import (
578+
ensure_block_shape,
579+
new_block,
580+
)
581+
from pandas.core.internals.managers import ensure_index
582+
583+
if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
584+
state = state[3]["0.14.1"]
585+
axes = [ensure_index(ax) for ax in state["axes"]]
586+
ndim = len(axes)
587+
588+
for blk in state["blocks"]:
589+
vals = blk["values"]
590+
# older versions may hold e.g. DatetimeIndex instead of DTA
591+
vals = extract_array(vals, extract_numpy=True)
592+
blk["values"] = ensure_block_shape(vals, ndim=ndim)
593+
594+
nbs = [
595+
new_block(blk["values"], blk["mgr_locs"], ndim=ndim)
596+
for blk in state["blocks"]
597+
]
598+
blocks = tuple(nbs)
599+
self.blocks = blocks
600+
self.axes = axes
601+
602+
else:
603+
raise NotImplementedError("pre-0.14.1 pickles are no longer supported")
604+
605+
self._post_setstate()
606+
607+
def _post_setstate(self) -> None:
608+
self._is_consolidated = False
609+
self._known_consolidated = False
610+
self._rebuild_blknos_and_blklocs()
611+
612+
# -------------------------------------------------------------------
613+
# Indexing
614+
615+
cdef BlockManager _get_index_slice(self, slobj):
616+
cdef:
617+
SharedBlock blk, nb
618+
619+
nbs = []
620+
for blk in self.blocks:
621+
nb = blk.getitem_block_index(slobj)
622+
nbs.append(nb)
623+
624+
new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)]
625+
return type(self)(tuple(nbs), new_axes, verify_integrity=False)
626+
627+
def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
628+
629+
if axis == 0:
630+
new_blocks = self._slice_take_blocks_ax0(slobj)
631+
elif axis == 1:
632+
return self._get_index_slice(slobj)
633+
else:
634+
raise IndexError("Requested axis not found in manager")
635+
636+
new_axes = list(self.axes)
637+
new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
638+
639+
return type(self)(tuple(new_blocks), new_axes, verify_integrity=False)

pandas/_libs/reduction.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,6 @@ cdef class BlockSlider:
489489
Ensure that we have the original blocks, blknos, and blklocs.
490490
"""
491491
mgr = self.dummy._mgr
492-
mgr.blocks = self.blocks
492+
mgr.blocks = tuple(self.blocks)
493493
mgr._blklocs = self.orig_blklocs
494494
mgr._blknos = self.orig_blknos

pandas/compat/pickle_compat.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
PeriodArray,
2222
TimedeltaArray,
2323
)
24+
from pandas.core.internals import BlockManager
2425

2526
if TYPE_CHECKING:
2627
from pandas import (
@@ -222,7 +223,8 @@ def load_newobj(self):
222223
elif issubclass(cls, TimedeltaArray) and not args:
223224
arr = np.array([], dtype="m8[ns]")
224225
obj = cls.__new__(cls, arr, arr.dtype)
225-
226+
elif cls is BlockManager and not args:
227+
obj = cls.__new__(cls, (), [], False)
226228
else:
227229
obj = cls.__new__(cls, *args)
228230

pandas/core/algorithms.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1876,29 +1876,33 @@ def _sort_tuples(values: np.ndarray) -> np.ndarray:
18761876
return values[indexer]
18771877

18781878

1879-
def union_with_duplicates(lvals: np.ndarray, rvals: np.ndarray) -> np.ndarray:
1879+
def union_with_duplicates(lvals: ArrayLike, rvals: ArrayLike) -> ArrayLike:
18801880
"""
18811881
Extracts the union from lvals and rvals with respect to duplicates and nans in
18821882
both arrays.
18831883
18841884
Parameters
18851885
----------
1886-
lvals: np.ndarray
1886+
lvals: np.ndarray or ExtensionArray
18871887
Left values, which are ordered in front.
1888-
rvals: np.ndarray
1888+
rvals: np.ndarray or ExtensionArray
18891889
right values ordered after lvals.
18901890
18911891
Returns
18921892
-------
1893-
np.ndarray containing the unsorted union of both arrays
1893+
np.ndarray or ExtensionArray
1894+
Containing the unsorted union of both arrays.
18941895
"""
18951896
indexer = []
18961897
l_count = value_counts(lvals, dropna=False)
18971898
r_count = value_counts(rvals, dropna=False)
18981899
l_count, r_count = l_count.align(r_count, fill_value=0)
18991900
unique_array = unique(np.append(lvals, rvals))
1900-
if is_extension_array_dtype(lvals) or is_extension_array_dtype(rvals):
1901-
unique_array = pd_array(unique_array)
1901+
if not isinstance(lvals, np.ndarray):
1902+
# i.e. ExtensionArray
1903+
# Note: we only get here with lvals.dtype == rvals.dtype
1904+
# TODO: are there any cases where union won't be type/dtype preserving?
1905+
unique_array = type(lvals)._from_sequence(unique_array, dtype=lvals.dtype)
19021906
for i, value in enumerate(unique_array):
19031907
indexer += [i] * int(max(l_count[value], r_count[value]))
19041908
return unique_array.take(indexer)

pandas/core/arrays/string_arrow.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -675,13 +675,18 @@ def value_counts(self, dropna: bool = True) -> Series:
675675

676676
vc = self._data.value_counts()
677677

678-
# Index cannot hold ExtensionArrays yet
679-
index = Index(type(self)(vc.field(0)).astype(object))
678+
values = vc.field(0)
679+
counts = vc.field(1)
680+
if dropna and self._data.null_count > 0:
681+
mask = values.is_valid()
682+
values = values.filter(mask)
683+
counts = counts.filter(mask)
684+
680685
# No missing values so we can adhere to the interface and return a numpy array.
681-
counts = np.array(vc.field(1))
686+
counts = np.array(counts)
682687

683-
if dropna and self._data.null_count > 0:
684-
raise NotImplementedError("yo")
688+
# Index cannot hold ExtensionArrays yet
689+
index = Index(type(self)(values)).astype(object)
685690

686691
return Series(counts, index=index).astype("Int64")
687692

pandas/core/computation/engines.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
align_terms,
1313
reconstruct_object,
1414
)
15+
from pandas.core.computation.expr import Expr
1516
from pandas.core.computation.ops import (
1617
MATHOPS,
1718
REDUCTIONS,
@@ -26,13 +27,13 @@ class NumExprClobberingError(NameError):
2627
pass
2728

2829

29-
def _check_ne_builtin_clash(expr):
30+
def _check_ne_builtin_clash(expr: Expr) -> None:
3031
"""
3132
Attempt to prevent foot-shooting in a helpful way.
3233
3334
Parameters
3435
----------
35-
terms : Term
36+
expr : Expr
3637
Terms can contain
3738
"""
3839
names = expr.names

pandas/core/computation/eval.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
"""
22
Top level ``eval`` module.
33
"""
4+
from __future__ import annotations
45

56
import tokenize
6-
from typing import Optional
77
import warnings
88

99
from pandas._libs.lib import no_default
@@ -14,13 +14,14 @@
1414
PARSERS,
1515
Expr,
1616
)
17+
from pandas.core.computation.ops import BinOp
1718
from pandas.core.computation.parsing import tokenize_string
1819
from pandas.core.computation.scope import ensure_scope
1920

2021
from pandas.io.formats.printing import pprint_thing
2122

2223

23-
def _check_engine(engine: Optional[str]) -> str:
24+
def _check_engine(engine: str | None) -> str:
2425
"""
2526
Make sure a valid engine is passed.
2627
@@ -161,9 +162,9 @@ def _check_for_locals(expr: str, stack_level: int, parser: str):
161162

162163

163164
def eval(
164-
expr,
165-
parser="pandas",
166-
engine: Optional[str] = None,
165+
expr: str | BinOp, # we leave BinOp out of the docstr bc it isn't for users
166+
parser: str = "pandas",
167+
engine: str | None = None,
167168
truediv=no_default,
168169
local_dict=None,
169170
global_dict=None,
@@ -309,10 +310,12 @@ def eval(
309310
stacklevel=2,
310311
)
311312

313+
exprs: list[str | BinOp]
312314
if isinstance(expr, str):
313315
_check_expression(expr)
314316
exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
315317
else:
318+
# ops.BinOp; for internal compat, not intended to be passed by users
316319
exprs = [expr]
317320
multi_line = len(exprs) > 1
318321

pandas/core/computation/pytables.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,7 @@ class PyTablesExpr(expr.Expr):
546546

547547
_visitor: PyTablesExprVisitor | None
548548
env: PyTablesScope
549+
expr: str
549550

550551
def __init__(
551552
self,
@@ -570,7 +571,7 @@ def __init__(
570571
local_dict = where.env.scope
571572
_where = where.expr
572573

573-
elif isinstance(where, (list, tuple)):
574+
elif is_list_like(where):
574575
where = list(where)
575576
for idx, w in enumerate(where):
576577
if isinstance(w, PyTablesExpr):
@@ -580,6 +581,7 @@ def __init__(
580581
where[idx] = w
581582
_where = " & ".join(f"({w})" for w in com.flatten(where))
582583
else:
584+
# _validate_where ensures we otherwise have a string
583585
_where = where
584586

585587
self.expr = _where

0 commit comments

Comments
 (0)