jbrockmendel
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/environment.yml b/environment.yml
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi
Lines changed: 15 additions & 0 deletions
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
Lines changed: 105 additions & 1 deletion
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
Lines changed: 3 additions & 1 deletion
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
Lines changed: 10 additions & 6 deletions
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
Lines changed: 10 additions & 5 deletions
diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py
Lines changed: 3 additions & 2 deletions
diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py
Lines changed: 8 additions & 5 deletions
diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py
Lines changed: 3 additions & 1 deletion
@@ -35,7 +35,7 @@ repos:
         exclude: ^pandas/_libs/src/(klib|headers)/
         args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
 -   repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.0
+    rev: 3.9.1
     hooks:
     -   id: flake8
         additional_dependencies:
@@ -75,7 +75,7 @@ repos:
     hooks:
     -   id: yesqa
         additional_dependencies:
-            - flake8==3.9.0
+            - flake8==3.9.1
             - flake8-comprehensions==3.1.0
             - flake8-bugbear==21.3.2
             - pandas-dev-flaker==0.2.0
 
@@ -20,7 +20,7 @@ dependencies:
   # code checks
   - black=20.8b1
   - cpplint
-  - flake8=3.9.0
+  - flake8=3.9.1
   - flake8-bugbear=21.3.2  # used by flake8, find likely bugs
   - flake8-comprehensions=3.1.0  # used by flake8, linting of unnecessary comprehensions
   - isort>=5.2.1  # check that imports are in the right order
 
@@ -11,6 +11,9 @@ from pandas._typing import (
     T,
 )
 
+from pandas import Index
+from pandas.core.internals.blocks import Block as B
+
 def slice_len(slc: slice, objlen: int = ...) -> int: ...
 
 
@@ -66,3 +69,15 @@ class NumpyBlock(SharedBlock):
 
 class Block(SharedBlock):
     ...
+
+class BlockManager:  # typing stub for the cdef class BlockManager (internals.pyx)
+    blocks: tuple[B, ...]
+    axes: list[Index]
+    _known_consolidated: bool  # initialised to False by the constructor
+    _is_consolidated: bool  # initialised to False by the constructor
+    _blknos: np.ndarray  # starts out as None; populated lazily
+    _blklocs: np.ndarray  # starts out as None; populated lazily
+
+    def __init__(self, blocks: tuple[B, ...], axes: list[Index], verify_integrity: bool = True): ...
+
+    def get_slice(self: T, slobj: slice, axis: int=...) -> T: ...
@@ -515,7 +515,7 @@ cdef class NumpyBlock(SharedBlock):
         self.values = values
 
     # @final  # not useful in cython, but we _would_ annotate with @final
-    def getitem_block_index(self, slicer: slice) -> NumpyBlock:
+    cpdef NumpyBlock getitem_block_index(self, slice slicer):
         """
         Perform __getitem__-like specialized to slicing along index.
 
@@ -533,3 +533,107 @@ cdef class Block(SharedBlock):
         # set values here the (implicit) call to SharedBlock.__cinit__ will
         #  set placement and ndim
         self.values = values
+
+
+@cython.freelist(64)  # keep up to 64 freed instances around for reuse
+cdef class BlockManager:  # blocks + axes container with pickle and slice support
+    cdef:
+        public tuple blocks
+        public list axes
+        public bint _known_consolidated, _is_consolidated
+        public ndarray _blknos, _blklocs
+
+    def __cinit__(self, blocks, axes, verify_integrity=True):  # last arg unused here
+        if isinstance(blocks, list):
+            # Backward compat for e.g. pyarrow
+            blocks = tuple(blocks)
+
+        self.blocks = blocks
+        self.axes = axes.copy()  # copy to make sure we are not remotely-mutable
+
+        # Populate _known_consolidated, _blknos, and _blklocs lazily
+        self._known_consolidated = False
+        self._is_consolidated = False
+        # error: Incompatible types in assignment (expression has type "None",
+        # variable has type "ndarray")
+        self._blknos = None  # type: ignore[assignment]
+        # error: Incompatible types in assignment (expression has type "None",
+        # variable has type "ndarray")
+        self._blklocs = None  # type: ignore[assignment]
+
+    # -------------------------------------------------------------------
+    # Pickle
+
+    cpdef __reduce__(self):  # returns (type(self), constructor args)
+        if len(self.axes) == 1:
+            # SingleBlockManager, __init__ expects Block, axis
+            args = (self.blocks[0], self.axes[0])
+        else:
+            args = (self.blocks, self.axes)
+        return type(self), args
+
+    cpdef __setstate__(self, state):  # only the "0.14.1" state layout is accepted
+        from pandas.core.construction import extract_array
+        from pandas.core.internals.blocks import (
+            ensure_block_shape,
+            new_block,
+        )
+        from pandas.core.internals.managers import ensure_index
+
+        if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
+            state = state[3]["0.14.1"]
+            axes = [ensure_index(ax) for ax in state["axes"]]
+            ndim = len(axes)
+
+            for blk in state["blocks"]:  # rewrites blk["values"] in place
+                vals = blk["values"]
+                # older versions may hold e.g. DatetimeIndex instead of DTA
+                vals = extract_array(vals, extract_numpy=True)
+                blk["values"] = ensure_block_shape(vals, ndim=ndim)
+
+            nbs = [
+                new_block(blk["values"], blk["mgr_locs"], ndim=ndim)
+                for blk in state["blocks"]
+            ]
+            blocks = tuple(nbs)
+            self.blocks = blocks
+            self.axes = axes
+
+        else:
+            raise NotImplementedError("pre-0.14.1 pickles are no longer supported")
+
+        self._post_setstate()
+
+    def _post_setstate(self) -> None:  # reset lazily-computed state after unpickle
+        self._is_consolidated = False
+        self._known_consolidated = False
+        self._rebuild_blknos_and_blklocs()  # defined elsewhere (subclass?); confirm
+
+    # -------------------------------------------------------------------
+    # Indexing
+
+    cdef BlockManager _get_index_slice(self, slobj):  # slice along axes[1]
+        cdef:
+            SharedBlock blk, nb
+
+        nbs = []
+        for blk in self.blocks:
+            nb = blk.getitem_block_index(slobj)
+            nbs.append(nb)
+
+        new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)]  # 2D only
+        return type(self)(tuple(nbs), new_axes, verify_integrity=False)
+
+    def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
+        # axis=0: _slice_take_blocks_ax0 (defined elsewhere); axis=1: slice each block
+        if axis == 0:
+            new_blocks = self._slice_take_blocks_ax0(slobj)
+        elif axis == 1:
+            return self._get_index_slice(slobj)
+        else:
+            raise IndexError("Requested axis not found in manager")
+
+        new_axes = list(self.axes)
+        new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
+
+        return type(self)(tuple(new_blocks), new_axes, verify_integrity=False)
@@ -489,6 +489,6 @@ cdef class BlockSlider:
         Ensure that we have the original blocks, blknos, and blklocs.
         """
         mgr = self.dummy._mgr
-        mgr.blocks = self.blocks
+        mgr.blocks = tuple(self.blocks)
         mgr._blklocs = self.orig_blklocs
         mgr._blknos = self.orig_blknos
@@ -21,6 +21,7 @@
     PeriodArray,
     TimedeltaArray,
 )
+from pandas.core.internals import BlockManager
 
 if TYPE_CHECKING:
     from pandas import (
@@ -222,7 +223,8 @@ def load_newobj(self):
     elif issubclass(cls, TimedeltaArray) and not args:
         arr = np.array([], dtype="m8[ns]")
         obj = cls.__new__(cls, arr, arr.dtype)
-
+    elif cls is BlockManager and not args:
+        obj = cls.__new__(cls, (), [], False)
     else:
         obj = cls.__new__(cls, *args)
 
 
@@ -1876,29 +1876,33 @@ def _sort_tuples(values: np.ndarray) -> np.ndarray:
     return values[indexer]
 
 
-def union_with_duplicates(lvals: np.ndarray, rvals: np.ndarray) -> np.ndarray:
+def union_with_duplicates(lvals: ArrayLike, rvals: ArrayLike) -> ArrayLike:
     """
     Extracts the union from lvals and rvals with respect to duplicates and nans in
     both arrays.
 
     Parameters
     ----------
-    lvals: np.ndarray
+    lvals : np.ndarray or ExtensionArray
         left values which is ordered in front.
-    rvals: np.ndarray
+    rvals : np.ndarray or ExtensionArray
         right values ordered after lvals.
 
     Returns
     -------
-    np.ndarray containing the unsorted union of both arrays
+    np.ndarray or ExtensionArray
+        Containing the unsorted union of both arrays.
     """
     indexer = []
     l_count = value_counts(lvals, dropna=False)
     r_count = value_counts(rvals, dropna=False)
     l_count, r_count = l_count.align(r_count, fill_value=0)
     unique_array = unique(np.append(lvals, rvals))
-    if is_extension_array_dtype(lvals) or is_extension_array_dtype(rvals):
-        unique_array = pd_array(unique_array)
+    if not isinstance(lvals, np.ndarray):
+        # i.e. ExtensionArray
+        # Note: we only get here with lvals.dtype == rvals.dtype
+        # TODO: are there any cases where union won't be type/dtype preserving?
+        unique_array = type(lvals)._from_sequence(unique_array, dtype=lvals.dtype)
     for i, value in enumerate(unique_array):
         indexer += [i] * int(max(l_count[value], r_count[value]))
     return unique_array.take(indexer)
@@ -675,13 +675,18 @@ def value_counts(self, dropna: bool = True) -> Series:
 
         vc = self._data.value_counts()
 
-        # Index cannot hold ExtensionArrays yet
-        index = Index(type(self)(vc.field(0)).astype(object))
+        values = vc.field(0)
+        counts = vc.field(1)
+        if dropna and self._data.null_count > 0:
+            mask = values.is_valid()
+            values = values.filter(mask)
+            counts = counts.filter(mask)
+
         # No missing values so we can adhere to the interface and return a numpy array.
-        counts = np.array(vc.field(1))
+        counts = np.array(counts)
 
-        if dropna and self._data.null_count > 0:
-            raise NotImplementedError("yo")
+        # Index cannot hold ExtensionArrays yet
+        index = Index(type(self)(values)).astype(object)
 
         return Series(counts, index=index).astype("Int64")
 
 
@@ -12,6 +12,7 @@
     align_terms,
     reconstruct_object,
 )
+from pandas.core.computation.expr import Expr
 from pandas.core.computation.ops import (
     MATHOPS,
     REDUCTIONS,
@@ -26,13 +27,13 @@ class NumExprClobberingError(NameError):
     pass
 
 
-def _check_ne_builtin_clash(expr):
+def _check_ne_builtin_clash(expr: Expr) -> None:
     """
     Attempt to prevent foot-shooting in a helpful way.
 
     Parameters
     ----------
-    terms : Term
+    expr : Expr
         Terms can contain
     """
     names = expr.names
 
@@ -1,9 +1,9 @@
 """
 Top level ``eval`` module.
 """
+from __future__ import annotations
 
 import tokenize
-from typing import Optional
 import warnings
 
 from pandas._libs.lib import no_default
@@ -14,13 +14,14 @@
     PARSERS,
     Expr,
 )
+from pandas.core.computation.ops import BinOp
 from pandas.core.computation.parsing import tokenize_string
 from pandas.core.computation.scope import ensure_scope
 
 from pandas.io.formats.printing import pprint_thing
 
 
-def _check_engine(engine: Optional[str]) -> str:
+def _check_engine(engine: str | None) -> str:
     """
     Make sure a valid engine is passed.
 
@@ -161,9 +162,9 @@ def _check_for_locals(expr: str, stack_level: int, parser: str):
 
 
 def eval(
-    expr,
-    parser="pandas",
-    engine: Optional[str] = None,
+    expr: str | BinOp,  # we leave BinOp out of the docstr bc it isn't for users
+    parser: str = "pandas",
+    engine: str | None = None,
     truediv=no_default,
     local_dict=None,
     global_dict=None,
@@ -309,10 +310,12 @@ def eval(
             stacklevel=2,
         )
 
+    exprs: list[str | BinOp]
     if isinstance(expr, str):
         _check_expression(expr)
         exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
     else:
+        # ops.BinOp; for internal compat, not intended to be passed by users
         exprs = [expr]
     multi_line = len(exprs) > 1
 
 
@@ -546,6 +546,7 @@ class PyTablesExpr(expr.Expr):
 
     _visitor: PyTablesExprVisitor | None
     env: PyTablesScope
+    expr: str
 
     def __init__(
         self,
@@ -570,7 +571,7 @@ def __init__(
             local_dict = where.env.scope
             _where = where.expr
 
-        elif isinstance(where, (list, tuple)):
+        elif is_list_like(where):
             where = list(where)
             for idx, w in enumerate(where):
                 if isinstance(w, PyTablesExpr):
@@ -580,6 +581,7 @@ def __init__(
                     where[idx] = w
             _where = " & ".join(f"({w})" for w in com.flatten(where))
         else:
+            # _validate_where ensures we otherwise have a string
             _where = where
 
         self.expr = _where