From c58f17633bd7d80cf128787917fdddb4993f77d8 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 29 Oct 2022 12:37:47 -0700 Subject: [PATCH 1/2] DEPR: internals --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/internals/__init__.py | 19 ---------- pandas/core/internals/blocks.py | 47 ++---------------------- pandas/core/internals/managers.py | 34 ++--------------- pandas/tests/internals/test_internals.py | 14 ------- pandas/tests/io/__init__.py | 3 -- pandas/tests/io/test_common.py | 3 -- pandas/tests/io/test_feather.py | 1 - pandas/tests/io/test_orc.py | 4 -- pandas/tests/io/test_parquet.py | 6 --- pandas/tests/test_downstream.py | 1 - 11 files changed, 8 insertions(+), 125 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 1f245b585df48..4aa7cfb3a39a6 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -156,6 +156,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- Removed deprecated :class:`CategoricalBlock` and :meth:`Block.is_categorical`; datetime64 and timedelta64 values must now be wrapped in :class:`DatetimeArray` or :class:`TimedeltaArray` before being passed to :meth:`Block.make_block_same_class`, ``DatetimeTZBlock.values`` must have the correct ndim when passed to the :class:`BlockManager` constructor, and the ``fastpath`` keyword has been removed from the :class:`SingleBlockManager` constructor (:issue:`40226`, :issue:`40571`) - Removed deprecated :meth:`Categorical.to_dense`, use ``np.asarray(cat)`` instead (:issue:`32639`) - Removed deprecated :meth:`Categorical.take_nd` (:issue:`27745`) - Removed deprecated :meth:`Categorical.mode`, use ``Series(cat).mode()`` instead (:issue:`45033`) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index ea69b567611e4..0797e62de7a9f 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -38,22 +38,3 @@ # this is preserved
here for downstream compatibility (GH-33892) "create_block_manager_from_blocks", ] - - -def __getattr__(name: str): - import warnings - - from pandas.util._exceptions import find_stack_level - - if name == "CategoricalBlock": - warnings.warn( - "CategoricalBlock is deprecated and will be removed in a future version. " - "Use ExtensionBlock instead.", - DeprecationWarning, - stacklevel=find_stack_level(), - ) - from pandas.core.internals.blocks import CategoricalBlock - - return CategoricalBlock - - raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f944c74ac37fd..e9541a878299c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -59,7 +59,6 @@ is_string_dtype, ) from pandas.core.dtypes.dtypes import ( - CategoricalDtype, ExtensionDtype, PandasDtype, PeriodDtype, @@ -175,18 +174,6 @@ def _can_hold_na(self) -> bool: return dtype.kind not in ["b", "i", "u"] return dtype._can_hold_na - @final - @cache_readonly - def is_categorical(self) -> bool: - warnings.warn( - "Block.is_categorical is deprecated and will be removed in a " - "future version. Use isinstance(block.values, Categorical) " - "instead. See https://github.com/pandas-dev/pandas/issues/40226", - DeprecationWarning, - stacklevel=find_stack_level(), - ) - return isinstance(self.values, Categorical) - @final @property def is_bool(self) -> bool: @@ -240,24 +227,11 @@ def make_block_same_class( self, values, placement: BlockPlacement | None = None ) -> Block: """Wrap given values in a block of same type as self.""" + # Pre-2.0 we called ensure_wrapped_if_datetimelike because fastparquet + # relied on it; as of 2.0 the caller is responsible for this.
if placement is None: placement = self._mgr_locs - if values.dtype.kind in ["m", "M"]: - - new_values = ensure_wrapped_if_datetimelike(values) - if new_values is not values: - # TODO(2.0): remove once fastparquet has stopped relying on it - warnings.warn( - "In a future version, Block.make_block_same_class will " - "assume that datetime64 and timedelta64 ndarrays have " - "already been cast to DatetimeArray and TimedeltaArray, " - "respectively.", - DeprecationWarning, - stacklevel=find_stack_level(), - ) - values = new_values - # We assume maybe_coerce_values has already been called return type(self)(values, placement=placement, ndim=self.ndim) @@ -1647,7 +1621,7 @@ class ExtensionBlock(libinternals.Block, EABackedBlock): Notes ----- This holds all 3rd-party extension array types. It's also the immediate - parent class for our internal extension types' blocks, CategoricalBlock. + parent class for our internal extension types' blocks. ExtensionArrays are limited to 1-D. """ @@ -2064,17 +2038,6 @@ def convert( return [self.make_block(res_values)] -class CategoricalBlock(ExtensionBlock): - # this Block type is kept for backwards-compatibility - __slots__ = () - - # GH#43232, GH#43334 self.values.dtype can be changed inplace until 2.0, - # so this cannot be cached - @property - def dtype(self) -> DtypeObj: - return self.values.dtype - - # ----------------------------------------------------------------- # Constructor Helpers @@ -2130,8 +2093,6 @@ def get_block_type(dtype: DtypeObj): if isinstance(dtype, SparseDtype): # Need this first(ish) so that Sparse[datetime] is sparse cls = ExtensionBlock - elif isinstance(dtype, CategoricalDtype): - cls = CategoricalBlock elif vtype is Timestamp: cls = DatetimeTZBlock elif isinstance(dtype, PeriodDtype): @@ -2372,7 +2333,7 @@ def external_values(values: ArrayLike) -> ArrayLike: elif isinstance(values, (DatetimeArray, TimedeltaArray)): # NB: for datetime64tz this is different from np.asarray(values), since # that returns an 
object-dtype ndarray of Timestamps. - # Avoid FutureWarning in .astype in casting from dt64tz to dt64 + # Avoid raising in .astype in casting from dt64tz to dt64 return values._data else: return values diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f21c02a7823ae..f515dbeeb90c6 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -74,7 +74,6 @@ ) from pandas.core.internals.blocks import ( Block, - DatetimeTZBlock, NumpyBlock, ensure_block_shape, extend_blocks, @@ -1008,27 +1007,9 @@ def __init__( f"Number of Block dimensions ({block.ndim}) must equal " f"number of axes ({self.ndim})" ) - if isinstance(block, DatetimeTZBlock) and block.values.ndim == 1: - # TODO(2.0): remove once fastparquet no longer needs this - warnings.warn( - "In a future version, the BlockManager constructor " - "will assume that a DatetimeTZBlock with block.ndim==2 " - "has block.values.ndim == 2.", - DeprecationWarning, - stacklevel=find_stack_level(), - ) - - # error: Incompatible types in assignment (expression has type - # "Union[ExtensionArray, ndarray]", variable has type - # "DatetimeArray") - block.values = ensure_block_shape( # type: ignore[assignment] - block.values, self.ndim - ) - try: - block._cache.clear() - except AttributeError: - # _cache not initialized - pass + # As of 2.0, the caller is responsible for ensuring that a + # DatetimeTZBlock with block.ndim == 2 has block.values.ndim == 2; + # previously there was a special check for fastparquet compat.
self._verify_integrity() @@ -1876,20 +1857,11 @@ def __init__( axis: Index, refs: list[weakref.ref | None] | None = None, verify_integrity: bool = False, - fastpath=lib.no_default, ) -> None: # Assertions disabled for performance # assert isinstance(block, Block), type(block) # assert isinstance(axis, Index), type(axis) - if fastpath is not lib.no_default: - warnings.warn( - "The `fastpath` keyword is deprecated and will be removed " - "in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - self.axes = [axis] self.blocks = (block,) self.refs = refs diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index b64220d90f9a2..ecf247efd74bf 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -356,12 +356,6 @@ def test_split(self): for res, exp in zip(result, expected): assert_block_equal(res, exp) - def test_is_categorical_deprecated(self, fblock): - # GH#40571 - blk = fblock - with tm.assert_produces_warning(DeprecationWarning): - blk.is_categorical - class TestBlockManager: def test_attrs(self): @@ -1432,11 +1426,3 @@ def test_make_block_no_pandas_array(block_maker): ) assert result.dtype.kind in ["i", "u"] assert result.is_extension is False - - -def test_single_block_manager_fastpath_deprecated(): - # GH#33092 - ser = Series(range(3)) - blk = ser._data.blocks[0] - with tm.assert_produces_warning(FutureWarning): - SingleBlockManager(blk, ser.index, fastpath=True) diff --git a/pandas/tests/io/__init__.py b/pandas/tests/io/__init__.py index c99d03afc8320..15294fd0cabbc 100644 --- a/pandas/tests/io/__init__.py +++ b/pandas/tests/io/__init__.py @@ -5,9 +5,6 @@ pytest.mark.filterwarnings( "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning" ), - pytest.mark.filterwarnings( - "ignore:Block.is_categorical is deprecated:DeprecationWarning" - ), pytest.mark.filterwarnings( r"ignore:`np\.bool` is a deprecated alias:DeprecationWarning" ), diff --git 
a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index ec48357e0395d..145682b484100 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -317,9 +317,6 @@ def test_read_expands_user_home_dir( ), ], ) - @pytest.mark.filterwarnings( - "ignore:CategoricalBlock is deprecated:DeprecationWarning" - ) @pytest.mark.filterwarnings( # pytables np.object usage "ignore:`np.object` is a deprecated alias:DeprecationWarning" ) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 236a7f9e1a9c1..eaeb769a94c38 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -15,7 +15,6 @@ @filter_sparse @pytest.mark.single_cpu -@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") class TestFeather: def check_error_on_write(self, df, exc, err_msg): # check that we are raising the exception diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 0bb320907b813..a0acf160854ac 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -14,10 +14,6 @@ pytest.importorskip("pyarrow.orc") -pytestmark = pytest.mark.filterwarnings( - "ignore:RangeIndex.* is deprecated:DeprecationWarning" -) - @pytest.fixture def dirpath(datapath): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 9c85ab4ba4a57..75683a1d96bfb 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -53,11 +53,6 @@ _HAVE_FASTPARQUET = False -pytestmark = pytest.mark.filterwarnings( - "ignore:RangeIndex.* is deprecated:DeprecationWarning" -) - - # TODO(ArrayManager) fastparquet relies on BlockManager internals # setup engines & skips @@ -688,7 +683,6 @@ def test_read_empty_array(self, pa, dtype): ) -@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): diff --git a/pandas/tests/test_downstream.py 
b/pandas/tests/test_downstream.py index 119ffd8cfd5a1..cea9484fbbf80 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -234,7 +234,6 @@ def test_geopandas(): # Cython import warning @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") -@pytest.mark.filterwarnings("ignore:RangeIndex.* is deprecated:DeprecationWarning") def test_pyarrow(df): pyarrow = import_module("pyarrow") From 22cad4aba025a43e76cda520d6988608112266b3 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Oct 2022 11:28:57 -0700 Subject: [PATCH 2/2] bump fastparquet minimum --- ci/deps/actions-38-minimum_versions.yaml | 2 +- doc/source/whatsnew/v2.0.0.rst | 2 ++ pandas/compat/_optional.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml index 5540ba01a8f36..c2f40dfbfb250 100644 --- a/ci/deps/actions-38-minimum_versions.yaml +++ b/ci/deps/actions-38-minimum_versions.yaml @@ -25,7 +25,7 @@ dependencies: - blosc=1.21.0 - bottleneck=1.3.2 - brotlipy=0.7.0 - - fastparquet=0.4.0 + - fastparquet=0.6.3 - fsspec=2021.07.0 - html5lib=1.1 - hypothesis=6.13.0 diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9e8d381a01888..d7325f6014c86 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -124,6 +124,8 @@ Optional libraries below the lowest tested version may still work, but are not c +=================+=================+=========+ | pyarrow | 6.0.0 | X | +-----------------+-----------------+---------+ +| fastparquet | 0.6.3 | X | ++-----------------+-----------------+---------+ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. 
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 856fb5e4cb66b..1bfef131aac1d 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -16,7 +16,7 @@ "blosc": "1.21.0", "bottleneck": "1.3.2", "brotli": "0.7.0", - "fastparquet": "0.4.0", + "fastparquet": "0.6.3", "fsspec": "2021.07.0", "html5lib": "1.1", "hypothesis": "6.13.0",