Skip to content

Commit b2a622e

Browse files
authored
DEPR: accepting Manager objects in DataFrame/Series (pandas-dev#52419)
1 parent 3ccdc5b commit b2a622e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+437
-95
lines changed

doc/source/user_guide/10min.rst

+2
Original file line numberDiff line numberDiff line change
@@ -763,12 +763,14 @@ Parquet
763763
Writing to a Parquet file:
764764

765765
.. ipython:: python
766+
:okwarning:
766767
767768
df.to_parquet("foo.parquet")
768769
769770
Reading from a Parquet file using :func:`read_parquet`:
770771

771772
.. ipython:: python
773+
:okwarning:
772774
773775
pd.read_parquet("foo.parquet")
774776

doc/source/user_guide/io.rst

+2
Original file line numberDiff line numberDiff line change
@@ -2247,6 +2247,7 @@ For line-delimited json files, pandas can also return an iterator which reads in
22472247
Line-delimited json can also be read using the pyarrow reader by specifying ``engine="pyarrow"``.
22482248

22492249
.. ipython:: python
2250+
:okwarning:
22502251
22512252
from io import BytesIO
22522253
df = pd.read_json(BytesIO(jsonl.encode()), lines=True, engine="pyarrow")
@@ -5554,6 +5555,7 @@ Read from an orc file.
55545555
Read only certain columns of an orc file.
55555556

55565557
.. ipython:: python
5558+
:okwarning:
55575559
55585560
result = pd.read_orc(
55595561
"example_pa.orc",

doc/source/user_guide/pyarrow.rst

+3
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ To convert a :external+pyarrow:py:class:`pyarrow.Table` to a :class:`DataFrame`,
104104
:external+pyarrow:py:meth:`pyarrow.Table.to_pandas` method with ``types_mapper=pd.ArrowDtype``.
105105

106106
.. ipython:: python
107+
:okwarning:
107108
108109
table = pa.table([pa.array([1, 2, 3], type=pa.int64())], names=["a"])
109110
@@ -164,6 +165,7 @@ functions provide an ``engine`` keyword that can dispatch to PyArrow to accelera
164165
* :func:`read_feather`
165166

166167
.. ipython:: python
168+
:okwarning:
167169
168170
import io
169171
data = io.StringIO("""a,b,c
@@ -178,6 +180,7 @@ PyArrow-backed data by specifying the parameter ``dtype_backend="pyarrow"``. A r
178180
``engine="pyarrow"`` to necessarily return PyArrow-backed data.
179181

180182
.. ipython:: python
183+
:okwarning:
181184
182185
import io
183186
data = io.StringIO("""a,b,c,d,e,f,g,h,i

doc/source/user_guide/scale.rst

+3
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ To load the columns we want, we have two options.
5151
Option 1 loads in all the data and then filters to what we need.
5252

5353
.. ipython:: python
54+
:okwarning:
5455
5556
columns = ["id_0", "name_0", "x_0", "y_0"]
5657
@@ -59,6 +60,7 @@ Option 1 loads in all the data and then filters to what we need.
5960
Option 2 only loads the columns we request.
6061

6162
.. ipython:: python
63+
:okwarning:
6264
6365
pd.read_parquet("timeseries_wide.parquet", columns=columns)
6466
@@ -200,6 +202,7 @@ counts up to this point. As long as each individual file fits in memory, this wi
200202
work for arbitrary-sized datasets.
201203

202204
.. ipython:: python
205+
:okwarning:
203206
204207
%%time
205208
files = pathlib.Path("data/timeseries/").glob("ts*.parquet")

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ When this keyword is set to ``"pyarrow"``, then these functions will return pyar
152152
* :meth:`Series.convert_dtypes`
153153

154154
.. ipython:: python
155+
:okwarning:
155156
156157
import io
157158
data = io.StringIO("""a,b,c,d,e,f,g,h,i

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ Other Deprecations
249249
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
250250
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
251251
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_xml` except ``path_or_buffer``. (:issue:`54229`)
252+
- Deprecated passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`)
252253
- Deprecated automatic downcasting of object-dtype results in :meth:`Series.replace` and :meth:`DataFrame.replace`, explicitly call ``result = result.infer_objects(copy=False)`` instead. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54710`)
253254
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
254255
- Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)

pandas/conftest.py

+1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ def pytest_collection_modifyitems(items, config) -> None:
178178
"DataFrameGroupBy.fillna",
179179
"DataFrame.fillna with 'method' is deprecated",
180180
),
181+
("read_parquet", "Passing a BlockManager to DataFrame is deprecated"),
181182
]
182183

183184
for item in items:

pandas/core/arraylike.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,10 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
263263
Series,
264264
)
265265
from pandas.core.generic import NDFrame
266-
from pandas.core.internals import BlockManager
266+
from pandas.core.internals import (
267+
ArrayManager,
268+
BlockManager,
269+
)
267270

268271
cls = type(self)
269272

@@ -347,7 +350,7 @@ def _reconstruct(result):
347350
if method == "outer":
348351
raise NotImplementedError
349352
return result
350-
if isinstance(result, BlockManager):
353+
if isinstance(result, (BlockManager, ArrayManager)):
351354
# we went through BlockManager.apply e.g. np.sqrt
352355
result = self._constructor_from_mgr(result, axes=result.axes)
353356
else:

pandas/core/frame.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -644,7 +644,6 @@ def _constructor(self) -> Callable[..., DataFrame]:
644644

645645
def _constructor_from_mgr(self, mgr, axes):
646646
df = self._from_mgr(mgr, axes=axes)
647-
648647
if type(self) is DataFrame:
649648
# fastpath avoiding constructor call
650649
return df
@@ -677,17 +676,29 @@ def __init__(
677676
dtype: Dtype | None = None,
678677
copy: bool | None = None,
679678
) -> None:
679+
allow_mgr = False
680680
if dtype is not None:
681681
dtype = self._validate_dtype(dtype)
682682

683683
if isinstance(data, DataFrame):
684684
data = data._mgr
685+
allow_mgr = True
685686
if not copy:
686687
# if not copying data, ensure to still return a shallow copy
687688
# to avoid the result sharing the same Manager
688689
data = data.copy(deep=False)
689690

690691
if isinstance(data, (BlockManager, ArrayManager)):
692+
if not allow_mgr:
693+
# GH#52419
694+
warnings.warn(
695+
f"Passing a {type(data).__name__} to {type(self).__name__} "
696+
"is deprecated and will raise in a future version. "
697+
"Use public APIs instead.",
698+
DeprecationWarning,
699+
stacklevel=find_stack_level(),
700+
)
701+
691702
if using_copy_on_write():
692703
data = data.copy(deep=False)
693704
# first check if a Manager is passed without any other arguments
@@ -2462,7 +2473,7 @@ def maybe_reorder(
24622473
manager = _get_option("mode.data_manager", silent=True)
24632474
mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager)
24642475

2465-
return cls(mgr)
2476+
return cls._from_mgr(mgr, axes=mgr.axes)
24662477

24672478
def to_records(
24682479
self, index: bool = True, column_dtypes=None, index_dtypes=None
@@ -2672,7 +2683,7 @@ def _from_arrays(
26722683
verify_integrity=verify_integrity,
26732684
typ=manager,
26742685
)
2675-
return cls(mgr)
2686+
return cls._from_mgr(mgr, axes=mgr.axes)
26762687

26772688
@doc(
26782689
storage_options=_shared_docs["storage_options"],

pandas/core/generic.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,8 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self
829829
if not using_copy_on_write() and copy is not False:
830830
new_mgr = new_mgr.copy(deep=True)
831831

832-
return self._constructor(new_mgr).__finalize__(self, method="swapaxes")
832+
out = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
833+
return out.__finalize__(self, method="swapaxes")
833834

834835
return self._constructor(
835836
new_values,

pandas/core/series.py

+41-9
Original file line numberDiff line numberDiff line change
@@ -390,12 +390,22 @@ def __init__(
390390
else:
391391
fastpath = False
392392

393+
allow_mgr = False
393394
if (
394395
isinstance(data, (SingleBlockManager, SingleArrayManager))
395396
and index is None
396397
and dtype is None
397398
and (copy is False or copy is None)
398399
):
400+
if not allow_mgr:
401+
# GH#52419
402+
warnings.warn(
403+
f"Passing a {type(data).__name__} to {type(self).__name__} "
404+
"is deprecated and will raise in a future version. "
405+
"Use public APIs instead.",
406+
DeprecationWarning,
407+
stacklevel=find_stack_level(),
408+
)
399409
if using_copy_on_write():
400410
data = data.copy(deep=False)
401411
# GH#33357 called with just the SingleBlockManager
@@ -423,8 +433,19 @@ def __init__(
423433
data = SingleBlockManager.from_array(data, index)
424434
elif manager == "array":
425435
data = SingleArrayManager.from_array(data, index)
436+
allow_mgr = True
426437
elif using_copy_on_write() and not copy:
427438
data = data.copy(deep=False)
439+
440+
if not allow_mgr:
441+
warnings.warn(
442+
f"Passing a {type(data).__name__} to {type(self).__name__} "
443+
"is deprecated and will raise in a future version. "
444+
"Use public APIs instead.",
445+
DeprecationWarning,
446+
stacklevel=find_stack_level(),
447+
)
448+
428449
if copy:
429450
data = data.copy()
430451
# skips validation of the name
@@ -435,6 +456,15 @@ def __init__(
435456
if isinstance(data, SingleBlockManager) and using_copy_on_write() and not copy:
436457
data = data.copy(deep=False)
437458

459+
if not allow_mgr:
460+
warnings.warn(
461+
f"Passing a {type(data).__name__} to {type(self).__name__} "
462+
"is deprecated and will raise in a future version. "
463+
"Use public APIs instead.",
464+
DeprecationWarning,
465+
stacklevel=find_stack_level(),
466+
)
467+
438468
name = ibase.maybe_extract_name(name, data, type(self))
439469

440470
if index is not None:
@@ -500,6 +530,16 @@ def __init__(
500530
"`index` argument. `copy` must be False."
501531
)
502532

533+
if not allow_mgr:
534+
warnings.warn(
535+
f"Passing a {type(data).__name__} to {type(self).__name__} "
536+
"is deprecated and will raise in a future version. "
537+
"Use public APIs instead.",
538+
DeprecationWarning,
539+
stacklevel=find_stack_level(),
540+
)
541+
allow_mgr = True
542+
503543
elif isinstance(data, ExtensionArray):
504544
pass
505545
else:
@@ -612,22 +652,14 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]:
612652
return DataFrame
613653

614654
def _expanddim_from_mgr(self, mgr, axes) -> DataFrame:
615-
# https://github.com/pandas-dev/pandas/pull/52132#issuecomment-1481491828
616-
# This is a short-term implementation that will be replaced
617-
# with self._constructor_expanddim._constructor_from_mgr(...)
618-
# once downstream packages (geopandas) have had a chance to implement
619-
# their own overrides.
620-
# error: "Callable[..., DataFrame]" has no attribute "_from_mgr" [attr-defined]
621-
from pandas import DataFrame
655+
from pandas.core.frame import DataFrame
622656

623657
return DataFrame._from_mgr(mgr, axes=mgr.axes)
624658

625659
def _constructor_expanddim_from_mgr(self, mgr, axes):
626660
df = self._expanddim_from_mgr(mgr, axes)
627661
if type(self) is Series:
628-
# fastpath avoiding constructor
629662
return df
630-
assert axes is mgr.axes
631663
return self._constructor_expanddim(df, copy=False)
632664

633665
# types

pandas/tests/arrays/interval/test_interval.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -337,20 +337,26 @@ def test_arrow_table_roundtrip(breaks):
337337

338338
table = pa.table(df)
339339
assert isinstance(table.field("a").type, ArrowIntervalType)
340-
result = table.to_pandas()
340+
msg = "Passing a BlockManager to DataFrame is deprecated"
341+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
342+
result = table.to_pandas()
341343
assert isinstance(result["a"].dtype, pd.IntervalDtype)
342344
tm.assert_frame_equal(result, df)
343345

344346
table2 = pa.concat_tables([table, table])
345-
result = table2.to_pandas()
347+
msg = "Passing a BlockManager to DataFrame is deprecated"
348+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
349+
result = table2.to_pandas()
346350
expected = pd.concat([df, df], ignore_index=True)
347351
tm.assert_frame_equal(result, expected)
348352

349353
# GH-41040
350354
table = pa.table(
351355
[pa.chunked_array([], type=table.column(0).type)], schema=table.schema
352356
)
353-
result = table.to_pandas()
357+
msg = "Passing a BlockManager to DataFrame is deprecated"
358+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
359+
result = table.to_pandas()
354360
tm.assert_frame_equal(result, expected[0:0])
355361

356362

@@ -371,7 +377,9 @@ def test_arrow_table_roundtrip_without_metadata(breaks):
371377
table = table.replace_schema_metadata()
372378
assert table.schema.metadata is None
373379

374-
result = table.to_pandas()
380+
msg = "Passing a BlockManager to DataFrame is deprecated"
381+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
382+
result = table.to_pandas()
375383
assert isinstance(result["a"].dtype, pd.IntervalDtype)
376384
tm.assert_frame_equal(result, df)
377385

pandas/tests/arrays/masked/test_arrow_compat.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ def test_arrow_roundtrip(data):
3535
df = pd.DataFrame({"a": data})
3636
table = pa.table(df)
3737
assert table.field("a").type == str(data.dtype.numpy_dtype)
38-
result = table.to_pandas()
38+
39+
msg = "Passing a BlockManager to DataFrame is deprecated"
40+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
41+
result = table.to_pandas()
3942
assert result["a"].dtype == data.dtype
4043
tm.assert_frame_equal(result, df)
4144

@@ -53,7 +56,9 @@ def types_mapper(arrow_type):
5356
record_batch = pa.RecordBatch.from_arrays(
5457
[bools_array, ints_array, small_ints_array], ["bools", "ints", "small_ints"]
5558
)
56-
result = record_batch.to_pandas(types_mapper=types_mapper)
59+
msg = "Passing a BlockManager to DataFrame is deprecated"
60+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
61+
result = record_batch.to_pandas(types_mapper=types_mapper)
5762
bools = pd.Series([True, None, False], dtype="boolean")
5863
ints = pd.Series([1, None, 2], dtype="Int64")
5964
small_ints = pd.Series([-1, 0, 7], dtype="Int64")
@@ -70,7 +75,9 @@ def test_arrow_load_from_zero_chunks(data):
7075
table = pa.table(
7176
[pa.chunked_array([], type=table.field("a").type)], schema=table.schema
7277
)
73-
result = table.to_pandas()
78+
msg = "Passing a BlockManager to DataFrame is deprecated"
79+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
80+
result = table.to_pandas()
7481
assert result["a"].dtype == data.dtype
7582
tm.assert_frame_equal(result, df)
7683

@@ -91,14 +98,18 @@ def test_arrow_sliced(data):
9198

9299
df = pd.DataFrame({"a": data})
93100
table = pa.table(df)
94-
result = table.slice(2, None).to_pandas()
101+
msg = "Passing a BlockManager to DataFrame is deprecated"
102+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
103+
result = table.slice(2, None).to_pandas()
95104
expected = df.iloc[2:].reset_index(drop=True)
96105
tm.assert_frame_equal(result, expected)
97106

98107
# no missing values
99108
df2 = df.fillna(data[0])
100109
table = pa.table(df2)
101-
result = table.slice(2, None).to_pandas()
110+
msg = "Passing a BlockManager to DataFrame is deprecated"
111+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
112+
result = table.slice(2, None).to_pandas()
102113
expected = df2.iloc[2:].reset_index(drop=True)
103114
tm.assert_frame_equal(result, expected)
104115

0 commit comments

Comments
 (0)