Skip to content

Commit 7b8f638

Browse files
authored
DEPR: ignoring dtype in DataFrame constructor failures (#41733)
1 parent c58ea4f commit 7b8f638

File tree

7 files changed

+41
-15
lines changed

7 files changed

+41
-15
lines changed

doc/source/whatsnew/v1.3.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ Other enhancements
233233
- Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
234234
- Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
235235
- :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)
236-
- Improved error message in ``corr` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
236+
- Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
237237

238238
.. ---------------------------------------------------------------------------
239239
@@ -686,6 +686,7 @@ Deprecations
686686
- Deprecated passing arguments (apart from ``cond`` and ``other``) as positional in :meth:`DataFrame.mask` and :meth:`Series.mask` (:issue:`41485`)
687687
- Deprecated passing arguments as positional in :meth:`DataFrame.clip` and :meth:`Series.clip` (other than ``"upper"`` and ``"lower"``) (:issue:`41485`)
688688
- Deprecated special treatment of lists with first element a Categorical in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`)
689+
- Deprecated the behavior of the :class:`DataFrame` constructor when a ``dtype`` is passed and the data cannot be cast to that dtype. In a future version, this will raise instead of the ``dtype`` being silently ignored (:issue:`24435`)
689690
- Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`)
690691
- Deprecated passing arguments as positional in :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, and :meth:`Series.bfill` (:issue:`41485`)
691692
- Deprecated passing arguments as positional in :meth:`DataFrame.sort_values` (other than ``"by"``) and :meth:`Series.sort_values` (:issue:`41485`)

pandas/core/construction.py

+12
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
Sequence,
1313
cast,
1414
)
15+
import warnings
1516

1617
import numpy as np
1718
import numpy.ma as ma
@@ -745,6 +746,17 @@ def _try_cast(
745746
if raise_cast_failure:
746747
raise
747748
else:
749+
# we only get here with raise_cast_failure False, which means
750+
# we were called via the DataFrame constructor
751+
# GH#24435
752+
warnings.warn(
753+
f"Could not cast to {dtype}, falling back to object. This "
754+
"behavior is deprecated. In a future version, when a dtype is "
755+
"passed to 'DataFrame', either all columns will be cast to that "
756+
"dtype, or a TypeError will be raised",
757+
FutureWarning,
758+
stacklevel=7,
759+
)
748760
subarr = np.array(arr, dtype=object, copy=copy)
749761
return subarr
750762

pandas/tests/frame/test_block_internals.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,11 @@ def f(dtype):
258258
f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])
259259

260260
# these work (though results may be unexpected)
261-
f("int64")
262-
f("float64")
261+
depr_msg = "either all columns will be cast to that dtype, or a TypeError will"
262+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
263+
f("int64")
264+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
265+
f("float64")
263266

264267
# 10822
265268
# invalid error message on dt inference

pandas/tests/frame/test_constructors.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,9 @@ def test_constructor_mixed(self, float_string_frame):
207207
assert float_string_frame["foo"].dtype == np.object_
208208

209209
def test_constructor_cast_failure(self):
210-
foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64)
210+
msg = "either all columns will be cast to that dtype, or a TypeError will"
211+
with tm.assert_produces_warning(FutureWarning, match=msg):
212+
foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64)
211213
assert foo["a"].dtype == object
212214

213215
# GH 3010, constructing with odd arrays
@@ -683,7 +685,10 @@ def test_constructor_dict_cast2(self):
683685
"A": dict(zip(range(20), tm.makeStringIndex(20))),
684686
"B": dict(zip(range(15), np.random.randn(15))),
685687
}
686-
frame = DataFrame(test_data, dtype=float)
688+
msg = "either all columns will be cast to that dtype, or a TypeError will"
689+
with tm.assert_produces_warning(FutureWarning, match=msg):
690+
frame = DataFrame(test_data, dtype=float)
691+
687692
assert len(frame) == 20
688693
assert frame["A"].dtype == np.object_
689694
assert frame["B"].dtype == np.float64

pandas/tests/indexing/multiindex/test_getitem.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -206,27 +206,26 @@ def test_frame_getitem_nan_multiindex(nulls_fixture):
206206
df = DataFrame(
207207
[[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]],
208208
columns=cols,
209-
dtype="int64",
210209
).set_index(["a", "b"])
210+
df["c"] = df["c"].astype("int64")
211211

212212
idx = (21, n)
213213
result = df.loc[:idx]
214-
expected = DataFrame(
215-
[[11, n, 13], [21, n, 23]], columns=cols, dtype="int64"
216-
).set_index(["a", "b"])
214+
expected = DataFrame([[11, n, 13], [21, n, 23]], columns=cols).set_index(["a", "b"])
215+
expected["c"] = expected["c"].astype("int64")
217216
tm.assert_frame_equal(result, expected)
218217

219218
result = df.loc[idx:]
220219
expected = DataFrame(
221-
[[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols, dtype="int64"
220+
[[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols
222221
).set_index(["a", "b"])
222+
expected["c"] = expected["c"].astype("int64")
223223
tm.assert_frame_equal(result, expected)
224224

225225
idx1, idx2 = (21, n), (31, n)
226226
result = df.loc[idx1:idx2]
227-
expected = DataFrame(
228-
[[21, n, 23], [31, n, 33]], columns=cols, dtype="int64"
229-
).set_index(["a", "b"])
227+
expected = DataFrame([[21, n, 23], [31, n, 33]], columns=cols).set_index(["a", "b"])
228+
expected["c"] = expected["c"].astype("int64")
230229
tm.assert_frame_equal(result, expected)
231230

232231

pandas/tests/indexing/test_loc.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,12 @@ def test_loc_setitem_dtype(self):
284284
df.loc[:, cols] = df.loc[:, cols].astype("float32")
285285

286286
expected = DataFrame(
287-
{"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}, dtype="float32"
287+
{
288+
"id": ["A"],
289+
"a": np.array([1.2], dtype="float32"),
290+
"b": np.array([0.0], dtype="float32"),
291+
"c": np.array([-2.5], dtype="float32"),
292+
}
288293
) # id is inferred as object
289294

290295
tm.assert_frame_equal(df, expected)

pandas/tests/reshape/test_get_dummies.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -272,8 +272,9 @@ def test_dataframe_dummies_subset(self, df, sparse):
272272
"from_A_a": [1, 0, 1],
273273
"from_A_b": [0, 1, 0],
274274
},
275-
dtype=np.uint8,
276275
)
276+
cols = expected.columns
277+
expected[cols[1:]] = expected[cols[1:]].astype(np.uint8)
277278
expected[["C"]] = df[["C"]]
278279
if sparse:
279280
cols = ["from_A_a", "from_A_b"]

0 commit comments

Comments
 (0)