Skip to content

Commit ee7e30c

Browse files
authored
Backport PR pandas-dev#51871 on branch 2.0.x (ERR: Check that dtype_backend is valid) (pandas-dev#51964)
ERR: Check that dtype_backend is valid (pandas-dev#51871)
1 parent 4ec8ed9 commit ee7e30c

28 files changed

+188
-7
lines changed

pandas/core/generic.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@
9494
from pandas.util._decorators import doc
9595
from pandas.util._exceptions import find_stack_level
9696
from pandas.util._validators import (
97+
check_dtype_backend,
9798
validate_ascending,
9899
validate_bool_kwarg,
99100
validate_fillna_kwargs,
@@ -6534,8 +6535,8 @@ def convert_dtypes(
65346535
65356536
.. versionadded:: 1.2.0
65366537
dtype_backend : {"numpy_nullable", "pyarrow"}, default "numpy_nullable"
6537-
Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
6538-
arrays, nullable dtypes are used for all dtypes that have a nullable
6538+
Which dtype_backend to use, e.g. whether a DataFrame should use nullable
6539+
dtypes for all dtypes that have a nullable
65396540
implementation when "numpy_nullable" is set; pyarrow is used for all
65406541
dtypes if "pyarrow" is set.
65416542
@@ -6654,6 +6655,7 @@ def convert_dtypes(
66546655
2 <NA>
66556656
dtype: string
66566657
"""
6658+
check_dtype_backend(dtype_backend)
66576659
if self.ndim == 1:
66586660
return self._convert_dtypes(
66596661
infer_objects,

pandas/core/internals/construction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,7 @@ def convert_object_array(
991991
----------
992992
content: List[np.ndarray]
993993
dtype: np.dtype or ExtensionDtype
994-
dtype_backend: Controls if nullable dtypes are returned.
994+
dtype_backend: Controls whether nullable or pyarrow-backed dtypes are returned.
995995
coerce_float: Cast floats that are integers to int.
996996
997997
Returns

pandas/core/tools/numeric.py

+3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
DtypeBackend,
1111
npt,
1212
)
13+
from pandas.util._validators import check_dtype_backend
1314

1415
from pandas.core.dtypes.cast import maybe_downcast_numeric
1516
from pandas.core.dtypes.common import (
@@ -161,6 +162,8 @@ def to_numeric(
161162
if errors not in ("ignore", "raise", "coerce"):
162163
raise ValueError("invalid error value specified")
163164

165+
check_dtype_backend(dtype_backend)
166+
164167
is_series = False
165168
is_index = False
166169
is_scalars = False

pandas/io/clipboards.py

+3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from pandas._libs import lib
99
from pandas.util._exceptions import find_stack_level
10+
from pandas.util._validators import check_dtype_backend
1011

1112
from pandas.core.dtypes.generic import ABCDataFrame
1213

@@ -58,6 +59,8 @@ def read_clipboard(
5859
if encoding is not None and encoding.lower().replace("-", "") != "utf8":
5960
raise NotImplementedError("reading from clipboard only supports utf-8 encoding")
6061

62+
check_dtype_backend(dtype_backend)
63+
6164
from pandas.io.clipboard import clipboard_get
6265
from pandas.io.parsers import read_csv
6366

pandas/io/excel/_base.py

+3
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
Appender,
4646
doc,
4747
)
48+
from pandas.util._validators import check_dtype_backend
4849

4950
from pandas.core.dtypes.common import (
5051
is_bool,
@@ -469,6 +470,8 @@ def read_excel(
469470
storage_options: StorageOptions = None,
470471
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
471472
) -> DataFrame | dict[IntStrT, DataFrame]:
473+
check_dtype_backend(dtype_backend)
474+
472475
should_close = False
473476
if not isinstance(io, ExcelFile):
474477
should_close = True

pandas/io/feather_format.py

+3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
)
1717
from pandas.compat._optional import import_optional_dependency
1818
from pandas.util._decorators import doc
19+
from pandas.util._validators import check_dtype_backend
1920

2021
import pandas as pd
2122
from pandas.core.api import (
@@ -138,6 +139,8 @@ def read_feather(
138139
import_optional_dependency("pyarrow")
139140
from pyarrow import feather
140141

142+
check_dtype_backend(dtype_backend)
143+
141144
with get_handle(
142145
path, "rb", storage_options=storage_options, is_text=False
143146
) as handles:

pandas/io/html.py

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
AbstractMethodError,
3131
EmptyDataError,
3232
)
33+
from pandas.util._validators import check_dtype_backend
3334

3435
from pandas.core.dtypes.common import is_list_like
3536

@@ -1204,6 +1205,7 @@ def read_html(
12041205
f'"{extract_links}"'
12051206
)
12061207
validate_header_arg(header)
1208+
check_dtype_backend(dtype_backend)
12071209

12081210
io = stringify_path(io)
12091211

pandas/io/json/_json.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from pandas.compat._optional import import_optional_dependency
4343
from pandas.errors import AbstractMethodError
4444
from pandas.util._decorators import doc
45+
from pandas.util._validators import check_dtype_backend
4546

4647
from pandas.core.dtypes.common import (
4748
ensure_str,
@@ -744,6 +745,8 @@ def read_json(
744745
if orient == "table" and convert_axes:
745746
raise ValueError("cannot pass both convert_axes and orient='table'")
746747

748+
check_dtype_backend(dtype_backend)
749+
747750
if dtype is None and orient != "table":
748751
# error: Incompatible types in assignment (expression has type "bool", variable
749752
# has type "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float],
@@ -944,14 +947,18 @@ def read(self) -> DataFrame | Series:
944947
if self.engine == "pyarrow":
945948
pyarrow_json = import_optional_dependency("pyarrow.json")
946949
pa_table = pyarrow_json.read_json(self.data)
950+
951+
mapping: type[ArrowDtype] | None | Callable
947952
if self.dtype_backend == "pyarrow":
948-
return pa_table.to_pandas(types_mapper=ArrowDtype)
953+
mapping = ArrowDtype
949954
elif self.dtype_backend == "numpy_nullable":
950955
from pandas.io._util import _arrow_dtype_mapping
951956

952-
mapping = _arrow_dtype_mapping()
953-
return pa_table.to_pandas(types_mapper=mapping.get)
954-
return pa_table.to_pandas()
957+
mapping = _arrow_dtype_mapping().get
958+
else:
959+
mapping = None
960+
961+
return pa_table.to_pandas(types_mapper=mapping)
955962
elif self.engine == "ujson":
956963
if self.lines:
957964
if self.chunksize:

pandas/io/orc.py

+3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
WriteBuffer,
1717
)
1818
from pandas.compat._optional import import_optional_dependency
19+
from pandas.util._validators import check_dtype_backend
1920

2021
from pandas.core.dtypes.common import (
2122
is_categorical_dtype,
@@ -78,6 +79,8 @@ def read_orc(
7879

7980
orc = import_optional_dependency("pyarrow.orc")
8081

82+
check_dtype_backend(dtype_backend)
83+
8184
with get_handle(path, "rb", is_text=False) as handles:
8285
orc_file = orc.ORCFile(handles.handle)
8386
pa_table = orc_file.read(columns=columns, **kwargs)

pandas/io/parquet.py

+3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from pandas.errors import AbstractMethodError
2323
from pandas.util._decorators import doc
2424
from pandas.util._exceptions import find_stack_level
25+
from pandas.util._validators import check_dtype_backend
2526

2627
import pandas as pd
2728
from pandas import (
@@ -513,6 +514,7 @@ def read_parquet(
513514
DataFrame
514515
"""
515516
impl = get_engine(engine)
517+
516518
if use_nullable_dtypes is not lib.no_default:
517519
msg = (
518520
"The argument 'use_nullable_dtypes' is deprecated and will be removed "
@@ -525,6 +527,7 @@ def read_parquet(
525527
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
526528
else:
527529
use_nullable_dtypes = False
530+
check_dtype_backend(dtype_backend)
528531

529532
return impl.read(
530533
path,

pandas/io/parsers/readers.py

+5
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
)
4343
from pandas.util._decorators import Appender
4444
from pandas.util._exceptions import find_stack_level
45+
from pandas.util._validators import check_dtype_backend
4546

4647
from pandas.core.dtypes.common import (
4748
is_file_like,
@@ -1346,6 +1347,8 @@ def read_fwf(
13461347
kwds["colspecs"] = colspecs
13471348
kwds["infer_nrows"] = infer_nrows
13481349
kwds["engine"] = "python-fwf"
1350+
1351+
check_dtype_backend(dtype_backend)
13491352
kwds["dtype_backend"] = dtype_backend
13501353
return _read(filepath_or_buffer, kwds)
13511354

@@ -1999,6 +2002,8 @@ def _refine_defaults_read(
19992002
else:
20002003
raise ValueError(f"Argument {on_bad_lines} is invalid for on_bad_lines")
20012004

2005+
check_dtype_backend(dtype_backend)
2006+
20022007
kwds["dtype_backend"] = dtype_backend
20032008

20042009
return kwds

pandas/io/spss.py

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from pandas._libs import lib
1010
from pandas.compat._optional import import_optional_dependency
11+
from pandas.util._validators import check_dtype_backend
1112

1213
from pandas.core.dtypes.inference import is_list_like
1314

@@ -51,6 +52,7 @@ def read_spss(
5152
DataFrame
5253
"""
5354
pyreadstat = import_optional_dependency("pyreadstat")
55+
check_dtype_backend(dtype_backend)
5456

5557
if usecols is not None:
5658
if not is_list_like(usecols):

pandas/io/sql.py

+4
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
DatabaseError,
4646
)
4747
from pandas.util._exceptions import find_stack_level
48+
from pandas.util._validators import check_dtype_backend
4849

4950
from pandas.core.dtypes.common import (
5051
is_datetime64tz_dtype,
@@ -326,6 +327,7 @@ def read_sql_table(
326327
>>> pd.read_sql_table('table_name', 'postgres:///db_name') # doctest:+SKIP
327328
"""
328329

330+
check_dtype_backend(dtype_backend)
329331
if dtype_backend is lib.no_default:
330332
dtype_backend = "numpy" # type: ignore[assignment]
331333

@@ -457,6 +459,7 @@ def read_sql_query(
457459
parameter will be converted to UTC.
458460
"""
459461

462+
check_dtype_backend(dtype_backend)
460463
if dtype_backend is lib.no_default:
461464
dtype_backend = "numpy" # type: ignore[assignment]
462465

@@ -621,6 +624,7 @@ def read_sql(
621624
1 1 2010-11-12
622625
"""
623626

627+
check_dtype_backend(dtype_backend)
624628
if dtype_backend is lib.no_default:
625629
dtype_backend = "numpy" # type: ignore[assignment]
626630

pandas/io/xml.py

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
ParserError,
3131
)
3232
from pandas.util._decorators import doc
33+
from pandas.util._validators import check_dtype_backend
3334

3435
from pandas.core.dtypes.common import is_list_like
3536

@@ -1112,6 +1113,7 @@ def read_xml(
11121113
1 circle 360 NaN
11131114
2 triangle 180 3.0
11141115
"""
1116+
check_dtype_backend(dtype_backend)
11151117

11161118
return _parse(
11171119
path_or_buffer=path_or_buffer,

pandas/tests/frame/methods/test_convert_dtypes.py

+10
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,13 @@ def test_pyarrow_dtype_empty_object(self):
124124
expected = pd.DataFrame(columns=[0])
125125
result = expected.convert_dtypes(dtype_backend="pyarrow")
126126
tm.assert_frame_equal(result, expected)
127+
128+
def test_pyarrow_engine_lines_false(self):
129+
# GH 51871: an invalid dtype_backend value must raise a ValueError
130+
df = pd.DataFrame({"a": [1, 2, 3]})
131+
msg = (
132+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
133+
"'pyarrow' are allowed."
134+
)
135+
with pytest.raises(ValueError, match=msg):
136+
df.convert_dtypes(dtype_backend="numpy")

pandas/tests/io/json/test_pandas.py

+8
Original file line numberDiff line numberDiff line change
@@ -1944,6 +1944,14 @@ def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
19441944

19451945
tm.assert_series_equal(result, expected)
19461946

1947+
def test_invalid_dtype_backend(self):
1948+
msg = (
1949+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
1950+
"'pyarrow' are allowed."
1951+
)
1952+
with pytest.raises(ValueError, match=msg):
1953+
read_json("test", dtype_backend="numpy")
1954+
19471955

19481956
def test_invalid_engine():
19491957
# GH 48893

pandas/tests/io/parser/test_read_fwf.py

+9
Original file line numberDiff line numberDiff line change
@@ -1001,3 +1001,12 @@ def test_dtype_backend(string_storage, dtype_backend):
10011001
expected["i"] = ArrowExtensionArray(pa.array([None, None]))
10021002

10031003
tm.assert_frame_equal(result, expected)
1004+
1005+
1006+
def test_invalid_dtype_backend():
1007+
msg = (
1008+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
1009+
"'pyarrow' are allowed."
1010+
)
1011+
with pytest.raises(ValueError, match=msg):
1012+
read_fwf("test", dtype_backend="numpy")

pandas/tests/io/parser/test_unsupported.py

+10
Original file line numberDiff line numberDiff line change
@@ -200,3 +200,13 @@ def test_invalid_file_inputs(request, all_parsers):
200200

201201
with pytest.raises(ValueError, match="Invalid"):
202202
parser.read_csv([])
203+
204+
205+
def test_invalid_dtype_backend(all_parsers):
206+
parser = all_parsers
207+
msg = (
208+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
209+
"'pyarrow' are allowed."
210+
)
211+
with pytest.raises(ValueError, match=msg):
212+
parser.read_csv("test", dtype_backend="numpy")

pandas/tests/io/test_clipboard.py

+8
Original file line numberDiff line numberDiff line change
@@ -467,3 +467,11 @@ def test_read_clipboard_dtype_backend(
467467
expected["g"] = ArrowExtensionArray(pa.array([None, None]))
468468

469469
tm.assert_frame_equal(result, expected)
470+
471+
def test_invalid_dtype_backend(self):
472+
msg = (
473+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
474+
"'pyarrow' are allowed."
475+
)
476+
with pytest.raises(ValueError, match=msg):
477+
read_clipboard(dtype_backend="numpy")

pandas/tests/io/test_feather.py

+11
Original file line numberDiff line numberDiff line change
@@ -244,3 +244,14 @@ def test_read_feather_dtype_backend(self, string_storage, dtype_backend):
244244
)
245245

246246
tm.assert_frame_equal(result, expected)
247+
248+
def test_invalid_dtype_backend(self):
249+
msg = (
250+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
251+
"'pyarrow' are allowed."
252+
)
253+
df = pd.DataFrame({"int": list(range(1, 4))})
254+
with tm.ensure_clean("tmp.feather") as path:
255+
df.to_feather(path)
256+
with pytest.raises(ValueError, match=msg):
257+
read_feather(path, dtype_backend="numpy")

pandas/tests/io/test_html.py

+8
Original file line numberDiff line numberDiff line change
@@ -1469,3 +1469,11 @@ def test_extract_links_all_no_header(self):
14691469
result = self.read_html(data, extract_links="all")[0]
14701470
expected = DataFrame([[("Google.com", "https://google.com")]])
14711471
tm.assert_frame_equal(result, expected)
1472+
1473+
def test_invalid_dtype_backend(self):
1474+
msg = (
1475+
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
1476+
"'pyarrow' are allowed."
1477+
)
1478+
with pytest.raises(ValueError, match=msg):
1479+
read_html("test", dtype_backend="numpy")

0 commit comments

Comments
 (0)