Backport PR #51871 on branch 2.0.x (ERR: Check that dtype_backend is valid) #51964

Merged: 1 commit, Mar 14, 2023

6 changes: 4 additions & 2 deletions pandas/core/generic.py
@@ -94,6 +94,7 @@
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import (
check_dtype_backend,
validate_ascending,
validate_bool_kwarg,
validate_fillna_kwargs,
@@ -6534,8 +6535,8 @@ def convert_dtypes(

.. versionadded:: 1.2.0
dtype_backend : {"numpy_nullable", "pyarrow"}, default "numpy_nullable"
- Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
- arrays, nullable dtypes are used for all dtypes that have a nullable
+ Which dtype_backend to use, e.g. whether a DataFrame should use nullable
+ dtypes for all dtypes that have a nullable
implementation when "numpy_nullable" is set, pyarrow is used for all
dtypes if "pyarrow" is set.

@@ -6654,6 +6655,7 @@ def convert_dtypes(
2 <NA>
dtype: string
"""
check_dtype_backend(dtype_backend)
if self.ndim == 1:
return self._convert_dtypes(
infer_objects,
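For context: the `check_dtype_backend` helper imported here lives in `pandas/util/_validators.py`, which is not among the files loaded above. A minimal sketch of a validator consistent with that import path and with the error message asserted in the new tests might look like this (an assumption for illustration; the actual implementation may differ):

```python
# Sketch only, not code from this PR. Mirrors the error message used in the
# tests below: "dtype_backend numpy is invalid, only 'numpy_nullable' and
# 'pyarrow' are allowed."
from pandas._libs import lib


def check_dtype_backend(dtype_backend) -> None:
    # lib.no_default means the keyword was not supplied, so there is nothing to check.
    if dtype_backend is not lib.no_default:
        if dtype_backend not in ("numpy_nullable", "pyarrow"):
            raise ValueError(
                f"dtype_backend {dtype_backend} is invalid, only "
                "'numpy_nullable' and 'pyarrow' are allowed."
            )
```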
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
@@ -991,7 +991,7 @@ def convert_object_array(
----------
content: List[np.ndarray]
dtype: np.dtype or ExtensionDtype
- dtype_backend: Controls if nullable dtypes are returned.
+ dtype_backend: Controls if nullable/pyarrow dtypes are returned.
coerce_float: Cast floats that are integers to int.

Returns
3 changes: 3 additions & 0 deletions pandas/core/tools/numeric.py
@@ -10,6 +10,7 @@
DtypeBackend,
npt,
)
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.cast import maybe_downcast_numeric
from pandas.core.dtypes.common import (
@@ -161,6 +162,8 @@ def to_numeric(
if errors not in ("ignore", "raise", "coerce"):
raise ValueError("invalid error value specified")

check_dtype_backend(dtype_backend)

is_series = False
is_index = False
is_scalars = False
3 changes: 3 additions & 0 deletions pandas/io/clipboards.py
@@ -7,6 +7,7 @@

from pandas._libs import lib
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.generic import ABCDataFrame

@@ -58,6 +59,8 @@ def read_clipboard(
if encoding is not None and encoding.lower().replace("-", "") != "utf8":
raise NotImplementedError("reading from clipboard only supports utf-8 encoding")

check_dtype_backend(dtype_backend)

from pandas.io.clipboard import clipboard_get
from pandas.io.parsers import read_csv

3 changes: 3 additions & 0 deletions pandas/io/excel/_base.py
@@ -45,6 +45,7 @@
Appender,
doc,
)
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import (
is_bool,
@@ -469,6 +470,8 @@ def read_excel(
storage_options: StorageOptions = None,
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame | dict[IntStrT, DataFrame]:
check_dtype_backend(dtype_backend)

should_close = False
if not isinstance(io, ExcelFile):
should_close = True
3 changes: 3 additions & 0 deletions pandas/io/feather_format.py
@@ -16,6 +16,7 @@
)
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc
from pandas.util._validators import check_dtype_backend

import pandas as pd
from pandas.core.api import (
@@ -138,6 +139,8 @@ def read_feather(
import_optional_dependency("pyarrow")
from pyarrow import feather

check_dtype_backend(dtype_backend)

with get_handle(
path, "rb", storage_options=storage_options, is_text=False
) as handles:
2 changes: 2 additions & 0 deletions pandas/io/html.py
@@ -30,6 +30,7 @@
AbstractMethodError,
EmptyDataError,
)
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import is_list_like

@@ -1204,6 +1205,7 @@ def read_html(
f'"{extract_links}"'
)
validate_header_arg(header)
check_dtype_backend(dtype_backend)

io = stringify_path(io)

15 changes: 11 additions & 4 deletions pandas/io/json/_json.py
@@ -42,6 +42,7 @@
from pandas.compat._optional import import_optional_dependency
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import (
ensure_str,
@@ -744,6 +745,8 @@ def read_json(
if orient == "table" and convert_axes:
raise ValueError("cannot pass both convert_axes and orient='table'")

check_dtype_backend(dtype_backend)

if dtype is None and orient != "table":
# error: Incompatible types in assignment (expression has type "bool", variable
# has type "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float],
@@ -944,14 +947,18 @@ def read(self) -> DataFrame | Series:
if self.engine == "pyarrow":
pyarrow_json = import_optional_dependency("pyarrow.json")
pa_table = pyarrow_json.read_json(self.data)
+
+ mapping: type[ArrowDtype] | None | Callable
if self.dtype_backend == "pyarrow":
- return pa_table.to_pandas(types_mapper=ArrowDtype)
+ mapping = ArrowDtype
elif self.dtype_backend == "numpy_nullable":
from pandas.io._util import _arrow_dtype_mapping

- mapping = _arrow_dtype_mapping()
- return pa_table.to_pandas(types_mapper=mapping.get)
- return pa_table.to_pandas()
+ mapping = _arrow_dtype_mapping().get
+ else:
+ mapping = None
+
+ return pa_table.to_pandas(types_mapper=mapping)
elif self.engine == "ujson":
if self.lines:
if self.chunksize:
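The `read()` refactor shown above collapses three separate `return` statements into a single `to_pandas(types_mapper=...)` call; passing `types_mapper=None` behaves like the previous bare `to_pandas()`. A small standalone illustration of that mapping logic, assuming pandas 2.0 and pyarrow are installed (not part of the diff):

```python
import pyarrow as pa

from pandas import ArrowDtype
from pandas.io._util import _arrow_dtype_mapping  # private helper, as used in the diff

pa_table = pa.table({"a": [1, 2, None]})

for dtype_backend in ("pyarrow", "numpy_nullable", None):
    if dtype_backend == "pyarrow":
        mapping = ArrowDtype                  # ArrowDtype-backed columns
    elif dtype_backend == "numpy_nullable":
        mapping = _arrow_dtype_mapping().get  # masked/nullable pandas dtypes
    else:
        mapping = None                        # default conversion to NumPy dtypes
    df = pa_table.to_pandas(types_mapper=mapping)
    print(dtype_backend, list(df.dtypes))
```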
3 changes: 3 additions & 0 deletions pandas/io/orc.py
@@ -16,6 +16,7 @@
WriteBuffer,
)
from pandas.compat._optional import import_optional_dependency
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import (
is_categorical_dtype,
@@ -78,6 +79,8 @@ def read_orc(

orc = import_optional_dependency("pyarrow.orc")

check_dtype_backend(dtype_backend)

with get_handle(path, "rb", is_text=False) as handles:
orc_file = orc.ORCFile(handles.handle)
pa_table = orc_file.read(columns=columns, **kwargs)
3 changes: 3 additions & 0 deletions pandas/io/parquet.py
@@ -22,6 +22,7 @@
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

import pandas as pd
from pandas import (
@@ -513,6 +514,7 @@ def read_parquet(
DataFrame
"""
impl = get_engine(engine)

if use_nullable_dtypes is not lib.no_default:
msg = (
"The argument 'use_nullable_dtypes' is deprecated and will be removed "
@@ -525,6 +527,7 @@
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
else:
use_nullable_dtypes = False
check_dtype_backend(dtype_backend)

return impl.read(
path,
5 changes: 5 additions & 0 deletions pandas/io/parsers/readers.py
@@ -42,6 +42,7 @@
)
from pandas.util._decorators import Appender
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import (
is_file_like,
@@ -1346,6 +1347,8 @@ def read_fwf(
kwds["colspecs"] = colspecs
kwds["infer_nrows"] = infer_nrows
kwds["engine"] = "python-fwf"

check_dtype_backend(dtype_backend)
kwds["dtype_backend"] = dtype_backend
return _read(filepath_or_buffer, kwds)

@@ -1999,6 +2002,8 @@ def _refine_defaults_read(
else:
raise ValueError(f"Argument {on_bad_lines} is invalid for on_bad_lines")

check_dtype_backend(dtype_backend)

kwds["dtype_backend"] = dtype_backend

return kwds
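Since `_refine_defaults_read` runs for every `read_csv`/`read_table` call, an unsupported backend is now rejected up front rather than failing later or being silently ignored. A brief usage sketch of the behaviour these changes enable (pandas 2.0+):

```python
import io

import pandas as pd

data = "a,b\n1,x\n2,y\n"

# Supported values pass validation and select the nullable-dtype backend.
df = pd.read_csv(io.StringIO(data), dtype_backend="numpy_nullable")
print(df.dtypes)  # nullable extension dtypes such as Int64

# Any other value is rejected before parsing starts.
try:
    pd.read_csv(io.StringIO(data), dtype_backend="numpy")
except ValueError as err:
    print(err)
```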
2 changes: 2 additions & 0 deletions pandas/io/spss.py
@@ -8,6 +8,7 @@

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.inference import is_list_like

@@ -51,6 +52,7 @@ def read_spss(
DataFrame
"""
pyreadstat = import_optional_dependency("pyreadstat")
check_dtype_backend(dtype_backend)

if usecols is not None:
if not is_list_like(usecols):
4 changes: 4 additions & 0 deletions pandas/io/sql.py
@@ -45,6 +45,7 @@
DatabaseError,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import (
is_datetime64tz_dtype,
@@ -326,6 +327,7 @@ def read_sql_table(
>>> pd.read_sql_table('table_name', 'postgres:///db_name') # doctest:+SKIP
"""

check_dtype_backend(dtype_backend)
if dtype_backend is lib.no_default:
dtype_backend = "numpy" # type: ignore[assignment]

@@ -457,6 +459,7 @@ def read_sql_query(
parameter will be converted to UTC.
"""

check_dtype_backend(dtype_backend)
if dtype_backend is lib.no_default:
dtype_backend = "numpy" # type: ignore[assignment]

@@ -621,6 +624,7 @@ def read_sql(
1 1 2010-11-12
"""

check_dtype_backend(dtype_backend)
if dtype_backend is lib.no_default:
dtype_backend = "numpy" # type: ignore[assignment]

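One detail worth noting in the sql readers: `check_dtype_backend` runs before the existing fallback that maps `lib.no_default` to the internal `"numpy"` value, so that sentinel string remains usable inside pandas while an explicit `dtype_backend="numpy"` from a caller is rejected. A simplified, hypothetical helper `_resolve_backend` illustrating that ordering (not code from this PR):

```python
from pandas._libs import lib
from pandas.util._validators import check_dtype_backend  # same import the diff adds


def _resolve_backend(dtype_backend=lib.no_default):
    # Validate first: rejects a user-supplied "numpy", accepts the no_default sentinel.
    check_dtype_backend(dtype_backend)
    if dtype_backend is lib.no_default:
        dtype_backend = "numpy"  # internal default applied only after validation
    return dtype_backend


print(_resolve_backend())                  # 'numpy'
print(_resolve_backend("numpy_nullable"))  # 'numpy_nullable'
# _resolve_backend("numpy") raises ValueError
```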
2 changes: 2 additions & 0 deletions pandas/io/xml.py
@@ -30,6 +30,7 @@
ParserError,
)
from pandas.util._decorators import doc
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import is_list_like

@@ -1112,6 +1113,7 @@ def read_xml(
1 circle 360 NaN
2 triangle 180 3.0
"""
check_dtype_backend(dtype_backend)

return _parse(
path_or_buffer=path_or_buffer,
10 changes: 10 additions & 0 deletions pandas/tests/frame/methods/test_convert_dtypes.py
@@ -124,3 +124,13 @@ def test_pyarrow_dtype_empty_object(self):
expected = pd.DataFrame(columns=[0])
result = expected.convert_dtypes(dtype_backend="pyarrow")
tm.assert_frame_equal(result, expected)

def test_pyarrow_engine_lines_false(self):
# GH 48893
df = pd.DataFrame({"a": [1, 2, 3]})
msg = (
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
"'pyarrow' are allowed."
)
with pytest.raises(ValueError, match=msg):
df.convert_dtypes(dtype_backend="numpy")
8 changes: 8 additions & 0 deletions pandas/tests/io/json/test_pandas.py
@@ -1944,6 +1944,14 @@ def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):

tm.assert_series_equal(result, expected)

def test_invalid_dtype_backend(self):
msg = (
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
"'pyarrow' are allowed."
)
with pytest.raises(ValueError, match=msg):
read_json("test", dtype_backend="numpy")


def test_invalid_engine():
# GH 48893
9 changes: 9 additions & 0 deletions pandas/tests/io/parser/test_read_fwf.py
@@ -1001,3 +1001,12 @@ def test_dtype_backend(string_storage, dtype_backend):
expected["i"] = ArrowExtensionArray(pa.array([None, None]))

tm.assert_frame_equal(result, expected)


def test_invalid_dtype_backend():
msg = (
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
"'pyarrow' are allowed."
)
with pytest.raises(ValueError, match=msg):
read_fwf("test", dtype_backend="numpy")
10 changes: 10 additions & 0 deletions pandas/tests/io/parser/test_unsupported.py
@@ -200,3 +200,13 @@ def test_invalid_file_inputs(request, all_parsers):

with pytest.raises(ValueError, match="Invalid"):
parser.read_csv([])


def test_invalid_dtype_backend(all_parsers):
parser = all_parsers
msg = (
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
"'pyarrow' are allowed."
)
with pytest.raises(ValueError, match=msg):
parser.read_csv("test", dtype_backend="numpy")
8 changes: 8 additions & 0 deletions pandas/tests/io/test_clipboard.py
@@ -467,3 +467,11 @@ def test_read_clipboard_dtype_backend(
expected["g"] = ArrowExtensionArray(pa.array([None, None]))

tm.assert_frame_equal(result, expected)

def test_invalid_dtype_backend(self):
msg = (
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
"'pyarrow' are allowed."
)
with pytest.raises(ValueError, match=msg):
read_clipboard(dtype_backend="numpy")
11 changes: 11 additions & 0 deletions pandas/tests/io/test_feather.py
@@ -244,3 +244,14 @@ def test_read_feather_dtype_backend(self, string_storage, dtype_backend):
)

tm.assert_frame_equal(result, expected)

def test_invalid_dtype_backend(self):
msg = (
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
"'pyarrow' are allowed."
)
df = pd.DataFrame({"int": list(range(1, 4))})
with tm.ensure_clean("tmp.feather") as path:
df.to_feather(path)
with pytest.raises(ValueError, match=msg):
read_feather(path, dtype_backend="numpy")
8 changes: 8 additions & 0 deletions pandas/tests/io/test_html.py
@@ -1469,3 +1469,11 @@ def test_extract_links_all_no_header(self):
result = self.read_html(data, extract_links="all")[0]
expected = DataFrame([[("Google.com", "https://google.com")]])
tm.assert_frame_equal(result, expected)

def test_invalid_dtype_backend(self):
msg = (
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
"'pyarrow' are allowed."
)
with pytest.raises(ValueError, match=msg):
read_html("test", dtype_backend="numpy")