Skip to content

Commit 1951b51

Browse files
authored
ENH: Add global nullable option (#50748)
1 parent be260f1 commit 1951b51

26 files changed

+351
-52
lines changed

doc/source/whatsnew/v2.0.0.rst

+10
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,16 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
4949
* :func:`read_feather`
5050
* :func:`to_numeric`
5151

52+
To simplify opting in to nullable dtypes for these functions, a new option ``mode.nullable_dtypes`` was added that allows setting
53+
the keyword argument globally to ``True`` if not specified directly. The option can be enabled
54+
through:
55+
56+
.. ipython:: python
57+
58+
pd.options.mode.nullable_dtypes = True
59+
60+
The option will only work for functions with the keyword ``use_nullable_dtypes``.
61+
5262
Additionally, a new global configuration, ``mode.dtype_backend``, can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
5363
to select the nullable dtypes implementation.
5464

pandas/_config/__init__.py

+5
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,8 @@
3333
def using_copy_on_write():
3434
_mode_options = _global_config["mode"]
3535
return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"
36+
37+
38+
def using_nullable_dtypes():
39+
_mode_options = _global_config["mode"]
40+
return _mode_options["nullable_dtypes"]

pandas/core/config_init.py

+16
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,22 @@ def use_inf_as_na_cb(key) -> None:
560560
validator=is_one_of_factory(["pandas", "pyarrow"]),
561561
)
562562

563+
564+
nullable_dtypes_doc = """
565+
: bool
566+
If nullable dtypes should be returned. This is only applicable to functions
567+
where the ``use_nullable_dtypes`` keyword is implemented.
568+
"""
569+
570+
with cf.config_prefix("mode"):
571+
cf.register_option(
572+
"nullable_dtypes",
573+
False,
574+
nullable_dtypes_doc,
575+
validator=is_bool,
576+
)
577+
578+
563579
# Set up the io.excel specific reader configuration.
564580
reader_engine_doc = """
565581
: string

pandas/core/tools/numeric.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44

55
import numpy as np
66

7-
from pandas._config import get_option
7+
from pandas._config import (
8+
get_option,
9+
using_nullable_dtypes,
10+
)
811

912
from pandas._libs import lib
1013
from pandas._typing import (
@@ -38,7 +41,7 @@ def to_numeric(
3841
arg,
3942
errors: DateTimeErrorChoices = "raise",
4043
downcast: Literal["integer", "signed", "unsigned", "float"] | None = None,
41-
use_nullable_dtypes: bool = False,
44+
use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
4245
):
4346
"""
4447
Convert argument to a numeric type.
@@ -157,6 +160,12 @@ def to_numeric(
157160
if errors not in ("ignore", "raise", "coerce"):
158161
raise ValueError("invalid error value specified")
159162

163+
_use_nullable_dtypes = (
164+
use_nullable_dtypes
165+
if use_nullable_dtypes is not lib.no_default
166+
else using_nullable_dtypes()
167+
)
168+
160169
is_series = False
161170
is_index = False
162171
is_scalars = False
@@ -204,11 +213,11 @@ def to_numeric(
204213
values = ensure_object(values)
205214
coerce_numeric = errors not in ("ignore", "raise")
206215
try:
207-
values, new_mask = lib.maybe_convert_numeric( # type: ignore[call-overload]
216+
values, new_mask = lib.maybe_convert_numeric(
208217
values,
209218
set(),
210219
coerce_numeric=coerce_numeric,
211-
convert_to_masked_nullable=use_nullable_dtypes,
220+
convert_to_masked_nullable=_use_nullable_dtypes,
212221
)
213222
except (ValueError, TypeError):
214223
if errors == "raise":
@@ -218,7 +227,7 @@ def to_numeric(
218227
# Remove unnecessary values, is expected later anyway and enables
219228
# downcasting
220229
values = values[~new_mask]
221-
elif use_nullable_dtypes and new_mask is None:
230+
elif _use_nullable_dtypes and new_mask is None:
222231
new_mask = np.zeros(values.shape, dtype=np.bool_)
223232

224233
# attempt downcast only if the data has been successfully converted

pandas/io/clipboards.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
from io import StringIO
55
import warnings
66

7+
from pandas._config import using_nullable_dtypes
8+
9+
from pandas._libs import lib
710
from pandas.util._exceptions import find_stack_level
811

912
from pandas.core.dtypes.generic import ABCDataFrame
@@ -15,7 +18,9 @@
1518

1619

1720
def read_clipboard(
18-
sep: str = r"\s+", use_nullable_dtypes: bool = False, **kwargs
21+
sep: str = r"\s+",
22+
use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
23+
**kwargs,
1924
): # pragma: no cover
2025
r"""
2126
Read text from clipboard and pass to read_csv.
@@ -56,6 +61,12 @@ def read_clipboard(
5661
if encoding is not None and encoding.lower().replace("-", "") != "utf8":
5762
raise NotImplementedError("reading from clipboard only supports utf-8 encoding")
5863

64+
use_nullable_dtypes = (
65+
use_nullable_dtypes
66+
if use_nullable_dtypes is not lib.no_default
67+
else using_nullable_dtypes()
68+
)
69+
5970
from pandas.io.clipboard import clipboard_get
6071
from pandas.io.parsers import read_csv
6172

pandas/io/excel/_base.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,12 @@
2323
)
2424
import zipfile
2525

26-
from pandas._config import config
26+
from pandas._config import (
27+
config,
28+
using_nullable_dtypes,
29+
)
2730

31+
from pandas._libs import lib
2832
from pandas._libs.parsers import STR_NA_VALUES
2933
from pandas._typing import (
3034
DtypeArg,
@@ -380,7 +384,7 @@ def read_excel(
380384
comment: str | None = ...,
381385
skipfooter: int = ...,
382386
storage_options: StorageOptions = ...,
383-
use_nullable_dtypes: bool = ...,
387+
use_nullable_dtypes: bool | lib.NoDefault = ...,
384388
) -> DataFrame:
385389
...
386390

@@ -419,7 +423,7 @@ def read_excel(
419423
comment: str | None = ...,
420424
skipfooter: int = ...,
421425
storage_options: StorageOptions = ...,
422-
use_nullable_dtypes: bool = ...,
426+
use_nullable_dtypes: bool | lib.NoDefault = ...,
423427
) -> dict[IntStrT, DataFrame]:
424428
...
425429

@@ -458,7 +462,7 @@ def read_excel(
458462
comment: str | None = None,
459463
skipfooter: int = 0,
460464
storage_options: StorageOptions = None,
461-
use_nullable_dtypes: bool = False,
465+
use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
462466
) -> DataFrame | dict[IntStrT, DataFrame]:
463467

464468
should_close = False
@@ -471,6 +475,12 @@ def read_excel(
471475
"an ExcelFile - ExcelFile already has the engine set"
472476
)
473477

478+
use_nullable_dtypes = (
479+
use_nullable_dtypes
480+
if use_nullable_dtypes is not lib.no_default
481+
else using_nullable_dtypes()
482+
)
483+
474484
try:
475485
data = io.parse(
476486
sheet_name=sheet_name,

pandas/io/feather_format.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
Sequence,
77
)
88

9+
from pandas._config import using_nullable_dtypes
10+
11+
from pandas._libs import lib
912
from pandas._typing import (
1013
FilePath,
1114
ReadBuffer,
@@ -103,7 +106,7 @@ def read_feather(
103106
columns: Sequence[Hashable] | None = None,
104107
use_threads: bool = True,
105108
storage_options: StorageOptions = None,
106-
use_nullable_dtypes: bool = False,
109+
use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
107110
):
108111
"""
109112
Load a feather-format object from the file path.
@@ -143,6 +146,12 @@ def read_feather(
143146
import_optional_dependency("pyarrow")
144147
from pyarrow import feather
145148

149+
use_nullable_dtypes = (
150+
use_nullable_dtypes
151+
if use_nullable_dtypes is not lib.no_default
152+
else using_nullable_dtypes()
153+
)
154+
146155
with get_handle(
147156
path, "rb", storage_options=storage_options, is_text=False
148157
) as handles:

pandas/io/html.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
cast,
1919
)
2020

21+
from pandas._config import using_nullable_dtypes
22+
23+
from pandas._libs import lib
2124
from pandas._typing import (
2225
BaseBuffer,
2326
FilePath,
@@ -1036,7 +1039,7 @@ def read_html(
10361039
keep_default_na: bool = True,
10371040
displayed_only: bool = True,
10381041
extract_links: Literal[None, "header", "footer", "body", "all"] = None,
1039-
use_nullable_dtypes: bool = False,
1042+
use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
10401043
) -> list[DataFrame]:
10411044
r"""
10421045
Read HTML tables into a ``list`` of ``DataFrame`` objects.
@@ -1206,6 +1209,12 @@ def read_html(
12061209
)
12071210
validate_header_arg(header)
12081211

1212+
use_nullable_dtypes = (
1213+
use_nullable_dtypes
1214+
if use_nullable_dtypes is not lib.no_default
1215+
else using_nullable_dtypes()
1216+
)
1217+
12091218
io = stringify_path(io)
12101219

12111220
return _parse(

pandas/io/json/_json.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121

2222
import numpy as np
2323

24+
from pandas._config import using_nullable_dtypes
25+
26+
from pandas._libs import lib
2427
from pandas._libs.json import (
2528
dumps,
2629
loads,
@@ -496,7 +499,7 @@ def read_json(
496499
compression: CompressionOptions = "infer",
497500
nrows: int | None = None,
498501
storage_options: StorageOptions = None,
499-
use_nullable_dtypes: bool = False,
502+
use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
500503
) -> DataFrame | Series | JsonReader:
501504
"""
502505
Convert a JSON string to pandas object.
@@ -732,6 +735,12 @@ def read_json(
732735
if orient == "table" and convert_axes:
733736
raise ValueError("cannot pass both convert_axes and orient='table'")
734737

738+
use_nullable_dtypes = (
739+
use_nullable_dtypes
740+
if use_nullable_dtypes is not lib.no_default
741+
else using_nullable_dtypes()
742+
)
743+
735744
if dtype is None and orient != "table":
736745
# error: Incompatible types in assignment (expression has type "bool", variable
737746
# has type "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float],

pandas/io/orc.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,12 @@
88
Literal,
99
)
1010

11-
from pandas._config import get_option
11+
from pandas._config import (
12+
get_option,
13+
using_nullable_dtypes,
14+
)
1215

16+
from pandas._libs import lib
1317
from pandas._typing import (
1418
FilePath,
1519
ReadBuffer,
@@ -33,7 +37,7 @@
3337
def read_orc(
3438
path: FilePath | ReadBuffer[bytes],
3539
columns: list[str] | None = None,
36-
use_nullable_dtypes: bool = False,
40+
use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
3741
**kwargs,
3842
) -> DataFrame:
3943
"""
@@ -86,6 +90,12 @@ def read_orc(
8690

8791
orc = import_optional_dependency("pyarrow.orc")
8892

93+
use_nullable_dtypes = (
94+
use_nullable_dtypes
95+
if use_nullable_dtypes is not lib.no_default
96+
else using_nullable_dtypes()
97+
)
98+
8999
with get_handle(path, "rb", is_text=False) as handles:
90100
orc_file = orc.ORCFile(handles.handle)
91101
pa_table = orc_file.read(columns=columns, **kwargs)

pandas/io/parquet.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
)
1010
from warnings import catch_warnings
1111

12+
from pandas._config import using_nullable_dtypes
13+
14+
from pandas._libs import lib
1215
from pandas._typing import (
1316
FilePath,
1417
ReadBuffer,
@@ -453,7 +456,7 @@ def read_parquet(
453456
engine: str = "auto",
454457
columns: list[str] | None = None,
455458
storage_options: StorageOptions = None,
456-
use_nullable_dtypes: bool = False,
459+
use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
457460
**kwargs,
458461
) -> DataFrame:
459462
"""
@@ -511,6 +514,12 @@ def read_parquet(
511514
"""
512515
impl = get_engine(engine)
513516

517+
use_nullable_dtypes = (
518+
use_nullable_dtypes
519+
if use_nullable_dtypes is not lib.no_default
520+
else using_nullable_dtypes()
521+
)
522+
514523
return impl.read(
515524
path,
516525
columns=columns,

0 commit comments

Comments
 (0)