Skip to content

Commit 07822fa

Browse files
authored
ENH: Add use_nullable_dtypes to read_clipboard (#50502)
* ENH: Add use_nullable_dtypes to read_clipboard * Adjust whatsnew * Add gh ref * Remove import * Add comment * Remove engine
1 parent 18c4365 commit 07822fa

File tree

4 files changed

+89
-2
lines changed

4 files changed

+89
-2
lines changed

doc/source/whatsnew/v2.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ Configuration option, ``mode.dtype_backend``, to return pyarrow-backed dtypes
3636
The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)
3737

3838
* :func:`read_csv`
39+
* :func:`read_clipboard`
3940
* :func:`read_fwf`
4041
* :func:`read_excel`
4142
* :func:`read_html`
@@ -49,6 +50,7 @@ Additionally a new global configuration, ``mode.dtype_backend`` can now be used
4950
to select the nullable dtypes implementation.
5051

5152
* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
53+
* :func:`read_clipboard` (with ``engine="python"``)
5254
* :func:`read_excel`
5355
* :func:`read_html`
5456
* :func:`read_xml`

pandas/io/clipboards.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
)
1515

1616

17-
def read_clipboard(sep: str = r"\s+", **kwargs): # pragma: no cover
17+
def read_clipboard(
18+
sep: str = r"\s+", use_nullable_dtypes: bool = False, **kwargs
19+
): # pragma: no cover
1820
r"""
1921
Read text from clipboard and pass to read_csv.
2022
@@ -24,6 +26,21 @@ def read_clipboard(sep: str = r"\s+", **kwargs): # pragma: no cover
2426
A string or regex delimiter. The default of '\s+' denotes
2527
one or more whitespace characters.
2628
29+
use_nullable_dtypes : bool = False
30+
Whether or not to use nullable dtypes as default when reading data. If
31+
set to True, nullable dtypes are used for all dtypes that have a nullable
32+
implementation, even if no nulls are present.
33+
34+
The nullable dtype implementation can be configured by calling
35+
``pd.set_option("mode.dtype_backend", "pandas")`` to use
36+
numpy-backed nullable dtypes or
37+
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
38+
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
39+
Pyarrow-backed nullable dtypes are only implemented for the ``python``
40+
engine.
41+
42+
.. versionadded:: 2.0
43+
2744
**kwargs
2845
See read_csv for the full argument list.
2946
@@ -85,7 +102,9 @@ def read_clipboard(sep: str = r"\s+", **kwargs): # pragma: no cover
85102
stacklevel=find_stack_level(),
86103
)
87104

88-
return read_csv(StringIO(text), sep=sep, **kwargs)
105+
return read_csv(
106+
StringIO(text), sep=sep, use_nullable_dtypes=use_nullable_dtypes, **kwargs
107+
)
89108

90109

91110
def to_clipboard(

pandas/io/parsers/readers.py

+2
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,8 @@
403403
numpy-backed nullable dtypes or
404404
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
405405
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
406+
Pyarrow-backed nullable dtypes are only implemented for the ``pyarrow`` or ``python``
407+
engines.
406408
407409
.. versionadded:: 2.0
408410

pandas/tests/io/test_clipboard.py

+64
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,19 @@
1010
PyperclipWindowsException,
1111
)
1212

13+
import pandas as pd
1314
from pandas import (
15+
NA,
1416
DataFrame,
17+
Series,
1518
get_option,
1619
read_clipboard,
1720
)
1821
import pandas._testing as tm
22+
from pandas.core.arrays import (
23+
ArrowStringArray,
24+
StringArray,
25+
)
1926

2027
from pandas.io.clipboard import (
2128
CheckedCall,
@@ -402,3 +409,60 @@ def test_raw_roundtrip(self, data):
402409
# PR #25040 wide unicode wasn't copied correctly on PY3 on windows
403410
clipboard_set(data)
404411
assert data == clipboard_get()
412+
413+
@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
@pytest.mark.parametrize("engine", ["c", "python"])
def test_read_clipboard_nullable_dtypes(
    self, request, mock_clipboard, string_storage, dtype_backend, engine
):
    """
    Check ``read_clipboard(use_nullable_dtypes=True)`` against a known frame.

    CSV text is stored in ``mock_clipboard`` under the current test's node
    name, read back with ``sep=","`` and the parametrized ``engine`` while
    ``mode.string_storage`` and ``mode.dtype_backend`` are set, and the
    result is compared with a hand-built expected ``DataFrame``.

    The combination ``dtype_backend="pyarrow"`` with ``engine="c"`` is
    skipped, and the whole test is skipped when pyarrow is needed but not
    importable.

    NOTE(review): ``mock_clipboard`` and ``string_storage`` are fixtures
    defined outside this view -- presumably the former patches the clipboard
    getter to return the stored text; confirm against the fixture definitions.
    """
    # GH#50502
    if "pyarrow" in (string_storage, dtype_backend):
        pa = pytest.importorskip("pyarrow")

    if engine == "c" and dtype_backend == "pyarrow":
        pytest.skip(reason="c engine not yet supported")

    # Expected string columns depend on the configured string storage.
    if string_storage != "python":
        str_col = ArrowStringArray(pa.array(["x", "y"]))
        str_col_na = ArrowStringArray(pa.array(["x", None]))
    else:
        str_col = StringArray(np.array(["x", "y"], dtype=np.object_))
        str_col_na = StringArray(np.array(["x", NA], dtype=np.object_))

    clipboard_text = """a,b,c,d,e,f,g,h,i
x,1,4.0,x,2,4.0,,True,False
y,2,5.0,,,,,False,"""
    mock_clipboard[request.node.name] = clipboard_text

    # A single option_context call sets both options for the read.
    with pd.option_context(
        "mode.string_storage",
        string_storage,
        "mode.dtype_backend",
        dtype_backend,
    ):
        result = read_clipboard(sep=",", use_nullable_dtypes=True, engine=engine)

    expected_columns = {
        "a": str_col,
        "b": Series([1, 2], dtype="Int64"),
        "c": Series([4.0, 5.0], dtype="Float64"),
        "d": str_col_na,
        "e": Series([2, NA], dtype="Int64"),
        "f": Series([4.0, NA], dtype="Float64"),
        "g": Series([NA, NA], dtype="Int64"),
        "h": Series([True, False], dtype="boolean"),
        "i": Series([False, NA], dtype="boolean"),
    }
    expected = DataFrame(expected_columns)

    if dtype_backend == "pyarrow":
        from pandas.arrays import ArrowExtensionArray

        # Re-wrap every expected column as a pyarrow-backed extension array.
        arrow_columns = {}
        for col in expected.columns:
            arrow_columns[col] = ArrowExtensionArray(
                pa.array(expected[col], from_pandas=True)
            )
        expected = DataFrame(arrow_columns)
        # The all-missing column is rebuilt from a plain [None, None] array.
        expected["g"] = ArrowExtensionArray(pa.array([None, None]))

    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)