diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 5b725eb4d2a98..fa17bf20635e0 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -36,6 +36,7 @@ Configuration option, ``mode.dtype_backend``, to return pyarrow-backed dtypes The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`) * :func:`read_csv` +* :func:`read_clipboard` * :func:`read_fwf` * :func:`read_excel` * :func:`read_html` @@ -49,6 +50,7 @@ Additionally a new global configuration, ``mode.dtype_backend`` can now be used to select the nullable dtypes implementation. * :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``) +* :func:`read_clipboard` (with ``engine="python"``) * :func:`read_excel` * :func:`read_html` * :func:`read_xml` diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index a3e778e552439..44bee11518cd3 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -14,7 +14,9 @@ ) -def read_clipboard(sep: str = r"\s+", **kwargs): # pragma: no cover +def read_clipboard( + sep: str = r"\s+", use_nullable_dtypes: bool = False, **kwargs +): # pragma: no cover r""" Read text from clipboard and pass to read_csv. @@ -24,6 +26,21 @@ def read_clipboard(sep: str = r"\s+", **kwargs): # pragma: no cover A string or regex delimiter. The default of '\s+' denotes one or more whitespace characters. + use_nullable_dtypes : bool = False + Whether or not to use nullable dtypes as default when reading data. If + set to True, nullable dtypes are used for all dtypes that have a nullable + implementation, even if no nulls are present. + + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). 
+ The ``pyarrow`` backend is only implemented for the ``python`` + engine. + + .. versionadded:: 2.0 + **kwargs See read_csv for the full argument list. @@ -85,7 +102,9 @@ def read_clipboard(sep: str = r"\s+", **kwargs): # pragma: no cover stacklevel=find_stack_level(), ) - return read_csv(StringIO(text), sep=sep, **kwargs) + return read_csv( + StringIO(text), sep=sep, use_nullable_dtypes=use_nullable_dtypes, **kwargs + ) def to_clipboard( diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index ccfefa59c65b8..9aa927ffe447c 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -403,6 +403,8 @@ numpy-backed nullable dtypes or ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + The ``pyarrow`` backend is only implemented for the ``pyarrow`` or ``python`` + engines. .. versionadded:: 2.0 diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index c47a963e0fa3c..ae9c5aacf6e6b 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -10,12 +10,19 @@ PyperclipWindowsException, ) +import pandas as pd from pandas import ( + NA, DataFrame, + Series, get_option, read_clipboard, ) import pandas._testing as tm +from pandas.core.arrays import ( + ArrowStringArray, + StringArray, +) from pandas.io.clipboard import ( CheckedCall, @@ -402,3 +409,60 @@ def test_raw_roundtrip(self, data): # PR #25040 wide unicode wasn't copied correctly on PY3 on windows clipboard_set(data) assert data == clipboard_get() + + @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_read_clipboard_nullable_dtypes( + self, request, mock_clipboard, string_storage, dtype_backend, engine + ): + # GH#50502 + if string_storage == "pyarrow" or dtype_backend == "pyarrow": + pa = pytest.importorskip("pyarrow") + + if dtype_backend == "pyarrow" and engine == "c": + pytest.skip(reason="c engine not yet 
supported") + + if string_storage == "python": + string_array = StringArray(np.array(["x", "y"], dtype=np.object_)) + string_array_na = StringArray(np.array(["x", NA], dtype=np.object_)) + + else: + string_array = ArrowStringArray(pa.array(["x", "y"])) + string_array_na = ArrowStringArray(pa.array(["x", None])) + + text = """a,b,c,d,e,f,g,h,i +x,1,4.0,x,2,4.0,,True,False +y,2,5.0,,,,,False,""" + mock_clipboard[request.node.name] = text + + with pd.option_context("mode.string_storage", string_storage): + with pd.option_context("mode.dtype_backend", dtype_backend): + result = read_clipboard( + sep=",", use_nullable_dtypes=True, engine=engine + ) + + expected = DataFrame( + { + "a": string_array, + "b": Series([1, 2], dtype="Int64"), + "c": Series([4.0, 5.0], dtype="Float64"), + "d": string_array_na, + "e": Series([2, NA], dtype="Int64"), + "f": Series([4.0, NA], dtype="Float64"), + "g": Series([NA, NA], dtype="Int64"), + "h": Series([True, False], dtype="boolean"), + "i": Series([False, NA], dtype="boolean"), + } + ) + if dtype_backend == "pyarrow": + from pandas.arrays import ArrowExtensionArray + + expected = DataFrame( + { + col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True)) + for col in expected.columns + } + ) + expected["g"] = ArrowExtensionArray(pa.array([None, None])) + + tm.assert_frame_equal(result, expected)