ENH: Add use_nullable_dtypes to read_clipboard (#50502)

phofl · web-flow · commit 07822fa37884 · 2023-01-06T11:03:21.000-08:00
* ENH: Add use_nullable_dtypes to read_clipboard

* Adjust whatsnew

* Add gh ref

* Remove import

* Add comment

* Remove engine
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -36,6 +36,7 @@ Configuration option, ``mode.dtype_backend``, to return pyarrow-backed dtypes
 The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)
 
 * :func:`read_csv`
+* :func:`read_clipboard`
 * :func:`read_fwf`
 * :func:`read_excel`
 * :func:`read_html`
@@ -49,6 +50,7 @@ Additionally a new global configuration, ``mode.dtype_backend`` can now be used
 to select the nullable dtypes implementation.
 
 * :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
+* :func:`read_clipboard` (with ``engine="python"``)
 * :func:`read_excel`
 * :func:`read_html`
 * :func:`read_xml`
diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py
@@ -14,7 +14,9 @@
 )
 
 
-def read_clipboard(sep: str = r"\s+", **kwargs):  # pragma: no cover
+def read_clipboard(
+    sep: str = r"\s+", use_nullable_dtypes: bool = False, **kwargs
+):  # pragma: no cover
     r"""
     Read text from clipboard and pass to read_csv.
 
@@ -24,6 +26,21 @@ def read_clipboard(sep: str = r"\s+", **kwargs):  # pragma: no cover
         A string or regex delimiter. The default of '\s+' denotes
         one or more whitespace characters.
 
+    use_nullable_dtypes : bool = False
+        Whether or not to use nullable dtypes as default when reading data. If
+        set to True, nullable dtypes are used for all dtypes that have a nullable
+        implementation, even if no nulls are present.
+
+        The nullable dtype implementation can be configured by calling
+        ``pd.set_option("mode.dtype_backend", "pandas")`` to use
+        numpy-backed nullable dtypes or
+        ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
+        pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
+        The ``pyarrow`` backend is only implemented for the ``python``
+        engine.
+
+        .. versionadded:: 2.0
+
     **kwargs
         See read_csv for the full argument list.
 
@@ -85,7 +102,9 @@ def read_clipboard(sep: str = r"\s+", **kwargs):  # pragma: no cover
             stacklevel=find_stack_level(),
         )
 
-    return read_csv(StringIO(text), sep=sep, **kwargs)
+    return read_csv(
+        StringIO(text), sep=sep, use_nullable_dtypes=use_nullable_dtypes, **kwargs
+    )
 
 
 def to_clipboard(
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -403,6 +403,8 @@
     numpy-backed nullable dtypes or
     ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
     pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
+    The ``pyarrow`` backend is only implemented for the ``pyarrow`` or ``python``
+    engines.
 
     .. versionadded:: 2.0
 
diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
@@ -10,12 +10,19 @@
     PyperclipWindowsException,
 )
 
+import pandas as pd
 from pandas import (
+    NA,
     DataFrame,
+    Series,
     get_option,
     read_clipboard,
 )
 import pandas._testing as tm
+from pandas.core.arrays import (
+    ArrowStringArray,
+    StringArray,
+)
 
 from pandas.io.clipboard import (
     CheckedCall,
@@ -402,3 +409,60 @@ def test_raw_roundtrip(self, data):
         # PR #25040 wide unicode wasn't copied correctly on PY3 on windows
         clipboard_set(data)
         assert data == clipboard_get()
+
+    @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
+    @pytest.mark.parametrize("engine", ["c", "python"])
+    def test_read_clipboard_nullable_dtypes(
+        self, request, mock_clipboard, string_storage, dtype_backend, engine
+    ):
+        # GH#50502: read_clipboard(use_nullable_dtypes=True) returns nullable dtypes
+        if string_storage == "pyarrow" or dtype_backend == "pyarrow":
+            pa = pytest.importorskip("pyarrow")  # only bound when pyarrow is needed
+        # This combination is skipped up front rather than exercised.
+        if dtype_backend == "pyarrow" and engine == "c":
+            pytest.skip(reason="c engine not yet supported")
+        # The expected string columns depend on the requested string storage.
+        if string_storage == "python":
+            string_array = StringArray(np.array(["x", "y"], dtype=np.object_))
+            string_array_na = StringArray(np.array(["x", NA], dtype=np.object_))
+
+        else:
+            string_array = ArrowStringArray(pa.array(["x", "y"]))
+            string_array_na = ArrowStringArray(pa.array(["x", None]))
+        # Column "g" is empty in both rows; "d", "e", "f" and "i" are empty in row 2.
+        text = """a,b,c,d,e,f,g,h,i
+x,1,4.0,x,2,4.0,,True,False
+y,2,5.0,,,,,False,"""
+        mock_clipboard[request.node.name] = text  # keyed by this test's node name
+        # Both options are active while the clipboard text is parsed.
+        with pd.option_context("mode.string_storage", string_storage):
+            with pd.option_context("mode.dtype_backend", dtype_backend):
+                result = read_clipboard(
+                    sep=",", use_nullable_dtypes=True, engine=engine
+                )
+        # Expected frame for the "pandas" backend (Int64 / Float64 / boolean).
+        expected = DataFrame(
+            {
+                "a": string_array,
+                "b": Series([1, 2], dtype="Int64"),
+                "c": Series([4.0, 5.0], dtype="Float64"),
+                "d": string_array_na,
+                "e": Series([2, NA], dtype="Int64"),
+                "f": Series([4.0, NA], dtype="Float64"),
+                "g": Series([NA, NA], dtype="Int64"),
+                "h": Series([True, False], dtype="boolean"),
+                "i": Series([False, NA], dtype="boolean"),
+            }
+        )
+        if dtype_backend == "pyarrow":
+            from pandas.arrays import ArrowExtensionArray
+            # Re-wrap every column as pyarrow-backed; "g" is then rebuilt from all-None.
+            expected = DataFrame(
+                {
+                    col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
+                    for col in expected.columns
+                }
+            )
+            expected["g"] = ArrowExtensionArray(pa.array([None, None]))
+
+        tm.assert_frame_equal(result, expected)