From d1c0b5161bf694b6be996ba290a047b8770762d8 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 21 Sep 2022 21:13:44 +0200
Subject: [PATCH 1/6] ENH: Add option to use nullable dtypes in read_csv

---
 pandas/_libs/parsers.pyx         |  9 ++++---
 pandas/io/parsers/base_parser.py | 46 +++++++++++++++++++++++++-------
 pandas/io/parsers/readers.py     | 17 ++++++++++++
 3 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 07bf7f69ec907..6657cf6b8b46d 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -342,6 +342,7 @@ cdef class TextReader:
         object index_col
         object skiprows
         object dtype
+        bint use_nullable_dtypes
         object usecols
         set unnamed_cols  # set[str]
 
@@ -380,7 +381,8 @@ cdef class TextReader:
                   bint mangle_dupe_cols=True,
                   float_precision=None,
                   bint skip_blank_lines=True,
-                  encoding_errors=b"strict"):
+                  encoding_errors=b"strict",
+                  use_nullable_dtypes=False):
 
         # set encoding for native Python and C library
         if isinstance(encoding_errors, str):
@@ -505,6 +507,7 @@ cdef class TextReader:
         # - DtypeObj
         # - dict[Any, DtypeObj]
         self.dtype = dtype
+        self.use_nullable_dtypes = use_nullable_dtypes
 
         # XXX
         self.noconvert = set()
@@ -1053,8 +1056,8 @@ cdef class TextReader:
                     self._free_na_set(na_hashset)
 
             # don't try to upcast EAs
-            if na_count > 0 and not is_extension_array_dtype(col_dtype):
-                col_res = _maybe_upcast(col_res)
+            if na_count > 0 and not is_extension_array_dtype(col_dtype) or self.use_nullable_dtypes:
+                col_res = _maybe_upcast(col_res, use_nullable_dtypes=self.use_nullable_dtypes)
 
             if col_res is None:
                 raise ParserError(f'Unable to parse column {i}')
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index f90a0549a4320..f4a395fa4cc98 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -50,6 +50,7 @@
     is_dict_like,
     is_dtype_equal,
     is_extension_array_dtype,
+    is_float_dtype,
     is_integer,
     is_integer_dtype,
     is_list_like,
@@ -61,8 +62,14 @@
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.missing import isna
 
+from pandas import StringDtype
 from pandas.core import algorithms
-from pandas.core.arrays import Categorical
+from pandas.core.arrays import (
+    BooleanArray,
+    Categorical,
+    FloatingArray,
+    IntegerArray,
+)
 from pandas.core.indexes.api import (
     Index,
     MultiIndex,
@@ -110,6 +117,7 @@ def __init__(self, kwds) -> None:
 
         self.dtype = copy(kwds.get("dtype", None))
         self.converters = kwds.get("converters")
+        self.use_nullable_dtypes = kwds.get("use_nullable_dtypes")
 
         self.true_values = kwds.get("true_values")
         self.false_values = kwds.get("false_values")
@@ -589,10 +597,7 @@ def _convert_to_ndarrays(
                 )
 
                 # type specified in dtype param or cast_type is an EA
-                if cast_type and (
-                    not is_dtype_equal(cvals, cast_type)
-                    or is_extension_array_dtype(cast_type)
-                ):
+                if cast_type and (not is_dtype_equal(cvals, cast_type) or is_ea):
                     if not is_ea and na_count > 0:
                         try:
                             if is_bool_dtype(cast_type):
@@ -710,14 +715,36 @@ def _infer_types(self, values, na_values, try_num_bool: bool = True):
         if try_num_bool and is_object_dtype(values.dtype):
             # exclude e.g DatetimeIndex here
             try:
-                result, _ = lib.maybe_convert_numeric(values, na_values, False)
+                result, result_mask = lib.maybe_convert_numeric(
+                    values,
+                    na_values,
+                    False,
+                    convert_to_masked_nullable=self.use_nullable_dtypes,
+                )
             except (ValueError, TypeError):
                 # e.g. encountering datetime string gets ValueError
                 #  TypeError can be raised in floatify
-                result = values
-                na_count = parsers.sanitize_objects(result, na_values)
+                na_count = parsers.sanitize_objects(values, na_values)
+
+                if self.use_nullable_dtypes:
+                    result = StringDtype().construct_array_type()._from_sequence(values)
+                else:
+                    result = values
             else:
-                na_count = isna(result).sum()
+                if self.use_nullable_dtypes:
+                    if result_mask is None:
+                        result_mask = np.zeros(result.shape, dtype="bool")
+
+                    if is_integer_dtype(result):
+                        result = IntegerArray(result, result_mask)
+                    elif is_bool_dtype(result):
+                        result = BooleanArray(result, result_mask)
+                    elif is_float_dtype(result):
+                        result = FloatingArray(result, result_mask)
+
+                    na_count = result_mask.sum()
+                else:
+                    na_count = isna(result).sum()
         else:
             result = values
             if values.dtype == np.object_:
@@ -1146,6 +1173,7 @@ def converter(*date_cols):
     "on_bad_lines": ParserBase.BadLineHandleMethod.ERROR,
     "error_bad_lines": None,
     "warn_bad_lines": None,
+    "use_nullable_dtypes": False,
 }
 
 
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index eaec4c6bd5991..9f0bc153e56d5 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -427,6 +427,13 @@
 
     .. versionadded:: 1.2
 
+use_nullable_dtypes: bool = False
+    Whether or not to use nullable dtypes as default when reading data. If
+    set to True, nullable dtypes are used for all dtypes that have a nullable
+    implementation, even if no nulls are present.
+
+    .. versionadded:: 2.0
+
 Returns
 -------
 DataFrame or TextFileReader
@@ -669,6 +676,7 @@ def read_csv(
     memory_map: bool = ...,
     float_precision: Literal["high", "legacy"] | None = ...,
     storage_options: StorageOptions = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> TextFileReader:
     ...
 
@@ -729,6 +737,7 @@ def read_csv(
     memory_map: bool = ...,
     float_precision: Literal["high", "legacy"] | None = ...,
     storage_options: StorageOptions = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> TextFileReader:
     ...
 
@@ -789,6 +798,7 @@ def read_csv(
     memory_map: bool = ...,
     float_precision: Literal["high", "legacy"] | None = ...,
     storage_options: StorageOptions = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> DataFrame:
     ...
 
@@ -849,6 +859,7 @@ def read_csv(
     memory_map: bool = ...,
     float_precision: Literal["high", "legacy"] | None = ...,
     storage_options: StorageOptions = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> DataFrame | TextFileReader:
     ...
 
@@ -928,6 +939,7 @@ def read_csv(
     memory_map: bool = False,
     float_precision: Literal["high", "legacy"] | None = None,
     storage_options: StorageOptions = None,
+    use_nullable_dtypes: bool = False,
 ) -> DataFrame | TextFileReader:
     # locals() should never be modified
     kwds = locals().copy()
@@ -1008,6 +1020,7 @@ def read_table(
     memory_map: bool = ...,
     float_precision: str | None = ...,
     storage_options: StorageOptions = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> TextFileReader:
     ...
 
@@ -1068,6 +1081,7 @@ def read_table(
     memory_map: bool = ...,
     float_precision: str | None = ...,
     storage_options: StorageOptions = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> TextFileReader:
     ...
 
@@ -1128,6 +1142,7 @@ def read_table(
     memory_map: bool = ...,
     float_precision: str | None = ...,
     storage_options: StorageOptions = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> DataFrame:
     ...
 
@@ -1188,6 +1203,7 @@ def read_table(
     memory_map: bool = ...,
     float_precision: str | None = ...,
     storage_options: StorageOptions = ...,
+    use_nullable_dtypes: bool = ...,
 ) -> DataFrame | TextFileReader:
     ...
 
@@ -1267,6 +1283,7 @@ def read_table(
     memory_map: bool = False,
     float_precision: str | None = None,
     storage_options: StorageOptions = None,
+    use_nullable_dtypes: bool = False,
 ) -> DataFrame | TextFileReader:
     # locals() should never be modified
     kwds = locals().copy()

From d7a7eca6c1a8f541e9fcba1b2ec93a5a05809f1d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 25 Sep 2022 13:38:21 -0700
Subject: [PATCH 2/6] Finish implementation

---
 doc/source/whatsnew/v1.6.0.rst                |  1 +
 pandas/_libs/parsers.pyx                      | 12 +++++-
 pandas/io/parsers/base_parser.py              | 35 ++++++++++------
 .../io/parser/dtypes/test_dtypes_basic.py     | 42 +++++++++++++++++++
 4 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst
index ae062ca30a9fa..ec2919e7caf8a 100644
--- a/doc/source/whatsnew/v1.6.0.rst
+++ b/doc/source/whatsnew/v1.6.0.rst
@@ -32,6 +32,7 @@ Other enhancements
 - :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` now preserve nullable dtypes instead of casting to numpy dtypes (:issue:`37493`)
 - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`)
 - :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`)
+- Added new argument ``use_nullable_dtypes`` to :func:`read_csv` and :func:`read_table` to enable automatic conversion to nullable dtypes (:issue:`36712`)
 - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`)
 - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`)
 - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 6657cf6b8b46d..694858c3322c2 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -936,6 +936,7 @@ cdef class TextReader:
             bint na_filter = 0
             int64_t num_cols
             dict result
+            bint use_nullable_dtypes
 
         start = self.parser_start
 
@@ -1056,8 +1057,15 @@ cdef class TextReader:
                     self._free_na_set(na_hashset)
 
             # don't try to upcast EAs
-            if na_count > 0 and not is_extension_array_dtype(col_dtype) or self.use_nullable_dtypes:
-                col_res = _maybe_upcast(col_res, use_nullable_dtypes=self.use_nullable_dtypes)
+            print(col_dtype)
+            if (
+                na_count > 0 and not is_extension_array_dtype(col_dtype)
+                or self.use_nullable_dtypes
+            ):
+                use_nullable_dtypes = self.use_nullable_dtypes and col_dtype is None
+                col_res = _maybe_upcast(
+                    col_res, use_nullable_dtypes=use_nullable_dtypes
+                )
 
             if col_res is None:
                 raise ParserError(f'Unable to parse column {i}')
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index f4a395fa4cc98..3c16b3be07bc1 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -117,7 +117,7 @@ def __init__(self, kwds) -> None:
 
         self.dtype = copy(kwds.get("dtype", None))
         self.converters = kwds.get("converters")
-        self.use_nullable_dtypes = kwds.get("use_nullable_dtypes")
+        self.use_nullable_dtypes = kwds.get("use_nullable_dtypes", False)
 
         self.true_values = kwds.get("true_values")
         self.false_values = kwds.get("false_values")
@@ -516,7 +516,7 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
             )
 
             arr, _ = self._infer_types(
-                arr, col_na_values | col_na_fvalues, try_num_bool
+                arr, col_na_values | col_na_fvalues, cast_type, try_num_bool
             )
             arrays.append(arr)
 
@@ -582,7 +582,10 @@ def _convert_to_ndarrays(
                     values = lib.map_infer_mask(values, conv_f, mask)
 
                 cvals, na_count = self._infer_types(
-                    values, set(col_na_values) | col_na_fvalues, try_num_bool=False
+                    values,
+                    set(col_na_values) | col_na_fvalues,
+                    cast_type,
+                    try_num_bool=False,
                 )
             else:
                 is_ea = is_extension_array_dtype(cast_type)
@@ -593,7 +596,7 @@ def _convert_to_ndarrays(
 
                 # general type inference and conversion
                 cvals, na_count = self._infer_types(
-                    values, set(col_na_values) | col_na_fvalues, try_num_bool
+                    values, set(col_na_values) | col_na_fvalues, cast_type, try_num_bool
                 )
 
                 # type specified in dtype param or cast_type is an EA
@@ -684,7 +687,7 @@ def _set(x) -> int:
 
         return noconvert_columns
 
-    def _infer_types(self, values, na_values, try_num_bool: bool = True):
+    def _infer_types(self, values, na_values, cast_type, try_num_bool: bool = True):
         """
         Infer types of values, possibly casting
 
@@ -692,6 +695,7 @@ def _infer_types(self, values, na_values, try_num_bool: bool = True):
         ----------
         values : ndarray
         na_values : set
+        cast_type: Specifies if we want to cast explicitly
         try_num_bool : bool, default try
            try to cast values to numeric (first preference) or boolean
 
@@ -712,6 +716,8 @@ def _infer_types(self, values, na_values, try_num_bool: bool = True):
                 np.putmask(values, mask, np.nan)
             return values, na_count
 
+        use_nullable_dtypes = self.use_nullable_dtypes and cast_type is None
+
         if try_num_bool and is_object_dtype(values.dtype):
             # exclude e.g DatetimeIndex here
             try:
@@ -719,19 +725,15 @@ def _infer_types(self, values, na_values, try_num_bool: bool = True):
                     values,
                     na_values,
                     False,
-                    convert_to_masked_nullable=self.use_nullable_dtypes,
+                    convert_to_masked_nullable=use_nullable_dtypes,
                 )
             except (ValueError, TypeError):
                 # e.g. encountering datetime string gets ValueError
                 #  TypeError can be raised in floatify
                 na_count = parsers.sanitize_objects(values, na_values)
-
-                if self.use_nullable_dtypes:
-                    result = StringDtype().construct_array_type()._from_sequence(values)
-                else:
-                    result = values
+                result = values
             else:
-                if self.use_nullable_dtypes:
+                if use_nullable_dtypes:
                     if result_mask is None:
                         result_mask = np.zeros(result.shape, dtype="bool")
 
@@ -751,11 +753,18 @@ def _infer_types(self, values, na_values, try_num_bool: bool = True):
                 na_count = parsers.sanitize_objects(values, na_values)
 
         if result.dtype == np.object_ and try_num_bool:
-            result, _ = libops.maybe_convert_bool(
+            result, mask = libops.maybe_convert_bool(
                 np.asarray(values),
                 true_values=self.true_values,
                 false_values=self.false_values,
+                convert_to_masked_nullable=use_nullable_dtypes,
             )
+            if result.dtype == np.bool_ and use_nullable_dtypes:
+                if mask is None:
+                    mask = np.zeros(result.shape, dtype=np.bool_)
+                result = BooleanArray(result, mask)
+            elif result.dtype == np.object_ and use_nullable_dtypes:
+                result = StringDtype().construct_array_type()._from_sequence(values)
 
         return result, na_count
 
diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
index 2c18d461cddf8..1312308bcaa71 100644
--- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
+++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -385,3 +385,45 @@ def test_dtypes_defaultdict_invalid(all_parsers):
     parser = all_parsers
     with pytest.raises(TypeError, match="not understood"):
         parser.read_csv(StringIO(data), dtype=dtype)
+
+
+def test_use_nullabla_dtypes(all_parsers):
+    # GH#36712
+
+    parser = all_parsers
+
+    data = """a,b,c,d,e,f,g,h,i
+1,2.5,True,a,,,,,12-31-2019
+3,4.5,False,b,6,7.5,True,a,12-31-2019
+"""
+    result = parser.read_csv(
+        StringIO(data), use_nullable_dtypes=True, parse_dates=["i"]
+    )
+    expected = DataFrame(
+        {
+            "a": pd.Series([1, 3], dtype="Int64"),
+            "b": pd.Series([2.5, 4.5], dtype="Float64"),
+            "c": pd.Series([True, False], dtype="boolean"),
+            "d": pd.Series(["a", "b"], dtype="string"),
+            "e": pd.Series([pd.NA, 6], dtype="Int64"),
+            "f": pd.Series([pd.NA, 7.5], dtype="Float64"),
+            "g": pd.Series([pd.NA, True], dtype="boolean"),
+            "h": pd.Series([pd.NA, "a"], dtype="string"),
+            "i": pd.Series([Timestamp("2019-12-31")] * 2),
+        }
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_use_nullabla_dtypes_and_dtype(all_parsers):
+    # GH#36712
+
+    parser = all_parsers
+
+    data = """a,b
+1,2.5
+,
+"""
+    result = parser.read_csv(StringIO(data), use_nullable_dtypes=True, dtype="float64")
+    expected = DataFrame({"a": [1.0, np.nan], "b": [2.5, np.nan]})
+    tm.assert_frame_equal(result, expected)

From 4f05540ed53303156a3a9aaed2c28244ac7b0254 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sun, 25 Sep 2022 13:45:02 -0700
Subject: [PATCH 3/6] Document use_nullable_dtypes in user guide and remove debug print

---
 doc/source/user_guide/io.rst     | 8 ++++++++
 pandas/_libs/parsers.pyx         | 1 -
 pandas/io/parsers/base_parser.py | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 15b3b894c68b6..543489194535d 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -197,6 +197,14 @@ dtype : Type name or dict of column -> type, default ``None``
      Support for defaultdict was added. Specify a defaultdict as input where
      the default determines the dtype of the columns which are not explicitly
      listed.
+
+use_nullable_dtypes: bool = False
+    Whether or not to use nullable dtypes as default when reading data. If
+    set to True, nullable dtypes are used for all dtypes that have a nullable
+    implementation, even if no nulls are present.
+
+    .. versionadded:: 2.0
+
 engine : {``'c'``, ``'python'``, ``'pyarrow'``}
   Parser engine to use. The C and pyarrow engines are faster, while the python engine
   is currently more feature-complete. Multithreading is currently only supported by
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 694858c3322c2..b77c141f39502 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -1057,7 +1057,6 @@ cdef class TextReader:
                     self._free_na_set(na_hashset)
 
             # don't try to upcast EAs
-            print(col_dtype)
             if (
                 na_count > 0 and not is_extension_array_dtype(col_dtype)
                 or self.use_nullable_dtypes
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 3c16b3be07bc1..ffeafd6289999 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -735,7 +735,7 @@ def _infer_types(self, values, na_values, cast_type, try_num_bool: bool = True):
             else:
                 if use_nullable_dtypes:
                     if result_mask is None:
-                        result_mask = np.zeros(result.shape, dtype="bool")
+                        result_mask = np.zeros(result.shape, dtype=np.bool_)
 
                     if is_integer_dtype(result):
                         result = IntegerArray(result, result_mask)

From af6056bd660154935767aded2047918b5812738c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Thu, 29 Sep 2022 10:11:43 +0200
Subject: [PATCH 4/6] Fix mypy

---
 doc/source/user_guide/io.rst     |  2 +-
 pandas/io/parsers/base_parser.py | 14 +++++++++-----
 pandas/io/parsers/readers.py     |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 543489194535d..1552f2a8d257b 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -198,7 +198,7 @@ dtype : Type name or dict of column -> type, default ``None``
      the default determines the dtype of the columns which are not explicitly
      listed.
 
-use_nullable_dtypes: bool = False
+use_nullable_dtypes : bool = False
     Whether or not to use nullable dtypes as default when reading data. If
     set to True, nullable dtypes are used for all dtypes that have a nullable
     implementation, even if no nulls are present.
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index ffeafd6289999..fdf806d883151 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -15,6 +15,7 @@
     Hashable,
     Iterable,
     List,
+    Literal,
     Mapping,
     Sequence,
     Tuple,
@@ -716,7 +717,10 @@ def _infer_types(self, values, na_values, cast_type, try_num_bool: bool = True):
                 np.putmask(values, mask, np.nan)
             return values, na_count
 
-        use_nullable_dtypes = self.use_nullable_dtypes and cast_type is None
+        use_nullable_dtypes: Literal[True] | Literal[False] = (
+            self.use_nullable_dtypes and cast_type is None
+        )
+        result: ArrayLike
 
         if try_num_bool and is_object_dtype(values.dtype):
             # exclude e.g DatetimeIndex here
@@ -753,16 +757,16 @@ def _infer_types(self, values, na_values, cast_type, try_num_bool: bool = True):
                 na_count = parsers.sanitize_objects(values, na_values)
 
         if result.dtype == np.object_ and try_num_bool:
-            result, mask = libops.maybe_convert_bool(
+            result, bool_mask = libops.maybe_convert_bool(
                 np.asarray(values),
                 true_values=self.true_values,
                 false_values=self.false_values,
                 convert_to_masked_nullable=use_nullable_dtypes,
             )
             if result.dtype == np.bool_ and use_nullable_dtypes:
-                if mask is None:
-                    mask = np.zeros(result.shape, dtype=np.bool_)
-                result = BooleanArray(result, mask)
+                if bool_mask is None:
+                    bool_mask = np.zeros(result.shape, dtype=np.bool_)
+                result = BooleanArray(result, bool_mask)
             elif result.dtype == np.object_ and use_nullable_dtypes:
                 result = StringDtype().construct_array_type()._from_sequence(values)
 
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 4a59966384897..ae8d778b9138d 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -427,7 +427,7 @@
 
     .. versionadded:: 1.2
 
-use_nullable_dtypes: bool = False
+use_nullable_dtypes : bool = False
     Whether or not to use nullable dtypes as default when reading data. If
     set to True, nullable dtypes are used for all dtypes that have a nullable
     implementation, even if no nulls are present.

From 8a4d2063a5a3fc31682730c10000a98a8373ea5e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 30 Sep 2022 13:09:52 +0200
Subject: [PATCH 5/6] Add tests and fix call

---
 pandas/io/parsers/base_parser.py              | 23 +++++++---
 .../io/parser/dtypes/test_dtypes_basic.py     | 45 +++++++++++++++++--
 2 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index fdf806d883151..ae9a66eeaf353 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -517,7 +517,7 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
             )
 
             arr, _ = self._infer_types(
-                arr, col_na_values | col_na_fvalues, cast_type, try_num_bool
+                arr, col_na_values | col_na_fvalues, cast_type is None, try_num_bool
             )
             arrays.append(arr)
 
@@ -585,7 +585,7 @@ def _convert_to_ndarrays(
                 cvals, na_count = self._infer_types(
                     values,
                     set(col_na_values) | col_na_fvalues,
-                    cast_type,
+                    cast_type is None,
                     try_num_bool=False,
                 )
             else:
@@ -597,7 +597,10 @@ def _convert_to_ndarrays(
 
                 # general type inference and conversion
                 cvals, na_count = self._infer_types(
-                    values, set(col_na_values) | col_na_fvalues, cast_type, try_num_bool
+                    values,
+                    set(col_na_values) | col_na_fvalues,
+                    cast_type is None,
+                    try_num_bool,
                 )
 
                 # type specified in dtype param or cast_type is an EA
@@ -688,7 +691,9 @@ def _set(x) -> int:
 
         return noconvert_columns
 
-    def _infer_types(self, values, na_values, cast_type, try_num_bool: bool = True):
+    def _infer_types(
+        self, values, na_values, no_dtype_specified, try_num_bool: bool = True
+    ):
         """
         Infer types of values, possibly casting
 
@@ -696,7 +701,7 @@ def _infer_types(self, values, na_values, cast_type, try_num_bool: bool = True):
         ----------
         values : ndarray
         na_values : set
-        cast_type: Specifies if we want to cast explicitly
+        no_dtype_specified: True if no explicit dtype was given for these values
         try_num_bool : bool, default try
            try to cast values to numeric (first preference) or boolean
 
@@ -718,7 +723,7 @@ def _infer_types(self, values, na_values, cast_type, try_num_bool: bool = True):
             return values, na_count
 
         use_nullable_dtypes: Literal[True] | Literal[False] = (
-            self.use_nullable_dtypes and cast_type is None
+            self.use_nullable_dtypes and no_dtype_specified
         )
         result: ArrayLike
 
@@ -741,7 +746,11 @@ def _infer_types(self, values, na_values, cast_type, try_num_bool: bool = True):
                     if result_mask is None:
                         result_mask = np.zeros(result.shape, dtype=np.bool_)
 
-                    if is_integer_dtype(result):
+                    if result_mask.all():
+                        result = IntegerArray(
+                            np.ones(result_mask.shape, dtype=np.int64), result_mask
+                        )
+                    elif is_integer_dtype(result):
                         result = IntegerArray(result, result_mask)
                     elif is_bool_dtype(result):
                         result = BooleanArray(result, result_mask)
diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
index 1312308bcaa71..76803591951d6 100644
--- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
+++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -9,6 +9,7 @@
 import pytest
 
 from pandas.errors import ParserWarning
+import pandas.util._test_decorators as td
 
 import pandas as pd
 from pandas import (
@@ -16,6 +17,10 @@
     Timestamp,
 )
 import pandas._testing as tm
+from pandas.core.arrays import (
+    ArrowStringArray,
+    StringArray,
+)
 
 # TODO(1.4): Change me into xfail at release time
 # and xfail individual tests
@@ -392,9 +397,9 @@ def test_use_nullabla_dtypes(all_parsers):
 
     parser = all_parsers
 
-    data = """a,b,c,d,e,f,g,h,i
-1,2.5,True,a,,,,,12-31-2019
-3,4.5,False,b,6,7.5,True,a,12-31-2019
+    data = """a,b,c,d,e,f,g,h,i,j
+1,2.5,True,a,,,,,12-31-2019,
+3,4.5,False,b,6,7.5,True,a,12-31-2019,
 """
     result = parser.read_csv(
         StringIO(data), use_nullable_dtypes=True, parse_dates=["i"]
@@ -410,6 +415,7 @@ def test_use_nullabla_dtypes(all_parsers):
             "g": pd.Series([pd.NA, True], dtype="boolean"),
             "h": pd.Series([pd.NA, "a"], dtype="string"),
             "i": pd.Series([Timestamp("2019-12-31")] * 2),
+            "j": pd.Series([pd.NA, pd.NA], dtype="Int64"),
         }
     )
     tm.assert_frame_equal(result, expected)
@@ -427,3 +433,36 @@ def test_use_nullabla_dtypes_and_dtype(all_parsers):
     result = parser.read_csv(StringIO(data), use_nullable_dtypes=True, dtype="float64")
     expected = DataFrame({"a": [1.0, np.nan], "b": [2.5, np.nan]})
     tm.assert_frame_equal(result, expected)
+
+
+@td.skip_if_no("pyarrow")
+@pytest.mark.parametrize("storage", ["pyarrow", "python"])
+def test_use_nullabla_dtypes_string(all_parsers, storage):
+    # GH#36712
+    import pyarrow as pa
+
+    with pd.option_context("mode.string_storage", storage):
+
+        parser = all_parsers
+
+        data = """a,b
+a,x
+b,
+"""
+        result = parser.read_csv(StringIO(data), use_nullable_dtypes=True)
+
+        if storage == "python":
+            expected = DataFrame(
+                {
+                    "a": StringArray(np.array(["a", "b"], dtype=np.object_)),
+                    "b": StringArray(np.array(["x", pd.NA], dtype=np.object_)),
+                }
+            )
+        else:
+            expected = DataFrame(
+                {
+                    "a": ArrowStringArray(pa.array(["a", "b"])),
+                    "b": ArrowStringArray(pa.array(["x", None])),
+                }
+            )
+        tm.assert_frame_equal(result, expected)

From 30d68a81db859867cf3e577c9e981816c20489cc Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 7 Oct 2022 13:45:35 +0200
Subject: [PATCH 6/6] Fix typo

---
 pandas/tests/io/parser/dtypes/test_dtypes_basic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
index 76803591951d6..345da0bca8668 100644
--- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
+++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -392,7 +392,7 @@ def test_dtypes_defaultdict_invalid(all_parsers):
         parser.read_csv(StringIO(data), dtype=dtype)
 
 
-def test_use_nullabla_dtypes(all_parsers):
+def test_use_nullable_dtypes(all_parsers):
     # GH#36712
 
     parser = all_parsers