pandas-dev · jreback · Jan 9, 2021 · Jan 6, 2021 · Jan 6, 2021 · Jan 6, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -284,6 +284,7 @@ I/O
 - Bug in :func:`json_normalize` resulting in the first element of a generator object not being included in the returned ``DataFrame`` (:issue:`35923`)
 - Bug in :func:`read_excel` forward filling :class:`MultiIndex` names with multiple header and index columns specified (:issue:`34673`)
 - :func:`pandas.read_excel` now respects :func:`pandas.set_option` (:issue:`34252`)
+- Bug in :func:`read_csv` ignoring ``true_values`` and ``false_values`` when a column is parsed with the nullable ``boolean`` dtype (:issue:`34655`)
 - Bug in :func:`read_json` where ``orient="split"`` did not maintain a numeric string index (:issue:`28556`)
 
 Period

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -1084,11 +1084,18 @@ cdef class TextReader:
         elif is_extension_array_dtype(dtype):
             result, na_count = self._string_convert(i, start, end, na_filter,
                                                     na_hashset)
+
             array_type = dtype.construct_array_type()
             try:
                 # use _from_sequence_of_strings if the class defines it
-                result = array_type._from_sequence_of_strings(result,
-                                                              dtype=dtype)
+                if is_bool_dtype(dtype):
+                    true_values = [x.decode() for x in self.true_values]
+                    false_values = [x.decode() for x in self.false_values]
+                    result = array_type._from_sequence_of_strings(
+                        result, dtype=dtype, true_values=true_values,
+                        false_values=false_values)
+                else:
+                    result = array_type._from_sequence_of_strings(result, dtype=dtype)
             except NotImplementedError:
                 raise NotImplementedError(
                     f"Extension Array: {array_type} must implement "

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -257,6 +257,8 @@ class BooleanArray(BaseMaskedArray):
 
     # The value used to fill '_data' to avoid upcasting
     _internal_fill_value = False
+    _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
+    _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}
 
     def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
         if not (isinstance(values, np.ndarray) and values.dtype == np.bool_):
@@ -282,14 +284,29 @@ def _from_sequence(
 
     @classmethod
     def _from_sequence_of_strings(
-        cls, strings: List[str], *, dtype: Optional[Dtype] = None, copy: bool = False
+        cls,
+        strings: List[str],
+        *,
+        dtype: Optional[Dtype] = None,
+        copy: bool = False,
+        true_values: Optional[List[str]] = None,
+        false_values: Optional[List[str]] = None,
     ) -> "BooleanArray":
+        if true_values is not None:
+            true_values_union = cls._TRUE_VALUES.union(true_values)
+        else:
+            true_values_union = cls._TRUE_VALUES
+        if false_values is not None:
+            false_values_union = cls._FALSE_VALUES.union(false_values)
+        else:
+            false_values_union = cls._FALSE_VALUES
+
         def map_string(s):
             if isna(s):
                 return s
-            elif s in ["True", "TRUE", "true", "1", "1.0"]:
+            elif s in true_values_union:
                 return True
-            elif s in ["False", "FALSE", "false", "0", "0.0"]:
+            elif s in false_values_union:
                 return False
             else:
                 raise ValueError(f"{s} cannot be cast to bool")

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1812,7 +1812,15 @@ def _cast_types(self, values, cast_type, column):
             cast_type = pandas_dtype(cast_type)
             array_type = cast_type.construct_array_type()
             try:
-                return array_type._from_sequence_of_strings(values, dtype=cast_type)
+                if is_bool_dtype(cast_type):
+                    return array_type._from_sequence_of_strings(
+                        values,
+                        dtype=cast_type,
+                        true_values=self.true_values,
+                        false_values=self.false_values,
+                    )
+                else:
+                    return array_type._from_sequence_of_strings(values, dtype=cast_type)
             except NotImplementedError as err:
                 raise NotImplementedError(
                     f"Extension Array: {array_type} must implement "

diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -213,3 +213,25 @@ def decimal_number_check(parser, numeric_decimal, thousands, float_precision):
     )
     val = df.iloc[0, 0]
     assert val == numeric_decimal[1]
+
+
+def test_true_values_cast_to_bool(all_parsers):
+    # GH#34655
+    text = """a,b
+yes,xxx
+no,yyy
+1,zzz
+0,aaa
+    """
+    parser = all_parsers
+    result = parser.read_csv(
+        StringIO(text),
+        true_values=["yes"],
+        false_values=["no"],
+        dtype={"a": "boolean"},
+    )
+    expected = DataFrame(
+        {"a": [True, False, True, False], "b": ["xxx", "yyy", "zzz", "aaa"]}
+    )
+    expected["a"] = expected["a"].astype("boolean")
+    tm.assert_frame_equal(result, expected)