Skip to content

Commit 1a95c79

Browse files
authored
BUG: True cannot be cast to bool in read_excel (#58994)
* Adding implementation, tests. Updating documentation
* Fixing dependency error by moving test inside of TestReaders
* Updating implementation based on reviewer feedback
* Creating a more clean implementation
* Fixing broken unit tests
* Fixing docstring error
* Updating implementation based on reviewer feedback. Adding additional unit tests
* Updating implementation based on reviewer feedback
* Updating implementation based on reviewer feedback
* Using datapath fixture
* Fixing failing unit test
* Removing unneeded file
* Fixing failing documentation test
* Updating unit test based on reviewer feedback
1 parent f44ce13 commit 1a95c79

File tree

6 files changed

+42
-2
lines changed

6 files changed

+42
-2
lines changed

doc/source/whatsnew/v3.0.0.rst

+2
Original file line number | Diff line number | Diff line change
@@ -558,7 +558,9 @@ I/O
558558
- Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
559559
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
560560
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
561+
- Bug in :meth:`read_excel` raising ``ValueError`` when reading a column of boolean values with ``dtype="boolean"``. (:issue:`58159`)
561562
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
563+
-
562564

563565
Period
564566
^^^^^^

pandas/core/arrays/boolean.py

+7-1
Original file line number | Diff line number | Diff line change
@@ -329,15 +329,21 @@ def _from_sequence_of_strings(
329329
copy: bool = False,
330330
true_values: list[str] | None = None,
331331
false_values: list[str] | None = None,
332+
none_values: list[str] | None = None,
332333
) -> BooleanArray:
333334
true_values_union = cls._TRUE_VALUES.union(true_values or [])
334335
false_values_union = cls._FALSE_VALUES.union(false_values or [])
335336

336-
def map_string(s) -> bool:
337+
if none_values is None:
338+
none_values = []
339+
340+
def map_string(s) -> bool | None:
337341
if s in true_values_union:
338342
return True
339343
elif s in false_values_union:
340344
return False
345+
elif s in none_values:
346+
return None
341347
else:
342348
raise ValueError(f"{s} cannot be cast to bool")
343349

pandas/io/parsers/base_parser.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -745,11 +745,13 @@ def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLi
745745
if isinstance(cast_type, BooleanDtype):
746746
# error: Unexpected keyword argument "true_values" for
747747
# "_from_sequence_of_strings" of "ExtensionArray"
748+
values_str = [str(val) for val in values]
748749
return array_type._from_sequence_of_strings( # type: ignore[call-arg]
749-
values,
750+
values_str,
750751
dtype=cast_type,
751752
true_values=self.true_values,
752753
false_values=self.false_values,
754+
none_values=self.na_values,
753755
)
754756
else:
755757
return array_type._from_sequence_of_strings(values, dtype=cast_type)
Binary file not shown.
5.18 KB
Binary file not shown.

pandas/tests/io/excel/test_readers.py

+30
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,36 @@ def xfail_datetimes_with_pyxlsb(engine, request):
164164

165165

166166
class TestReaders:
167+
@pytest.mark.parametrize("col", [[True, None, False], [True], [True, False]])
168+
def test_read_excel_type_check(self, col, datapath):
169+
# GH 58159
170+
df = DataFrame({"bool_column": col}, dtype="boolean")
171+
f_path = datapath("io", "data", "excel", "test_boolean_types.xlsx")
172+
173+
df.to_excel(f_path, index=False)
174+
df2 = pd.read_excel(f_path, dtype={"bool_column": "boolean"}, engine="openpyxl")
175+
tm.assert_frame_equal(df, df2)
176+
177+
def test_pass_none_type(self, datapath):
178+
# GH 58159
179+
f_path = datapath("io", "data", "excel", "test_none_type.xlsx")
180+
181+
with pd.ExcelFile(f_path) as excel:
182+
parsed = pd.read_excel(
183+
excel,
184+
sheet_name="Sheet1",
185+
keep_default_na=True,
186+
na_values=["nan", "None", "abcd"],
187+
dtype="boolean",
188+
engine="openpyxl",
189+
)
190+
expected = DataFrame(
191+
{"Test": [True, None, False, None, False, None, True]},
192+
dtype="boolean",
193+
)
194+
195+
tm.assert_frame_equal(parsed, expected)
196+
167197
@pytest.fixture(autouse=True)
168198
def cd_and_set_engine(self, engine, datapath, monkeypatch):
169199
"""

0 commit comments

Comments
 (0)