-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: True cannot be cast to bool in read_excel #58994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
1fcbde6
f77c41d
ffb65d4
14e565e
873bc9b
e918066
7de150e
e9fe60c
2a2e8a3
561b6ff
614c880
c62571e
d871d79
306ea32
d11f77b
66d34f0
42285ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -297,7 +297,13 @@ class BooleanArray(BaseMaskedArray): | |
""" | ||
|
||
_TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"} | ||
_FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} | ||
_FALSE_VALUES = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you undo the formatting here? |
||
"False", | ||
"FALSE", | ||
"false", | ||
"0", | ||
"0.0", | ||
} | ||
|
||
@classmethod | ||
def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self: | ||
|
@@ -329,15 +335,21 @@ def _from_sequence_of_strings( | |
copy: bool = False, | ||
true_values: list[str] | None = None, | ||
false_values: list[str] | None = None, | ||
none_values: list[str] | None = None, | ||
) -> BooleanArray: | ||
true_values_union = cls._TRUE_VALUES.union(true_values or []) | ||
false_values_union = cls._FALSE_VALUES.union(false_values or []) | ||
|
||
def map_string(s) -> bool: | ||
if none_values is None: | ||
none_values = [] | ||
|
||
def map_string(s) -> bool | None: | ||
if s in true_values_union: | ||
return True | ||
elif s in false_values_union: | ||
return False | ||
elif s in none_values: | ||
return None | ||
else: | ||
raise ValueError(f"{s} cannot be cast to bool") | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -745,11 +745,13 @@ def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLi | |
if isinstance(cast_type, BooleanDtype): | ||
# error: Unexpected keyword argument "true_values" for | ||
# "_from_sequence_of_strings" of "ExtensionArray" | ||
values_str = [str(val) for val in values] | ||
return array_type._from_sequence_of_strings( # type: ignore[call-arg] | ||
values, | ||
values_str, | ||
dtype=cast_type, | ||
true_values=self.true_values, | ||
false_values=self.false_values, | ||
Comment on lines
+750
to
753
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I see what you're saying. You're suggesting to propagate I think that's a good idea. I'll make that change. That should allow me to get rid of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Those params are already available in _cast_types since it's an instance method (so self.na_values and self.keep_default_na are available here, similar to self.true_values and self.false_values). I was thinking about progagating them to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep we're on the same page. Making those changes now |
||
none_values=self.na_values, | ||
) | ||
else: | ||
return array_type._from_sequence_of_strings(values, dtype=cast_type) | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -164,6 +164,35 @@ def xfail_datetimes_with_pyxlsb(engine, request): | |||||
|
||||||
|
||||||
class TestReaders: | ||||||
@pytest.mark.parametrize("col", [[True, None, False], [True], [True, False]]) | ||||||
def test_read_excel_type_check(self, col): | ||||||
# GH 58159 | ||||||
df = DataFrame({"bool_column": col}, dtype=pd.BooleanDtype.name) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
And could you change other usages here? |
||||||
df.to_excel("test-type.xlsx", index=False) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you use the |
||||||
df2 = pd.read_excel( | ||||||
"test-type.xlsx", | ||||||
dtype={"bool_column": pd.BooleanDtype.name}, | ||||||
engine="openpyxl", | ||||||
) | ||||||
tm.assert_frame_equal(df, df2) | ||||||
|
||||||
def test_pass_none_type(self): | ||||||
# GH 58159 | ||||||
with pd.ExcelFile("test7.xlsx") as excel: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you use the |
||||||
parsed = pd.read_excel( | ||||||
excel, | ||||||
sheet_name="Sheet1", | ||||||
keep_default_na=True, | ||||||
na_values=["nan", "None", "abcd"], | ||||||
dtype=pd.BooleanDtype.name, | ||||||
engine="openpyxl", | ||||||
) | ||||||
expected = DataFrame( | ||||||
{"Test": [True, None, False, None, False, None, True]}, | ||||||
dtype=pd.BooleanDtype.name, | ||||||
) | ||||||
tm.assert_frame_equal(parsed, expected) | ||||||
|
||||||
@pytest.fixture(autouse=True) | ||||||
def cd_and_set_engine(self, engine, datapath, monkeypatch): | ||||||
""" | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.