diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 358d9447b131d..6bb86a90e487a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -569,6 +569,7 @@ I/O - Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`) - Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`) - Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`) +- Bug in :func:`read_excel` when reading a ``.xlsx`` file with a boolean column that contains blank lines (:issue:`45903`) Period ^^^^^^ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 35b9de3f7af93..f7f276f5ed330 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -289,6 +289,7 @@ class BooleanArray(BaseMaskedArray): # Fill values used for any/all _truthy_value = True _falsey_value = False + _NONE_VALUES = {"nan", "NaN", "None", "NA", "null", "NULL"} _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"} _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} @@ -321,8 +322,8 @@ def _from_sequence_of_strings( false_values_union = cls._FALSE_VALUES.union(false_values or []) def map_string(s): - if isna(s): - return s + if isna(s) or s in cls._NONE_VALUES: + return None elif s in true_values_union: return True elif s in false_values_union: diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 2851ea36c8a33..b0562b06e9fea 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -785,7 +785,7 @@ def _cast_types(self, values, cast_type, column): try: if is_bool_dtype(cast_type): return array_type._from_sequence_of_strings( - values, + values.astype(str), dtype=cast_type, true_values=self.true_values, false_values=self.false_values, diff --git a/pandas/tests/io/data/excel/test_types.ods b/pandas/tests/io/data/excel/test_types.ods index c9a82bfff810b..881c89c1f42db 100644 Binary files a/pandas/tests/io/data/excel/test_types.ods and b/pandas/tests/io/data/excel/test_types.ods differ diff --git a/pandas/tests/io/data/excel/test_types.xls b/pandas/tests/io/data/excel/test_types.xls index 2d387603a8307..bae1dc4815dad 100644 Binary files a/pandas/tests/io/data/excel/test_types.xls and b/pandas/tests/io/data/excel/test_types.xls differ diff --git a/pandas/tests/io/data/excel/test_types.xlsb b/pandas/tests/io/data/excel/test_types.xlsb index e7403aa288263..9c5ecddb7dff7 100644 Binary files a/pandas/tests/io/data/excel/test_types.xlsb and b/pandas/tests/io/data/excel/test_types.xlsb differ diff --git a/pandas/tests/io/data/excel/test_types.xlsm b/pandas/tests/io/data/excel/test_types.xlsm index 4c8c10e40effe..0da5807c66b7e 100644 Binary files a/pandas/tests/io/data/excel/test_types.xlsm and b/pandas/tests/io/data/excel/test_types.xlsm differ diff --git a/pandas/tests/io/data/excel/test_types.xlsx b/pandas/tests/io/data/excel/test_types.xlsx index 561bb2c5d6714..b6f3ae43b8060 100644 Binary files a/pandas/tests/io/data/excel/test_types.xlsx and b/pandas/tests/io/data/excel/test_types.xlsx differ diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 1e0f74ea41453..2600f674fc6e7 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -420,8 +420,10 @@ def test_reader_special_dtypes(self, request, read_ext): datetime(2013, 12, 14), datetime(2015, 3, 14), ], + "BoolColWithBlank": [True, False, None, True, False], }, ) + basename = "test_types" # should read in correctly and infer types