diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index f594aae2c7f9f..ca8b8ca15ec47 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -24,6 +24,7 @@ Fixed regressions - Fixed regression in :func:`read_csv` with ``index_col=False`` identifying first row as index names when ``header=None`` (:issue:`46955`) - Fixed regression in :meth:`.DataFrameGroupBy.agg` when used with list-likes or dict-likes and ``axis=1`` that would give incorrect results; now raises ``NotImplementedError`` (:issue:`46995`) - Fixed regression in :meth:`DataFrame.resample` and :meth:`DataFrame.rolling` when used with list-likes or dict-likes and ``axis=1`` that would raise an unintuitive error message; now raises ``NotImplementedError`` (:issue:`46904`) +- Fixed regression in :func:`read_excel` returning ints as floats on certain input sheets (:issue:`46988`) - Fixed regression in :meth:`DataFrame.shift` when ``axis`` is ``columns`` and ``fill_value`` is absent, ``freq`` is ignored (:issue:`47039`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 27c03d4a74bc1..8ec24b35779a0 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -560,8 +560,14 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar: return "" # compat with xlrd elif cell.data_type == TYPE_ERROR: return np.nan - elif not convert_float and cell.data_type == TYPE_NUMERIC: - return float(cell.value) + elif cell.data_type == TYPE_NUMERIC: + # GH5394, GH46988 + if convert_float: + val = int(cell.value) + if val == cell.value: + return val + else: + return float(cell.value) return cell.value diff --git a/pandas/tests/io/data/excel/ints_spelled_with_decimals.xlsx b/pandas/tests/io/data/excel/ints_spelled_with_decimals.xlsx new file mode 100644 index 0000000000000..a667be86283b8 Binary files /dev/null and b/pandas/tests/io/data/excel/ints_spelled_with_decimals.xlsx differ diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index e0d4a0c12ecdf..e420b9e806f65 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -375,3 +375,11 @@ def test_read_empty_with_blank_row(datapath, ext, read_only): wb.close() expected = DataFrame() tm.assert_frame_equal(result, expected) + + +def test_ints_spelled_with_decimals(datapath, ext): + # GH 46988 - openpyxl returns this sheet with floats + path = datapath("io", "data", "excel", f"ints_spelled_with_decimals{ext}") + result = pd.read_excel(path) + expected = DataFrame(range(2, 12), columns=[1]) + tm.assert_frame_equal(result, expected)