Skip to content

Commit b18982c

Browse files
ahawryluk authored and TLouf committed
DEPR: Deprecate convert_float (pandas-dev#41176)
1 parent ae3577d commit b18982c

File tree

5 files changed

+57
-36
lines changed

5 files changed

+57
-36
lines changed

doc/source/user_guide/io.rst

-9
Original file line numberDiff line numberDiff line change
@@ -3684,15 +3684,6 @@ one can pass an :class:`~pandas.io.excel.ExcelWriter`.
36843684
df1.to_excel(writer, sheet_name="Sheet1")
36853685
df2.to_excel(writer, sheet_name="Sheet2")
36863686
3687-
.. note::
3688-
3689-
Wringing a little more performance out of ``read_excel``
3690-
Internally, Excel stores all numeric data as floats. Because this can
3691-
produce unexpected behavior when reading in data, pandas defaults to trying
3692-
to convert floats to integers if it doesn't lose information (``1.0 -->
3693-
1``). You can pass ``convert_float=False`` to disable this behavior, which
3694-
may give a slight performance improvement.
3695-
36963687
.. _io.excel_writing_buffer:
36973688

36983689
Writing Excel files to memory

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,7 @@ Deprecations
676676
- The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.set_categories` is deprecated and will be removed in a future version (:issue:`37643`)
677677
- Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`)
678678
- Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`)
679+
- Deprecated the ``convert_float`` optional argument in :func:`read_excel` and :meth:`ExcelFile.parse` (:issue:`41127`)
679680
- Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`)
680681
- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
681682
- Deprecated passing arguments as positional in :meth:`DataFrame.clip` and :meth:`Series.clip` (other than ``"upper"`` and ``"lower"``) (:issue:`41485`)

pandas/io/excel/_base.py

+19-14
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import abc
44
import datetime
5-
import inspect
65
from io import BytesIO
76
import os
87
from textwrap import fill
@@ -33,6 +32,7 @@
3332
deprecate_nonkeyword_arguments,
3433
doc,
3534
)
35+
from pandas.util._exceptions import find_stack_level
3636

3737
from pandas.core.dtypes.common import (
3838
is_bool,
@@ -245,6 +245,10 @@
245245
Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
246246
data will be read in as floats: Excel stores all numbers as floats
247247
internally.
248+
249+
.. deprecated:: 1.3.0
250+
convert_float will be removed in a future version
251+
248252
mangle_dupe_cols : bool, default True
249253
Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
250254
'X'...'X'. Passing in False will cause data to be overwritten if there
@@ -355,7 +359,7 @@ def read_excel(
355359
thousands=None,
356360
comment=None,
357361
skipfooter=0,
358-
convert_float=True,
362+
convert_float=None,
359363
mangle_dupe_cols=True,
360364
storage_options: StorageOptions = None,
361365
):
@@ -489,11 +493,21 @@ def parse(
489493
thousands=None,
490494
comment=None,
491495
skipfooter=0,
492-
convert_float=True,
496+
convert_float=None,
493497
mangle_dupe_cols=True,
494498
**kwds,
495499
):
496500

501+
if convert_float is None:
502+
convert_float = True
503+
else:
504+
stacklevel = find_stack_level()
505+
warnings.warn(
506+
"convert_float is deprecated and will be removed in a future version",
507+
FutureWarning,
508+
stacklevel=stacklevel,
509+
)
510+
497511
validate_header_arg(header)
498512

499513
ret_dict = False
@@ -1206,16 +1220,7 @@ def __init__(
12061220
f"only the xls format is supported. Install openpyxl instead."
12071221
)
12081222
elif ext and ext != "xls":
1209-
caller = inspect.stack()[1]
1210-
if (
1211-
caller.filename.endswith(
1212-
os.path.join("pandas", "io", "excel", "_base.py")
1213-
)
1214-
and caller.function == "read_excel"
1215-
):
1216-
stacklevel = 4
1217-
else:
1218-
stacklevel = 2
1223+
stacklevel = find_stack_level()
12191224
warnings.warn(
12201225
f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "
12211226
f"only the xls format is supported. Install "
@@ -1251,7 +1256,7 @@ def parse(
12511256
thousands=None,
12521257
comment=None,
12531258
skipfooter=0,
1254-
convert_float=True,
1259+
convert_float=None,
12551260
mangle_dupe_cols=True,
12561261
**kwds,
12571262
):

pandas/tests/io/excel/test_readers.py

+25-9
Original file line numberDiff line numberDiff line change
@@ -434,9 +434,17 @@ def test_reader_special_dtypes(self, request, read_ext):
434434
float_expected = expected.copy()
435435
float_expected["IntCol"] = float_expected["IntCol"].astype(float)
436436
float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0
437-
actual = pd.read_excel(
438-
basename + read_ext, sheet_name="Sheet1", convert_float=False
439-
)
437+
with tm.assert_produces_warning(
438+
FutureWarning,
439+
match="convert_float is deprecated",
440+
raise_on_extra_warnings=False,
441+
):
442+
# raise_on_extra_warnings=False because xlrd raises a PendingDeprecationWarning
443+
# on database job Linux_py37_IO (ci/deps/actions-37-db.yaml)
444+
# See GH#41176
445+
actual = pd.read_excel(
446+
basename + read_ext, sheet_name="Sheet1", convert_float=False
447+
)
440448
tm.assert_frame_equal(actual, float_expected)
441449

442450
# check setting Index (assuming xls and xlsx are the same here)
@@ -456,12 +464,20 @@ def test_reader_special_dtypes(self, request, read_ext):
456464

457465
no_convert_float = float_expected.copy()
458466
no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str)
459-
actual = pd.read_excel(
460-
basename + read_ext,
461-
sheet_name="Sheet1",
462-
convert_float=False,
463-
converters={"StrCol": str},
464-
)
467+
with tm.assert_produces_warning(
468+
FutureWarning,
469+
match="convert_float is deprecated",
470+
raise_on_extra_warnings=False,
471+
):
472+
# raise_on_extra_warnings=False because xlrd raises a PendingDeprecationWarning
473+
# on database job Linux_py37_IO (ci/deps/actions-37-db.yaml)
474+
# See GH#41176
475+
actual = pd.read_excel(
476+
basename + read_ext,
477+
sheet_name="Sheet1",
478+
convert_float=False,
479+
converters={"StrCol": str},
480+
)
465481
tm.assert_frame_equal(actual, no_convert_float)
466482

467483
# GH8212 - support for converters and missing values

pandas/tests/io/excel/test_writers.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -474,9 +474,12 @@ def test_int_types(self, np_type, path):
474474
float_frame = df.astype(float)
475475
float_frame.columns = float_frame.columns.astype(float)
476476
float_frame.index = float_frame.index.astype(float)
477-
recons = pd.read_excel(
478-
path, sheet_name="test1", convert_float=False, index_col=0
479-
)
477+
with tm.assert_produces_warning(
478+
FutureWarning, match="convert_float is deprecated"
479+
):
480+
recons = pd.read_excel(
481+
path, sheet_name="test1", convert_float=False, index_col=0
482+
)
480483
tm.assert_frame_equal(recons, float_frame)
481484

482485
@pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64])
@@ -1293,7 +1296,12 @@ def test_merged_cell_custom_objects(self, merge_cells, path):
12931296
)
12941297
expected = DataFrame(np.ones((2, 2)), columns=mi)
12951298
expected.to_excel(path)
1296-
result = pd.read_excel(path, header=[0, 1], index_col=0, convert_float=False)
1299+
with tm.assert_produces_warning(
1300+
FutureWarning, match="convert_float is deprecated"
1301+
):
1302+
result = pd.read_excel(
1303+
path, header=[0, 1], index_col=0, convert_float=False
1304+
)
12971305
# need to convert PeriodIndexes to standard Indexes for assert equal
12981306
expected.columns = expected.columns.set_levels(
12991307
[[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]],

0 commit comments

Comments
 (0)