Skip to content

Commit d834750

Browse files
nikitavedMarco Gorelli
authored and
Marco Gorelli
committed
initial format support
Co-Authored-By: MarcoGorelli <> Co-Authored-By: FDRocha <>
1 parent 1e5fee8 commit d834750

File tree

107 files changed

+651
-1403
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+651
-1403
lines changed

asv_bench/asv.conf.json

-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
"openpyxl": [],
5555
"xlsxwriter": [],
5656
"xlrd": [],
57-
"xlwt": [],
5857
"odfpy": [],
5958
"jinja2": [],
6059
},

asv_bench/benchmarks/io/excel.py

+4-10
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def _generate_dataframe():
3333

3434
class WriteExcel:
3535

36-
params = ["openpyxl", "xlsxwriter", "xlwt"]
36+
params = ["openpyxl", "xlsxwriter"]
3737
param_names = ["engine"]
3838

3939
def setup(self, engine):
@@ -68,10 +68,9 @@ def time_write_excel_style(self, engine):
6868

6969
class ReadExcel:
7070

71-
params = ["xlrd", "openpyxl", "odf"]
71+
params = ["openpyxl", "odf"]
7272
param_names = ["engine"]
7373
fname_excel = "spreadsheet.xlsx"
74-
fname_excel_xls = "spreadsheet.xls"
7574
fname_odf = "spreadsheet.ods"
7675

7776
def _create_odf(self):
@@ -92,13 +91,10 @@ def setup_cache(self):
9291
self.df = _generate_dataframe()
9392

9493
self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
95-
self.df.to_excel(self.fname_excel_xls, sheet_name="Sheet1")
9694
self._create_odf()
9795

9896
def time_read_excel(self, engine):
99-
if engine == "xlrd":
100-
fname = self.fname_excel_xls
101-
elif engine == "odf":
97+
if engine == "odf":
10298
fname = self.fname_odf
10399
else:
104100
fname = self.fname_excel
@@ -107,9 +103,7 @@ def time_read_excel(self, engine):
107103

108104
class ReadExcelNRows(ReadExcel):
109105
def time_read_excel(self, engine):
110-
if engine == "xlrd":
111-
fname = self.fname_excel_xls
112-
elif engine == "odf":
106+
if engine == "odf":
113107
fname = self.fname_odf
114108
else:
115109
fname = self.fname_excel

ci/code_checks.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ import pandas
4747
4848
blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
4949
'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
50-
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
50+
'tables', 'urllib.request', 'xlrd', 'xlsxwriter'}
5151
5252
# GH#28227 for some of these check for top-level modules, while others are
5353
# more specific (e.g. urllib.request)

ci/deps/actions-310.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

ci/deps/actions-38-downstream_compat.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard
5655

5756
# downstream packages

ci/deps/actions-38-minimum_versions.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,4 @@ dependencies:
5353
- xarray=0.19.0
5454
- xlrd=2.0.1
5555
- xlsxwriter=1.4.3
56-
- xlwt=1.3.0
5756
- zstandard=0.15.2

ci/deps/actions-38.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,4 @@ dependencies:
5050
- xarray
5151
- xlrd
5252
- xlsxwriter
53-
- xlwt
5453
- zstandard

ci/deps/actions-39.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

ci/deps/circle-38-arm64.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,4 @@ dependencies:
5151
- xarray
5252
- xlrd
5353
- xlsxwriter
54-
- xlwt
5554
- zstandard

doc/source/getting_started/install.rst

-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,6 @@ Can be managed as optional_extra with ``pandas[excel]``.
336336
Dependency Minimum Version optional_extra Notes
337337
========================= ================== =============== =============================================================
338338
xlrd 2.0.1 excel Reading Excel
339-
xlwt 1.3.0 excel Writing Excel
340339
xlsxwriter 1.4.3 excel Writing Excel
341340
openpyxl 3.0.7 excel Reading / writing for xlsx files
342341
pyxlsb 1.0.8 excel Reading for xlsb files

doc/source/user_guide/io.rst

+2-19
Original file line numberDiff line numberDiff line change
@@ -3466,8 +3466,6 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
34663466

34673467
.. warning::
34683468

3469-
The `xlwt <https://xlwt.readthedocs.io/en/latest/>`__ package for writing old-style ``.xls``
3470-
excel files is no longer maintained.
34713469
The `xlrd <https://xlrd.readthedocs.io/en/latest/>`__ package is now only for reading
34723470
old-style ``.xls`` files.
34733471

@@ -3481,12 +3479,6 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
34813479
**Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.**
34823480
This is no longer supported, switch to using ``openpyxl`` instead.
34833481

3484-
Attempting to use the ``xlwt`` engine will raise a ``FutureWarning``
3485-
unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``.
3486-
While this option is now deprecated and will also raise a ``FutureWarning``,
3487-
it can be globally set and the warning suppressed. Users are recommended to
3488-
write ``.xlsx`` files using the ``openpyxl`` engine instead.
3489-
34903482
.. _io.excel_reader:
34913483

34923484
Reading Excel files
@@ -3788,7 +3780,7 @@ written. For example:
37883780
37893781
df.to_excel("path_to_file.xlsx", sheet_name="Sheet1")
37903782
3791-
Files with a ``.xls`` extension will be written using ``xlwt`` and those with a
3783+
Files with a
37923784
``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or
37933785
``openpyxl``.
37943786

@@ -3849,35 +3841,26 @@ pandas supports writing Excel files to buffer-like objects such as ``StringIO``
38493841
Excel writer engines
38503842
''''''''''''''''''''
38513843

3852-
.. deprecated:: 1.2.0
3853-
3854-
As the `xlwt <https://pypi.org/project/xlwt/>`__ package is no longer
3855-
maintained, the ``xlwt`` engine will be removed from a future version
3856-
of pandas. This is the only engine in pandas that supports writing to
3857-
``.xls`` files.
3858-
38593844
pandas chooses an Excel writer via two methods:
38603845

38613846
1. the ``engine`` keyword argument
38623847
2. the filename extension (via the default specified in config options)
38633848

38643849
By default, pandas uses the `XlsxWriter`_ for ``.xlsx``, `openpyxl`_
3865-
for ``.xlsm``, and `xlwt`_ for ``.xls`` files. If you have multiple
3850+
for ``.xlsm``. If you have multiple
38663851
engines installed, you can set the default engine through :ref:`setting the
38673852
config options <options>` ``io.excel.xlsx.writer`` and
38683853
``io.excel.xls.writer``. pandas will fall back on `openpyxl`_ for ``.xlsx``
38693854
files if `Xlsxwriter`_ is not available.
38703855

38713856
.. _XlsxWriter: https://xlsxwriter.readthedocs.io
38723857
.. _openpyxl: https://openpyxl.readthedocs.io/
3873-
.. _xlwt: http://www.python-excel.org
38743858

38753859
To specify which writer you want to use, you can pass an engine keyword
38763860
argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are:
38773861

38783862
* ``openpyxl``: version 2.4 or higher is required
38793863
* ``xlsxwriter``
3880-
* ``xlwt``
38813864

38823865
.. code-block:: python
38833866

doc/source/user_guide/missing_data.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ at the new values.
472472
473473
.. _scipy: https://scipy.org/
474474
.. _documentation: https://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation
475-
.. _guide: https://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
475+
.. _guide: https://docs.scipy.org/doc/scipy/tutorial/interpolate.html
476476

477477
.. _missing_data.interp_limits:
478478

doc/source/whatsnew/v2.0.0.rst

+21-1
Original file line numberDiff line numberDiff line change
@@ -192,10 +192,24 @@ Removal of prior version deprecations/changes
192192
- Removed argument ``sort_columns`` in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`)
193193
- Removed argument ``is_copy`` from :meth:`DataFrame.take` and :meth:`Series.take` (:issue:`30615`)
194194
- Removed argument ``kind`` from :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer` and :meth:`Index.slice_locs` (:issue:`41378`)
195+
- Disallow subclass-specific keywords (e.g. "freq", "tz", "names", "closed") in the :class:`Index` constructor (:issue:`38597`)
195196
- Removed argument ``inplace`` from :meth:`Categorical.remove_unused_categories` (:issue:`37918`)
196197
- Disallow passing non-round floats to :class:`Timestamp` with ``unit="M"`` or ``unit="Y"`` (:issue:`47266`)
197198
- Remove keywords ``convert_float`` and ``mangle_dupe_cols`` from :func:`read_excel` (:issue:`41176`)
198199
- Disallow passing non-keyword arguments to :func:`read_excel` except ``io`` and ``sheet_name`` (:issue:`34418`)
200+
- Disallow passing non-keyword arguments to :meth:`DataFrame.drop_duplicates` except for ``subset`` (:issue:`41485`)
201+
- Disallow passing non-keyword arguments to :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` (:issue:`41506`)
202+
- Disallow passing non-keyword arguments to :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` except for ``method`` (:issue:`41510`)
203+
- Disallow passing non-keyword arguments to :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44896`)
204+
- Disallow passing non-keyword arguments to :meth:`Index.set_names` except for ``names`` (:issue:`41551`)
205+
- Disallow passing non-keyword arguments to :meth:`Index.join` except for ``other`` (:issue:`46518`)
206+
- Disallow passing non-keyword arguments to :func:`concat` except for ``objs`` (:issue:`41485`)
207+
- Disallow passing non-keyword arguments to :func:`pivot` except for ``data`` (:issue:`48301`)
208+
- Disallow passing non-keyword arguments to :meth:`DataFrame.pivot` (:issue:`48301`)
209+
- Disallow passing non-keyword arguments to :func:`read_json` except for ``path_or_buf`` (:issue:`27573`)
210+
- Disallow passing non-keyword arguments to :func:`read_sas` except for ``filepath_or_buffer`` (:issue:`47154`)
211+
- Disallow passing non-keyword arguments to :func:`read_stata` except for ``filepath_or_buffer`` (:issue:`48128`)
212+
- Disallow passing non-keyword arguments to :func:`read_xml` except for ``path_or_buffer`` (:issue:`45133`)
199213
- Disallow passing non-keyword arguments to :meth:`Series.mask` and :meth:`DataFrame.mask` except ``cond`` and ``other`` (:issue:`41580`)
200214
- Disallow passing non-keyword arguments to :meth:`DataFrame.to_stata` except for ``path`` (:issue:`48128`)
201215
- Disallow passing non-keyword arguments to :meth:`DataFrame.where` and :meth:`Series.where` except for ``cond`` and ``other`` (:issue:`41523`)
@@ -228,16 +242,21 @@ Removal of prior version deprecations/changes
228242
- Removed ``pandas.util.testing`` in favor of ``pandas.testing`` (:issue:`30745`)
229243
- Removed :meth:`Series.str.__iter__` (:issue:`28277`)
230244
- Removed ``pandas.SparseArray`` in favor of :class:`arrays.SparseArray` (:issue:`30642`)
231-
- Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame`` (:issue:`30642`)
245+
- Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame``, including pickle support (:issue:`30642`)
232246
- Enforced disallowing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
233247
- Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`)
234248
- Removed setting Categorical._codes directly (:issue:`41429`)
235249
- Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`)
236250
- Renamed ``fname`` to ``path`` in :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata` and :meth:`DataFrame.to_feather` (:issue:`30338`)
251+
- Enforced disallowing indexing a :class:`Series` with a single item list with a slice (e.g. ``ser[[slice(0, 2)]]``). Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
237252
- Enforced the ``display.max_colwidth`` option to not accept negative integers (:issue:`31569`)
238253
- Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`)
239254
- Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`)
240255
- Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
256+
- Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`)
257+
- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``; when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
258+
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
259+
- Changed the behavior of :class:`Series` constructor; it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)
241260
- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
242261

243262
.. ---------------------------------------------------------------------------
@@ -314,6 +333,7 @@ Conversion
314333
- Bug in :meth:`Series.convert_dtypes` not converting dtype to nullable dtype when :class:`Series` contains ``NA`` and has dtype ``object`` (:issue:`48791`)
315334
- Bug where any :class:`ExtensionDtype` subclass with ``kind="M"`` would be interpreted as a timezone type (:issue:`34986`)
316335
- Bug in :class:`.arrays.ArrowExtensionArray` that would raise ``NotImplementedError`` when passed a sequence of strings or binary (:issue:`49172`)
336+
- Bug in :func:`to_datetime` not respecting the ``exact`` argument when ``format`` was an ISO8601 format (:issue:`12649`)
317337

318338
Strings
319339
^^^^^^^

environment.yml

-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ dependencies:
5353
- xarray
5454
- xlrd
5555
- xlsxwriter
56-
- xlwt
5756
- zstandard
5857

5958
# downstream packages

pandas/_libs/lib.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def ensure_string_array(
158158
) -> npt.NDArray[np.object_]: ...
159159
def infer_datetimelike_array(
160160
arr: npt.NDArray[np.object_],
161-
) -> tuple[str, bool]: ...
161+
) -> str: ...
162162
def convert_nans_to_NA(
163163
arr: npt.NDArray[np.object_],
164164
) -> npt.NDArray[np.object_]: ...

pandas/_libs/lib.pyx

+15-35
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ from pandas._libs.util cimport (
9595
is_nan,
9696
)
9797

98-
from pandas._libs.tslib import array_to_datetime
9998
from pandas._libs.tslibs import (
10099
OutOfBoundsDatetime,
101100
OutOfBoundsTimedelta,
@@ -1583,25 +1582,19 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
15831582
Returns
15841583
-------
15851584
str: {datetime, timedelta, date, nat, mixed}
1586-
bool
15871585
"""
15881586
cdef:
15891587
Py_ssize_t i, n = len(arr)
15901588
bint seen_timedelta = False, seen_date = False, seen_datetime = False
15911589
bint seen_tz_aware = False, seen_tz_naive = False
1592-
bint seen_nat = False, seen_str = False
1590+
bint seen_nat = False
15931591
bint seen_period = False, seen_interval = False
1594-
list objs = []
15951592
object v
15961593

15971594
for i in range(n):
15981595
v = arr[i]
15991596
if isinstance(v, str):
1600-
objs.append(v)
1601-
seen_str = True
1602-
1603-
if len(objs) == 3:
1604-
break
1597+
return "mixed"
16051598

16061599
elif v is None or util.is_nan(v):
16071600
# nan or None
@@ -1619,7 +1612,7 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
16191612
seen_tz_aware = True
16201613

16211614
if seen_tz_naive and seen_tz_aware:
1622-
return "mixed", seen_str
1615+
return "mixed"
16231616
elif util.is_datetime64_object(v):
16241617
# np.datetime64
16251618
seen_datetime = True
@@ -1635,43 +1628,30 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
16351628
seen_interval = True
16361629
break
16371630
else:
1638-
return "mixed", seen_str
1631+
return "mixed"
16391632

16401633
if seen_period:
16411634
if is_period_array(arr):
1642-
return "period", seen_str
1643-
return "mixed", seen_str
1635+
return "period"
1636+
return "mixed"
16441637

16451638
if seen_interval:
16461639
if is_interval_array(arr):
1647-
return "interval", seen_str
1648-
return "mixed", seen_str
1640+
return "interval"
1641+
return "mixed"
16491642

16501643
if seen_date and not (seen_datetime or seen_timedelta):
1651-
return "date", seen_str
1644+
return "date"
16521645
elif seen_datetime and not seen_timedelta:
1653-
return "datetime", seen_str
1646+
return "datetime"
16541647
elif seen_timedelta and not seen_datetime:
1655-
return "timedelta", seen_str
1648+
return "timedelta"
1649+
elif seen_datetime and seen_timedelta:
1650+
return "mixed"
16561651
elif seen_nat:
1657-
return "nat", seen_str
1652+
return "nat"
16581653

1659-
# short-circuit by trying to
1660-
# actually convert these strings
1661-
# this is for performance as we don't need to try
1662-
# convert *every* string array
1663-
if len(objs):
1664-
try:
1665-
# require_iso8601 as in maybe_infer_to_datetimelike
1666-
array_to_datetime(objs, errors="raise", require_iso8601=True)
1667-
return "datetime", seen_str
1668-
except (ValueError, TypeError):
1669-
pass
1670-
1671-
# we are *not* going to infer from strings
1672-
# for timedelta as too much ambiguity
1673-
1674-
return "mixed", seen_str
1654+
return "mixed"
16751655

16761656

16771657
cdef inline bint is_timedelta(object o):

0 commit comments

Comments
 (0)