diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 18f394b8e549b..462873d4e88b0 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -267,6 +267,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.api.types.is_integer \
pandas.api.types.pandas_dtype \
pandas.read_clipboard \
+ pandas.ExcelFile \
pandas.ExcelFile.parse \
pandas.DataFrame.to_html \
pandas.io.formats.style.Styler.to_html \
diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst
index 8ae71452874f7..e9f986e37ca86 100644
--- a/doc/source/reference/indexing.rst
+++ b/doc/source/reference/indexing.rst
@@ -299,6 +299,9 @@ MultiIndex components
MultiIndex.reorder_levels
MultiIndex.remove_unused_levels
MultiIndex.drop
+ MultiIndex.copy
+ MultiIndex.append
+ MultiIndex.truncate
MultiIndex selecting
~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst
index 425b5f81be966..fbd0f6bd200b9 100644
--- a/doc/source/reference/io.rst
+++ b/doc/source/reference/io.rst
@@ -40,6 +40,9 @@ Excel
read_excel
DataFrame.to_excel
+ ExcelFile
+ ExcelFile.book
+ ExcelFile.sheet_names
ExcelFile.parse
.. currentmodule:: pandas.io.formats.style
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index b167b7e811d98..badf3f0f68627 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -361,7 +361,6 @@ Other enhancements
- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.DataFrameGroupBy.var`, :meth:`.SeriesGroupBy.var`, :meth:`.DataFrameGroupBy.std`, :meth:`.SeriesGroupBy.std`, :meth:`.DataFrameGroupBy.sem`, :meth:`.SeriesGroupBy.sem`, and :meth:`.DataFrameGroupBy.quantile` (:issue:`46560`)
- A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`, :issue:`46725`)
- Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`)
-- A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
- Added ``numeric_only`` argument to :meth:`.Resampler.sum`, :meth:`.Resampler.prod`, :meth:`.Resampler.min`, :meth:`.Resampler.max`, :meth:`.Resampler.first`, and :meth:`.Resampler.last` (:issue:`46442`)
- ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`)
- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`)
@@ -1014,7 +1013,6 @@ Performance improvements
- Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays. (:issue:`44764`)
- Performance improvement to :func:`read_sas` (:issue:`47404`)
- Performance improvement in ``argmax`` and ``argmin`` for :class:`arrays.SparseArray` (:issue:`34197`)
--
.. ---------------------------------------------------------------------------
.. _whatsnew_150.bug_fixes:
@@ -1041,19 +1039,16 @@ Datetimelike
- Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`)
- Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be thrown even if ``errors=coerce`` if there were more than 50 rows (:issue:`45319`)
- Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`)
--
Timedelta
^^^^^^^^^
- Bug in :func:`astype_nansafe` astype("timedelta64[ns]") fails when np.nan is included (:issue:`45798`)
- Bug in constructing a :class:`Timedelta` with a ``np.timedelta64`` object and a ``unit`` sometimes silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`46827`)
- Bug in constructing a :class:`Timedelta` from a large integer or float with ``unit="W"`` silently overflowing and returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`47268`)
--
Time Zones
^^^^^^^^^^
- Bug in :class:`Timestamp` constructor raising when passed a ``ZoneInfo`` tzinfo object (:issue:`46425`)
--
Numeric
^^^^^^^
@@ -1078,13 +1073,11 @@ Conversion
- Bug in :meth:`DataFrame.apply` that returns a :class:`DataFrame` instead of a :class:`Series` when applied to an empty :class:`DataFrame` and ``axis=1`` (:issue:`39111`)
- Bug when inferring the dtype from an iterable that is *not* a NumPy ``ndarray`` consisting of all NumPy unsigned integer scalars did not result in an unsigned integer dtype (:issue:`47294`)
- Bug in :meth:`DataFrame.eval` when pandas objects (e.g. ``'Timestamp'``) were column names (:issue:`44603`)
--
Strings
^^^^^^^
- Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. Now raises ``TypeError`` (:issue:`3485`)
- Bug in :meth:`Series.str.zfill` when strings contain leading signs, padding '0' before the sign character rather than after as ``str.zfill`` from standard library (:issue:`20868`)
--
Interval
^^^^^^^^
@@ -1192,7 +1185,6 @@ Period
- Bug in adding ``np.timedelta64("NaT", "ns")`` to a :class:`Period` with a timedelta-like freq incorrectly raising ``IncompatibleFrequency`` instead of returning ``NaT`` (:issue:`47196`)
- Bug in adding an array of integers to an array with :class:`PeriodDtype` giving incorrect results when ``dtype.freq.n > 1`` (:issue:`47209`)
- Bug in subtracting a :class:`Period` from an array with :class:`PeriodDtype` returning incorrect results instead of raising ``OverflowError`` when the operation overflows (:issue:`47538`)
--
Plotting
^^^^^^^^
@@ -1236,7 +1228,6 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list which misses the resample key (:issue:`47362`)
- Bug in :meth:`DataFrame.groupby` would lose index columns when the DataFrame is empty for transforms, like fillna (:issue:`47787`)
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` with ``dropna=False`` and ``sort=False`` would put any null groups at the end instead the order that they are encountered (:issue:`46584`)
--
Reshaping
^^^^^^^^^
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 70c60401f29fb..e14b30e2b71b4 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -46,7 +46,7 @@ Previously it was only possible to use ``int64``, ``uint64`` & ``float64`` dtype
In [3]: pd.Index([1, 2, 3], dtype=np.float32)
Out[3]: Float64Index([1.0, 2.0, 3.0], dtype="float64")
-:class:`Int64Index`, :class:`UInt64Index` & :class:`Float64Index` were depreciated in pandas
+:class:`Int64Index`, :class:`UInt64Index` & :class:`Float64Index` were deprecated in pandas
version 1.4 and have now been removed. Instead :class:`Index` should be used directly, and
it can now take all numpy numeric dtypes, i.e.
``int8``/ ``int16``/``int32``/``int64``/``uint8``/``uint16``/``uint32``/``uint64``/``float32``/``float64`` dtypes:
@@ -57,7 +57,7 @@ can it now take all numpy numeric dtypes, i.e.
pd.Index([1, 2, 3], dtype=np.uint16)
pd.Index([1, 2, 3], dtype=np.float32)
-The ability for ``Index`` to hold the numpy numeric dtypes has meant some changes in Pandas
+The ability for :class:`Index` to hold the numpy numeric dtypes has meant some changes in Pandas
functionality. In particular, operations that previously were forced to create 64-bit indexes,
can now create indexes with lower bit sizes, e.g. 32-bit indexes.
@@ -246,11 +246,15 @@ Copy-on-Write improvements
can never update the original Series or DataFrame. Therefore, an informative
error is raised to the user instead of silently doing nothing (:issue:`49467`)
-Copy-on-Write can be enabled through
+Copy-on-Write can be enabled through one of
.. code-block:: python
pd.set_option("mode.copy_on_write", True)
+
+
+.. code-block:: python
+
pd.options.mode.copy_on_write = True
Alternatively, copy on write can be enabled locally through:
@@ -281,7 +285,7 @@ Other enhancements
- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
- Improve exception message when using :func:`.testing.assert_frame_equal` on a :class:`DataFrame` to include the column that is compared (:issue:`50323`)
- Improved error message for :func:`merge_asof` when join-columns were duplicated (:issue:`50102`)
-- Added support for extension array dtypes to :func:`get_dummies` (:func:`32430`)
+- Added support for extension array dtypes to :func:`get_dummies` (:issue:`32430`)
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
@@ -687,18 +691,18 @@ In the past, :func:`to_datetime` guessed the format for each element independent
*Old behavior*:
- .. code-block:: ipython
+.. code-block:: ipython
- In [1]: ser = pd.Series(['13-01-2000', '12-01-2000'])
- In [2]: pd.to_datetime(ser)
- Out[2]:
- 0 2000-01-13
- 1 2000-12-01
- dtype: datetime64[ns]
+ In [1]: ser = pd.Series(['13-01-2000', '12-01-2000'])
+ In [2]: pd.to_datetime(ser)
+ Out[2]:
+ 0 2000-01-13
+ 1 2000-12-01
+ dtype: datetime64[ns]
*New behavior*:
- .. ipython:: python
+.. ipython:: python
:okwarning:
ser = pd.Series(['13-01-2000', '12-01-2000'])
@@ -736,7 +740,7 @@ Other API changes
- :func:`pandas.testing.assert_index_equal` with parameter ``exact="equiv"`` now considers two indexes equal when both are either a :class:`RangeIndex` or :class:`Index` with an ``int64`` dtype. Previously it meant either a :class:`RangeIndex` or a :class:`Int64Index` (:issue:`51098`)
- :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`)
- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`, :issue:`50453`)
-- :func:`api.dtypes.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`)
+- :func:`pandas.api.types.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`)
- Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
- Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`)
@@ -779,7 +783,7 @@ Deprecations
- :meth:`Index.is_numeric` has been deprecated. Use :func:`pandas.api.types.is_any_real_numeric_dtype` instead (:issue:`50042`, :issue:`51152`)
- :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`)
- :meth:`Index.is_object` has been deprecated. Use :func:`pandas.api.types.is_object_dtype` instead (:issue:`50042`)
-- :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_intterval_dtype` instead (:issue:`50042`)
+- :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_interval_dtype` instead (:issue:`50042`)
- Deprecated ``all`` and ``any`` reductions with ``datetime64`` and :class:`DatetimeTZDtype` dtypes, use e.g. ``(obj != pd.Timestamp(0, tz=obj.tz)).all()`` instead (:issue:`34479`)
- Deprecated unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` (:issue:`50977`)
- Deprecated calling ``float`` or ``int`` on a single element :class:`Series` to return a ``float`` or ``int`` respectively. Extract the element before calling ``float`` or ``int`` instead (:issue:`51101`)
@@ -1162,7 +1166,7 @@ Numeric
- Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` would not be coerced to float (:issue:`49551`)
- Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always raise when using data backed by an :class:`ArrowDtype` (:issue:`49759`)
- Bug in :meth:`Series.__add__` casting to object for list and masked :class:`Series` (:issue:`22962`)
-- Bug in :meth:`query` with ``engine="numexpr"`` and column names are ``min`` or ``max`` would raise a ``TypeError`` (:issue:`50937`)
+- Bug in :meth:`DataFrame.query` with ``engine="numexpr"`` and column names are ``min`` or ``max`` would raise a ``TypeError`` (:issue:`50937`)
Conversion
^^^^^^^^^^
@@ -1182,7 +1186,7 @@ Conversion
Strings
^^^^^^^
-- Bug in :func:`pandas.api.dtypes.is_string_dtype` that would not return ``True`` for :class:`StringDtype` or :class:`ArrowDtype` with ``pyarrow.string()`` (:issue:`15585`)
+- Bug in :func:`pandas.api.types.is_string_dtype` that would not return ``True`` for :class:`StringDtype` or :class:`ArrowDtype` with ``pyarrow.string()`` (:issue:`15585`)
- Bug in converting string dtypes to "datetime64[ns]" or "timedelta64[ns]" incorrectly raising ``TypeError`` (:issue:`36153`)
-
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 19c17fd0a4358..95f35eabb342e 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -1148,8 +1148,9 @@ def copy( # type: ignore[override]
name=None,
):
"""
- Make a copy of this object. Names, dtype, levels and codes can be
- passed and will be set on new copy.
+ Make a copy of this object.
+
+ Names, dtype, levels and codes can be passed and will be set on new copy.
Parameters
----------
@@ -1167,6 +1168,16 @@ def copy( # type: ignore[override]
In most cases, there should be no functional difference from using
``deep``, but if ``deep`` is passed it will attempt to deepcopy.
This could be potentially expensive on large MultiIndex objects.
+
+ Examples
+ --------
+ >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
+ >>> mi
+ MultiIndex([('a', 'b', 'c')],
+ )
+ >>> mi.copy()
+ MultiIndex([('a', 'b', 'c')],
+ )
"""
names = self._validate_names(name=name, names=names, deep=deep)
keep_id = not deep
@@ -2085,7 +2096,7 @@ def take(
def append(self, other):
"""
- Append a collection of Index options together
+ Append a collection of Index options together.
Parameters
----------
@@ -2093,7 +2104,18 @@ def append(self, other):
Returns
-------
- appended : Index
+ Index
+ The combined index.
+
+ Examples
+ --------
+ >>> mi = pd.MultiIndex.from_arrays([['a'], ['b']])
+ >>> mi
+ MultiIndex([('a', 'b')],
+ )
+ >>> mi.append(mi)
+ MultiIndex([('a', 'b'), ('a', 'b')],
+ )
"""
if not isinstance(other, (list, tuple)):
other = [other]
@@ -3397,18 +3419,29 @@ def _reorder_indexer(
def truncate(self, before=None, after=None) -> MultiIndex:
"""
- Slice index between two labels / tuples, return new MultiIndex
+ Slice index between two labels / tuples, return new MultiIndex.
Parameters
----------
before : label or tuple, can be partial. Default None
- None defaults to start
+ None defaults to start.
after : label or tuple, can be partial. Default None
- None defaults to end
+ None defaults to end.
Returns
-------
- truncated : MultiIndex
+ MultiIndex
+ The truncated MultiIndex.
+
+ Examples
+ --------
+ >>> mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z']])
+ >>> mi
+ MultiIndex([('a', 'x'), ('b', 'y'), ('c', 'z')],
+ )
+ >>> mi.truncate(before='a', after='b')
+ MultiIndex([('a', 'x'), ('b', 'y')],
+ )
"""
if after and before and after < before:
raise ValueError("after < before")
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 2a0d3a01d3383..79d174db5c0a7 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1413,14 +1413,14 @@ def inspect_excel_format(
class ExcelFile:
"""
- Class for parsing tabular excel sheets into DataFrame objects.
+ Class for parsing tabular Excel sheets into DataFrame objects.
See read_excel for more documentation.
Parameters
----------
path_or_buffer : str, bytes, path object (pathlib.Path or py._path.local.LocalPath),
- a file-like object, xlrd workbook or openpyxl workbook.
+ A file-like object, xlrd workbook or openpyxl workbook.
If a string or path object, expected to be a path to a
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
engine : str, default None
@@ -1448,6 +1448,7 @@ class ExcelFile:
`pyxlsb <https://pypi.org/project/pyxlsb/>`_ will be used.
.. versionadded:: 1.3.0
+
- Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
then ``openpyxl`` will be used.
- Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised.
diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 615d03589047c..c3d7cb5df717f 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -27,21 +27,20 @@
@doc(storage_options=_shared_docs["storage_options"])
class ODFReader(BaseExcelReader):
- """
- Read tables out of OpenDocument formatted files.
-
- Parameters
- ----------
- filepath_or_buffer : str, path to be parsed or
- an open readable stream.
- {storage_options}
- """
-
def __init__(
self,
filepath_or_buffer: FilePath | ReadBuffer[bytes],
storage_options: StorageOptions = None,
) -> None:
+ """
+ Read tables out of OpenDocument formatted files.
+
+ Parameters
+ ----------
+ filepath_or_buffer : str, path to be parsed or
+ an open readable stream.
+ {storage_options}
+ """
import_optional_dependency("odf")
super().__init__(filepath_or_buffer, storage_options=storage_options)