Skip to content

Commit aa35a6f

Browse files
Merge branch 'main' into pandas.core.groupby.DataFrameGroupBy.agg
2 parents 1d97210 + b948821 commit aa35a6f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+544
-348
lines changed

ci/code_checks.sh

-15
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7373
-i "pandas.Period.freq GL08" \
7474
-i "pandas.Period.ordinal GL08" \
7575
-i "pandas.RangeIndex.from_range PR01,SA01" \
76-
-i "pandas.RangeIndex.step SA01" \
7776
-i "pandas.Series.cat.add_categories PR01,PR02" \
7877
-i "pandas.Series.cat.as_ordered PR01" \
7978
-i "pandas.Series.cat.as_unordered PR01" \
@@ -97,14 +96,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9796
-i "pandas.Series.dt.tz_localize PR01,PR02" \
9897
-i "pandas.Series.dt.unit GL08" \
9998
-i "pandas.Series.pad PR01,SA01" \
100-
-i "pandas.Series.sparse.fill_value SA01" \
10199
-i "pandas.Series.sparse.from_coo PR07,SA01" \
102100
-i "pandas.Series.sparse.npoints SA01" \
103-
-i "pandas.Series.sparse.sp_values SA01" \
104101
-i "pandas.Timedelta.max PR02" \
105102
-i "pandas.Timedelta.min PR02" \
106103
-i "pandas.Timedelta.resolution PR02" \
107-
-i "pandas.Timedelta.to_timedelta64 SA01" \
108104
-i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
109105
-i "pandas.Timestamp.max PR02" \
110106
-i "pandas.Timestamp.min PR02" \
@@ -115,25 +111,18 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
115111
-i "pandas.api.types.is_dict_like PR07,SA01" \
116112
-i "pandas.api.types.is_file_like PR07,SA01" \
117113
-i "pandas.api.types.is_float PR01,SA01" \
118-
-i "pandas.api.types.is_float_dtype SA01" \
119114
-i "pandas.api.types.is_hashable PR01,RT03,SA01" \
120-
-i "pandas.api.types.is_int64_dtype SA01" \
121115
-i "pandas.api.types.is_integer PR01,SA01" \
122-
-i "pandas.api.types.is_interval_dtype SA01" \
123116
-i "pandas.api.types.is_iterator PR07,SA01" \
124-
-i "pandas.api.types.is_list_like SA01" \
125117
-i "pandas.api.types.is_named_tuple PR07,SA01" \
126-
-i "pandas.api.types.is_object_dtype SA01" \
127118
-i "pandas.api.types.is_re PR07,SA01" \
128119
-i "pandas.api.types.is_re_compilable PR07,SA01" \
129120
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
130121
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
131-
-i "pandas.arrays.BooleanArray SA01" \
132122
-i "pandas.arrays.DatetimeArray SA01" \
133123
-i "pandas.arrays.IntegerArray SA01" \
134124
-i "pandas.arrays.IntervalArray.left SA01" \
135125
-i "pandas.arrays.IntervalArray.length SA01" \
136-
-i "pandas.arrays.IntervalArray.mid SA01" \
137126
-i "pandas.arrays.IntervalArray.right SA01" \
138127
-i "pandas.arrays.NumpyExtensionArray SA01" \
139128
-i "pandas.arrays.SparseArray PR07,SA01" \
@@ -161,7 +150,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
161150
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
162151
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
163152
-i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \
164-
-i "pandas.core.resample.Resampler.ffill RT03" \
165153
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
166154
-i "pandas.core.resample.Resampler.groups SA01" \
167155
-i "pandas.core.resample.Resampler.indices SA01" \
@@ -176,15 +164,13 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
176164
-i "pandas.core.resample.Resampler.sum SA01" \
177165
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
178166
-i "pandas.core.resample.Resampler.var SA01" \
179-
-i "pandas.date_range RT03" \
180167
-i "pandas.errors.AttributeConflictWarning SA01" \
181168
-i "pandas.errors.CSSWarning SA01" \
182169
-i "pandas.errors.CategoricalConversionWarning SA01" \
183170
-i "pandas.errors.ChainedAssignmentError SA01" \
184171
-i "pandas.errors.ClosedFileError SA01" \
185172
-i "pandas.errors.DataError SA01" \
186173
-i "pandas.errors.DuplicateLabelError SA01" \
187-
-i "pandas.errors.EmptyDataError SA01" \
188174
-i "pandas.errors.IntCastingNaNError SA01" \
189175
-i "pandas.errors.InvalidIndexError SA01" \
190176
-i "pandas.errors.InvalidVersion SA01" \
@@ -366,7 +352,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
366352
-i "pandas.tseries.offsets.SemiMonthBegin.n GL08" \
367353
-i "pandas.tseries.offsets.SemiMonthBegin.normalize GL08" \
368354
-i "pandas.tseries.offsets.SemiMonthBegin.rule_code GL08" \
369-
-i "pandas.tseries.offsets.SemiMonthEnd SA01" \
370355
-i "pandas.tseries.offsets.SemiMonthEnd.day_of_month GL08" \
371356
-i "pandas.tseries.offsets.SemiMonthEnd.is_on_offset GL08" \
372357
-i "pandas.tseries.offsets.SemiMonthEnd.n GL08" \

doc/source/development/maintaining.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ in the next places:
344344
- Git repo with a `new tag <https://github.com/pandas-dev/pandas/tags>`_
345345
- Source distribution in a `GitHub release <https://github.com/pandas-dev/pandas/releases>`_
346346
- Pip packages in the `PyPI <https://pypi.org/project/pandas/>`_
347-
- Conda/Mamba packages in `conda-forge <https://anaconda.org/conda-forge/pandas>`_
347+
- Conda packages in `conda-forge <https://anaconda.org/conda-forge/pandas>`_
348348

349349
The process for releasing a new version of pandas is detailed next section.
350350

doc/source/getting_started/index.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@ Installation
1717
:columns: 12 12 6 6
1818
:padding: 3
1919

20-
pandas is part of the `Anaconda <https://docs.continuum.io/anaconda/>`__
21-
distribution and can be installed with Anaconda or Miniconda:
20+
pandas can be installed via conda from `conda-forge <https://anaconda.org/conda-forge/pandas>`__.
2221

2322
++++++++++++++++++++++
2423

doc/source/getting_started/install.rst

+48-103
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,16 @@
66
Installation
77
============
88

9-
The easiest way to install pandas is to install it
10-
as part of the `Anaconda <https://docs.continuum.io/free/anaconda/>`__ distribution, a
11-
cross platform distribution for data analysis and scientific computing.
12-
The `Conda <https://conda.io/en/latest/>`__ package manager is the
13-
recommended installation method for most users.
9+
The pandas development team officially distributes pandas for installation
10+
through the following methods:
1411

15-
Instructions for installing :ref:`from source <install.source>`,
16-
:ref:`PyPI <install.pypi>`, or a
17-
:ref:`development version <install.dev>` are also provided.
12+
* Available on `conda-forge <https://anaconda.org/conda-forge/pandas>`__ for installation with the conda package manager.
13+
* Available on `PyPI <https://pypi.org/project/pandas/>`__ for installation with pip.
14+
* Available on `GitHub <https://github.com/pandas-dev/pandas>`__ for installation from source.
15+
16+
.. note::
17+
pandas may be installable from other sources besides the ones listed above,
18+
but they are **not** managed by the pandas development team.
1819

1920
.. _install.version:
2021

@@ -26,68 +27,54 @@ See :ref:`Python support policy <policies.python_support>`.
2627
Installing pandas
2728
-----------------
2829

29-
.. _install.anaconda:
30+
.. _install.conda:
3031

31-
Installing with Anaconda
32-
~~~~~~~~~~~~~~~~~~~~~~~~
32+
Installing with Conda
33+
~~~~~~~~~~~~~~~~~~~~~
3334

34-
For users that are new to Python, the easiest way to install Python, pandas, and the
35-
packages that make up the `PyData <https://pydata.org/>`__ stack
36-
(`SciPy <https://scipy.org/>`__, `NumPy <https://numpy.org/>`__,
37-
`Matplotlib <https://matplotlib.org/>`__, `and more <https://docs.continuum.io/free/anaconda/reference/packages/pkg-docs/>`__)
38-
is with `Anaconda <https://docs.continuum.io/free/anaconda/>`__, a cross-platform
39-
(Linux, macOS, Windows) Python distribution for data analytics and
40-
scientific computing. Installation instructions for Anaconda
41-
`can be found here <https://docs.continuum.io/free/anaconda/install/>`__.
35+
For users working with the `Conda <https://conda.io/en/latest/>`__ package manager,
36+
pandas can be installed from the ``conda-forge`` channel.
4237

43-
.. _install.miniconda:
38+
.. code-block:: shell
4439
45-
Installing with Miniconda
46-
~~~~~~~~~~~~~~~~~~~~~~~~~
40+
conda install -c conda-forge pandas
4741
48-
For users experienced with Python, the recommended way to install pandas with
49-
`Miniconda <https://docs.conda.io/en/latest/miniconda.html>`__.
50-
Miniconda allows you to create a minimal, self-contained Python installation compared to Anaconda and use the
51-
`Conda <https://conda.io/en/latest/>`__ package manager to install additional packages
52-
and create a virtual environment for your installation. Installation instructions for Miniconda
53-
`can be found here <https://docs.conda.io/en/latest/miniconda.html>`__.
42+
To install the Conda package manager on your system, the
43+
`Miniforge distribution <https://github.com/conda-forge/miniforge?tab=readme-ov-file#install>`__
44+
is recommended.
5445

55-
The next step is to create a new conda environment. A conda environment is like a
56-
virtualenv that allows you to specify a specific version of Python and set of libraries.
57-
Run the following commands from a terminal window.
46+
Additionally, it is recommended to install and run pandas from a virtual environment.
5847

5948
.. code-block:: shell
6049
6150
conda create -c conda-forge -n name_of_my_env python pandas
62-
63-
This will create a minimal environment with only Python and pandas installed.
64-
To put your self inside this environment run.
65-
66-
.. code-block:: shell
67-
51+
# On Linux or macOS
6852
source activate name_of_my_env
6953
# On Windows
7054
activate name_of_my_env
7155
72-
.. _install.pypi:
56+
.. tip::
57+
For users that are new to Python, the easiest way to install Python, pandas, and the
58+
packages that make up the `PyData <https://pydata.org/>`__ stack such as
59+
`SciPy <https://scipy.org/>`__, `NumPy <https://numpy.org/>`__ and
60+
`Matplotlib <https://matplotlib.org/>`__
61+
is with `Anaconda <https://docs.anaconda.com/anaconda/install/>`__, a cross-platform
62+
(Linux, macOS, Windows) Python distribution for data analytics and
63+
scientific computing.
7364

74-
Installing from PyPI
75-
~~~~~~~~~~~~~~~~~~~~
65+
However, pandas from Anaconda is **not** officially managed by the pandas development team.
7666

77-
pandas can be installed via pip from
78-
`PyPI <https://pypi.org/project/pandas>`__.
67+
.. _install.pip:
7968

80-
.. code-block:: shell
81-
82-
pip install pandas
69+
Installing with pip
70+
~~~~~~~~~~~~~~~~~~~
8371

84-
.. note::
85-
You must have ``pip>=19.3`` to install from PyPI.
72+
For users working with the `pip <https://pip.pypa.io/en/stable/>`__ package manager,
73+
pandas can be installed from `PyPI <https://pypi.org/project/pandas/>`__.
8674

87-
.. note::
75+
.. code-block:: shell
8876
89-
It is recommended to install and run pandas from a virtual environment, for example,
90-
using the Python standard library's `venv <https://docs.python.org/3/library/venv.html>`__
77+
pip install pandas
9178
9279
pandas can also be installed with sets of optional dependencies to enable certain functionality. For example,
9380
to install pandas with the optional dependencies to read Excel files.
@@ -98,25 +85,8 @@ to install pandas with the optional dependencies to read Excel files.
9885
9986
The full list of extras that can be installed can be found in the :ref:`dependency section <install.optional_dependencies>`.
10087

101-
Handling ImportErrors
102-
~~~~~~~~~~~~~~~~~~~~~
103-
104-
If you encounter an ``ImportError``, it usually means that Python couldn't find pandas in the list of available
105-
libraries. Python internally has a list of directories it searches through, to find packages. You can
106-
obtain these directories with.
107-
108-
.. code-block:: python
109-
110-
import sys
111-
sys.path
112-
113-
One way you could be encountering this error is if you have multiple Python installations on your system
114-
and you don't have pandas installed in the Python installation you're currently using.
115-
In Linux/Mac you can run ``which python`` on your terminal and it will tell you which Python installation you're
116-
using. If it's something like "/usr/bin/python", you're using the Python from the system, which is not recommended.
117-
118-
It is highly recommended to use ``conda``, for quick installation and for package and dependency updates.
119-
You can find simple installation instructions for pandas :ref:`in this document <install.miniconda>`.
88+
Additionally, it is recommended to install and run pandas from a virtual environment, for example,
89+
using the Python standard library's `venv <https://docs.python.org/3/library/venv.html>`__.
12090

12191
.. _install.source:
12292

@@ -144,49 +114,24 @@ index from the PyPI registry of anaconda.org. You can install it by running.
144114
145115
pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas
146116
147-
Note that you might be required to uninstall an existing version of pandas to install the development version.
117+
.. note::
118+
You might be required to uninstall an existing version of pandas to install the development version.
148119

149-
.. code-block:: shell
120+
.. code-block:: shell
150121
151-
pip uninstall pandas -y
122+
pip uninstall pandas -y
152123
153124
Running the test suite
154125
----------------------
155126

156-
pandas is equipped with an exhaustive set of unit tests. The packages required to run the tests
157-
can be installed with ``pip install "pandas[test]"``. To run the tests from a
158-
Python terminal.
159-
160-
.. code-block:: python
161-
162-
>>> import pandas as pd
163-
>>> pd.test()
164-
running: pytest -m "not slow and not network and not db" /home/user/anaconda3/lib/python3.10/site-packages/pandas
165-
166-
============================= test session starts ==============================
167-
platform linux -- Python 3.9.7, pytest-6.2.5, py-1.11.0, pluggy-1.0.0
168-
rootdir: /home/user
169-
plugins: dash-1.19.0, anyio-3.5.0, hypothesis-6.29.3
170-
collected 154975 items / 4 skipped / 154971 selected
171-
........................................................................ [ 0%]
172-
........................................................................ [ 99%]
173-
....................................... [100%]
174-
175-
==================================== ERRORS ====================================
176-
177-
=================================== FAILURES ===================================
178-
179-
=============================== warnings summary ===============================
180-
181-
=========================== short test summary info ============================
182-
183-
= 1 failed, 146194 passed, 7402 skipped, 1367 xfailed, 5 xpassed, 197 warnings, 10 errors in 1090.16s (0:18:10) =
127+
If pandas has been installed :ref:`from source <install.source>`, running ``pytest pandas`` will run all of the pandas unit tests.
184128

129+
The unit tests can also be run from the pandas module itself with the :func:`test` function. The packages required to run the tests
130+
can be installed with ``pip install "pandas[test]"``.
185131

186132
.. note::
187133

188-
This is just an example of what information is shown. Test failures are not necessarily indicative
189-
of a broken pandas installation.
134+
Test failures are not necessarily indicative of a broken pandas installation.
190135

191136
.. _install.dependencies:
192137

@@ -219,7 +164,7 @@ For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while
219164
optional dependency is not installed, pandas will raise an ``ImportError`` when
220165
the method requiring that dependency is called.
221166

222-
If using pip, optional pandas dependencies can be installed or managed in a file (e.g. requirements.txt or pyproject.toml)
167+
With pip, optional pandas dependencies can be installed or managed in a file (e.g. requirements.txt or pyproject.toml)
223168
as optional extras (e.g. ``pandas[performance, aws]``). All optional dependencies can be installed with ``pandas[all]``,
224169
and specific sets of dependencies are listed in the sections below.
225170

doc/source/whatsnew/v3.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ Other enhancements
5454
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5555
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
5656
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
57+
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
5758
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
5859
- :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
5960
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
@@ -619,6 +620,7 @@ I/O
619620
^^^
620621
- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
621622
- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.columns` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
623+
- Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`)
622624
- Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
623625
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
624626
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)

pandas/_libs/lib.pyx

+14-1
Original file line numberDiff line numberDiff line change
@@ -754,7 +754,14 @@ cpdef ndarray[object] ensure_string_array(
754754

755755
if hasattr(arr, "to_numpy"):
756756

757-
if hasattr(arr, "dtype") and arr.dtype.kind in "mM":
757+
if (
758+
hasattr(arr, "dtype")
759+
and arr.dtype.kind in "mM"
760+
# TODO: we should add a custom ArrowExtensionArray.astype implementation
761+
# that handles astype(str) specifically, avoiding ending up here and
762+
# then we can remove the below check for `_pa_array` (for ArrowEA)
763+
and not hasattr(arr, "_pa_array")
764+
):
758765
# dtype check to exclude DataFrame
759766
# GH#41409 TODO: not a great place for this
760767
out = arr.astype(str).astype(object)
@@ -1213,6 +1220,12 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool:
12131220
bool
12141221
Whether `obj` has list-like properties.
12151222

1223+
See Also
1224+
--------
1225+
Series : One-dimensional ndarray with axis labels (including time series).
1226+
Index : Immutable sequence used for indexing and alignment.
1227+
numpy.ndarray : Array object from NumPy, which is considered list-like.
1228+
12161229
Examples
12171230
--------
12181231
>>> import datetime

pandas/_libs/tslibs/offsets.pyx

+12
Original file line numberDiff line numberDiff line change
@@ -3316,6 +3316,11 @@ cdef class SemiMonthEnd(SemiMonthOffset):
33163316
"""
33173317
Two DateOffsets per month repeating on the last day of the month & day_of_month.
33183318
3319+
This offset allows for flexibility in generating date ranges or adjusting dates
3320+
to the end of a month or a specific day in the month, such as the 15th or the last
3321+
day of the month. It is useful for financial or scheduling applications where
3322+
events occur semi-monthly (twice a month).
3323+
33193324
Attributes
33203325
----------
33213326
n : int, default 1
@@ -3325,6 +3330,13 @@ cdef class SemiMonthEnd(SemiMonthOffset):
33253330
day_of_month : int, {1, 3,...,27}, default 15
33263331
A specific integer for the day of the month.
33273332
3333+
See Also
3334+
--------
3335+
tseries.offsets.SemiMonthBegin : Offset for semi-monthly frequencies, starting at
3336+
the beginning of the month.
3337+
tseries.offsets.MonthEnd : Offset to the last calendar day of the month.
3338+
tseries.offsets.MonthBegin : Offset to the first calendar day of the month.
3339+
33283340
Examples
33293341
--------
33303342
>>> ts = pd.Timestamp(2022, 1, 14)

0 commit comments

Comments
 (0)