Skip to content

Commit 3f67301

Browse files
committed
Merge remote-tracking branch 'upstream/main' into regr_transpose_ea
# Conflicts: # doc/source/whatsnew/v2.2.1.rst
2 parents 2d6b373 + 63dc0f7 commit 3f67301

33 files changed

+409
-111
lines changed

ci/code_checks.sh

+2-10
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ fi
6565
### DOCSTRINGS ###
6666
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
6767

68-
MSG='Validate docstrings (EX01, EX03, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
69-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX03,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
68+
MSG='Validate docstrings (EX01, EX03, EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SA05, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
69+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01,EX03,EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SA05,SS01,SS02,SS03,SS04,SS05,SS06
7070
RET=$(($RET + $?)) ; echo $MSG "DONE"
7171

7272
MSG='Partially validate docstrings (PR02)' ; echo $MSG
@@ -3182,14 +3182,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
31823182
pandas.util.hash_pandas_object # There should be no backslash in the final line, please keep this comment in the last ignored function
31833183
RET=$(($RET + $?)) ; echo $MSG "DONE"
31843184

3185-
MSG='Partially validate docstrings (SA05)' ; echo $MSG
3186-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=SA05 --ignore_functions \
3187-
pandas.core.groupby.SeriesGroupBy.first\
3188-
pandas.core.groupby.SeriesGroupBy.last\
3189-
pandas.core.window.expanding.Expanding.aggregate\
3190-
pandas.core.window.rolling.Rolling.aggregate # There should be no backslash in the final line, please keep this comment in the last ignored function
3191-
RET=$(($RET + $?)) ; echo $MSG "DONE"
3192-
31933185
fi
31943186

31953187
### DOCUMENTATION NOTEBOOKS ###

doc/source/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@
431431
"index",
432432
"pandas.tex",
433433
"pandas: powerful Python data analysis toolkit",
434-
"Wes McKinney and the Pandas Development Team",
434+
"Wes McKinney and the pandas Development Team",
435435
"manual",
436436
)
437437
]

doc/source/development/contributing_codebase.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ Testing type hints in code using pandas
253253

254254
.. warning::
255255

256-
* Pandas is not yet a py.typed library (:pep:`561`)!
256+
* pandas is not yet a py.typed library (:pep:`561`)!
257257
The primary purpose of locally declaring pandas as a py.typed library is to test and
258258
improve the pandas-builtin type annotations.
259259

doc/source/development/contributing_documentation.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ experts. If something in the docs doesn't make sense to you, updating the
1414
relevant section after you figure it out is a great way to ensure it will help
1515
the next person. Please visit the `issues page <https://github.com/pandas-dev/pandas/issues?page=1&q=is%3Aopen+sort%3Aupdated-desc+label%3ADocs>`__
1616
for a full list of issues that are currently open regarding the
17-
Pandas documentation.
17+
pandas documentation.
1818

1919

2020

doc/source/development/debugging_extensions.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
Debugging C extensions
77
======================
88

9-
Pandas uses Cython and C/C++ `extension modules <https://docs.python.org/3/extending/extending.html>`_ to optimize performance. Unfortunately, the standard Python debugger does not allow you to step into these extensions. Cython extensions can be debugged with the `Cython debugger <https://docs.cython.org/en/latest/src/userguide/debugging.html>`_ and C/C++ extensions can be debugged using the tools shipped with your platform's compiler.
9+
pandas uses Cython and C/C++ `extension modules <https://docs.python.org/3/extending/extending.html>`_ to optimize performance. Unfortunately, the standard Python debugger does not allow you to step into these extensions. Cython extensions can be debugged with the `Cython debugger <https://docs.cython.org/en/latest/src/userguide/debugging.html>`_ and C/C++ extensions can be debugged using the tools shipped with your platform's compiler.
1010

1111
For Python developers with limited or no C/C++ experience, this can seem a daunting task. Core developer Will Ayd has written a 3-part blog series to help guide you from the standard Python debugger into these other tools:
1212

doc/source/development/maintaining.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ Release
430430
git checkout <branch>
431431
git pull --ff-only upstream <branch>
432432
git clean -xdf
433-
git commit --allow-empty --author="Pandas Development Team <[email protected]>" -m "RLS: <version>"
433+
git commit --allow-empty --author="pandas Development Team <[email protected]>" -m "RLS: <version>"
434434
git tag -a v<version> -m "Version <version>" # NOTE that the tag is v1.5.2 with "v" not 1.5.2
435435
git push upstream <branch> --follow-tags
436436

@@ -460,7 +460,7 @@ which will be triggered when the tag is pushed.
460460
4. Create a `new GitHub release <https://github.com/pandas-dev/pandas/releases/new>`_:
461461

462462
- Tag: ``<version>``
463-
- Title: ``Pandas <version>``
463+
- Title: ``pandas <version>``
464464
- Description: Copy the description of the last release of the same kind (release candidate, major/minor or patch release)
465465
- Files: ``pandas-<version>.tar.gz`` source distribution just generated
466466
- Set as a pre-release: Only check for a release candidate

doc/source/user_guide/10min.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Customarily, we import as follows:
1919
Basic data structures in pandas
2020
-------------------------------
2121

22-
Pandas provides two types of classes for handling data:
22+
pandas provides two types of classes for handling data:
2323

2424
1. :class:`Series`: a one-dimensional labeled array holding data of any type
2525
such as integers, strings, Python objects etc.

doc/source/user_guide/categorical.rst

+12-1
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,8 @@ Equality semantics
245245

246246
Two instances of :class:`~pandas.api.types.CategoricalDtype` compare equal
247247
whenever they have the same categories and order. When comparing two
248-
unordered categoricals, the order of the ``categories`` is not considered.
248+
unordered categoricals, the order of the ``categories`` is not considered. Note
249+
that categories with different dtypes are not the same.
249250

250251
.. ipython:: python
251252
@@ -263,6 +264,16 @@ All instances of ``CategoricalDtype`` compare equal to the string ``'category'``
263264
264265
c1 == "category"
265266
267+
Note that the ``categories_dtype`` should also be considered, especially when comparing
268+
two empty ``CategoricalDtype`` instances.
269+
270+
.. ipython:: python
271+
272+
c2 = pd.Categorical(np.array([], dtype=object))
273+
c3 = pd.Categorical(np.array([], dtype=float))
274+
275+
c2.dtype == c3.dtype
276+
266277
Description
267278
-----------
268279

doc/source/user_guide/io.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -1044,7 +1044,7 @@ Writing CSVs to binary file objects
10441044

10451045
``df.to_csv(..., mode="wb")`` allows writing a CSV to a file object
10461046
opened in binary mode. In most cases, it is not necessary to specify
1047-
``mode`` as Pandas will auto-detect whether the file object is
1047+
``mode`` as pandas will auto-detect whether the file object is
10481048
opened in text or binary mode.
10491049

10501050
.. ipython:: python
@@ -1604,7 +1604,7 @@ Specifying ``iterator=True`` will also return the ``TextFileReader`` object:
16041604
Specifying the parser engine
16051605
''''''''''''''''''''''''''''
16061606

1607-
Pandas currently supports three engines, the C engine, the python engine, and an experimental
1607+
pandas currently supports three engines, the C engine, the python engine, and an experimental
16081608
pyarrow engine (requires the ``pyarrow`` package). In general, the pyarrow engine is fastest
16091609
on larger workloads and is equivalent in speed to the C engine on most other workloads.
16101610
The python engine tends to be slower than the pyarrow and C engines on most workloads. However,
@@ -3910,7 +3910,7 @@ The look and feel of Excel worksheets created from pandas can be modified using
39103910

39113911
.. note::
39123912

3913-
As of Pandas 3.0, by default spreadsheets created with the ``to_excel`` method
3913+
As of pandas 3.0, by default spreadsheets created with the ``to_excel`` method
39143914
will not contain any styling. Users wishing to bold text, add bordered styles,
39153915
etc in a worksheet output by ``to_excel`` can do so by using :meth:`Styler.to_excel`
39163916
to create styled excel files. For documentation on styling spreadsheets, see

doc/source/whatsnew/v0.11.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Data have had quite a number of additions, and Dtype support is now full-fledged
1212
There are also a number of important API changes that long-time pandas users should
1313
pay close attention to.
1414

15-
There is a new section in the documentation, :ref:`10 Minutes to Pandas <10min>`,
15+
There is a new section in the documentation, :ref:`10 Minutes to pandas <10min>`,
1616
primarily geared to new users.
1717

1818
There is a new section in the documentation, :ref:`Cookbook <cookbook>`, a collection

doc/source/whatsnew/v1.3.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ including other versions of pandas.
1414

1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
17-
- Pandas could not be built on PyPy (:issue:`42355`)
17+
- pandas could not be built on PyPy (:issue:`42355`)
1818
- :class:`DataFrame` constructed with an older version of pandas could not be unpickled (:issue:`42345`)
1919
- Performance regression in constructing a :class:`DataFrame` from a dictionary of dictionaries (:issue:`42248`)
2020
- Fixed regression in :meth:`DataFrame.agg` dropping values when the DataFrame had an Extension Array dtype, a duplicate index, and ``axis=1`` (:issue:`42380`)

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,7 @@ Deprecated Int64Index, UInt64Index & Float64Index
607607

608608
:class:`Int64Index`, :class:`UInt64Index` and :class:`Float64Index` have been
609609
deprecated in favor of the base :class:`Index` class and will be removed in
610-
Pandas 2.0 (:issue:`43028`).
610+
pandas 2.0 (:issue:`43028`).
611611

612612
For constructing a numeric index, you can use the base :class:`Index` class
613613
instead specifying the data type (which will also work on older pandas

doc/source/whatsnew/v1.5.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ We recommend installing the latest version of PyArrow to access the most recentl
6363
DataFrame interchange protocol implementation
6464
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6565

66-
Pandas now implement the DataFrame interchange API spec.
66+
pandas now implements the DataFrame interchange API spec.
6767
See the full details on the API at https://data-apis.org/dataframe-protocol/latest/index.html
6868

6969
The protocol consists of two parts:

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ can it now take all numpy numeric dtypes, i.e.
5757
pd.Index([1, 2, 3], dtype=np.uint16)
5858
pd.Index([1, 2, 3], dtype=np.float32)
5959
60-
The ability for :class:`Index` to hold the numpy numeric dtypes has meant some changes in Pandas
60+
The ability for :class:`Index` to hold the numpy numeric dtypes has meant some changes in pandas
6161
functionality. In particular, operations that previously were forced to create 64-bit indexes,
6262
can now create indexes with lower bit sizes, e.g. 32-bit indexes.
6363

doc/source/whatsnew/v2.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ DataFrame reductions preserve extension dtypes
6767

6868
In previous versions of pandas, the results of DataFrame reductions
6969
(:meth:`DataFrame.sum`, :meth:`DataFrame.mean`, etc.) had NumPy dtypes, even when the DataFrames
70-
were of extension dtypes. Pandas can now keep the dtypes when doing reductions over DataFrame
70+
were of extension dtypes. pandas can now keep the dtypes when doing reductions over DataFrame
7171
columns with a common dtype (:issue:`52788`).
7272

7373
*Old Behavior*

doc/source/whatsnew/v2.2.1.rst

+2
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@ Fixed regressions
2424
- Fixed regression in :meth:`CategoricalIndex.difference` raising ``KeyError`` when other contains null values other than NaN (:issue:`57318`)
2525
- Fixed regression in :meth:`DataFrame.groupby` raising ``ValueError`` when grouping by a :class:`Series` in some cases (:issue:`57276`)
2626
- Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
27+
- Fixed regression in :meth:`DataFrame.loc` which was unnecessarily throwing "incompatible dtype warning" when expanding with partial row indexer and multiple columns (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_) (:issue:`56503`)
2728
- Fixed regression in :meth:`DataFrame.map` with ``na_action="ignore"`` not being respected for NumPy nullable and :class:`ArrowDtypes` (:issue:`57316`)
2829
- Fixed regression in :meth:`DataFrame.merge` raising ``ValueError`` for certain types of 3rd-party extension arrays (:issue:`57316`)
2930
- Fixed regression in :meth:`DataFrame.shift` raising ``AssertionError`` for ``axis=1`` and empty :class:`DataFrame` (:issue:`57301`)
3031
- Fixed regression in :meth:`DataFrame.sort_index` not producing a stable sort for an index with duplicates (:issue:`57151`)
3132
- Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`)
3233
- Fixed regression in :meth:`DataFrame.to_json` converting nullable integers to floats (:issue:`57224`)
34+
- Fixed regression in :meth:`DataFrame.to_sql` when ``method="multi"`` is passed and the dialect type is not Oracle (:issue:`57310`)
3335
- Fixed regression in :meth:`DataFrame.transpose` with nullable extension dtypes not having F-contiguous data potentially causing exceptions when used (:issue:`57315`)
3436
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
3537
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ Deprecations
102102
~~~~~~~~~~~~
103103
- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`)
104104
- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
105+
- Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf`` (:issue:`57280`)
105106
-
106107

107108
.. ---------------------------------------------------------------------------

pandas/_libs/algos_take_helper.pxi.in

+12
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,17 @@ def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
184184

185185
fv = fill_value
186186

187+
{{if c_type_in == c_type_out != "object"}}
188+
with nogil:
189+
for i in range(n):
190+
for j in range(k):
191+
idx = indexer[j]
192+
if idx == -1:
193+
out[i, j] = fv
194+
else:
195+
out[i, j] = values[i, idx]
196+
197+
{{else}}
187198
for i in range(n):
188199
for j in range(k):
189200
idx = indexer[j]
@@ -195,6 +206,7 @@ def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
195206
{{else}}
196207
out[i, j] = values[i, idx]
197208
{{endif}}
209+
{{endif}}
198210

199211

200212
@cython.wraparound(False)

pandas/_typing.py

+1
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@
118118
Concatenate: Any = None
119119

120120
HashableT = TypeVar("HashableT", bound=Hashable)
121+
HashableT2 = TypeVar("HashableT2", bound=Hashable)
121122
MutableMappingT = TypeVar("MutableMappingT", bound=MutableMapping)
122123

123124
# array-like

pandas/core/base.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from typing import (
99
TYPE_CHECKING,
1010
Any,
11-
Callable,
1211
Generic,
1312
Literal,
1413
cast,
@@ -105,7 +104,7 @@ class PandasObject(DirNamesMixin):
105104
_cache: dict[str, Any]
106105

107106
@property
108-
def _constructor(self) -> Callable[..., Self]:
107+
def _constructor(self) -> type[Self]:
109108
"""
110109
Class constructor (for this class it's just `__class__`).
111110
"""
@@ -1356,7 +1355,7 @@ def searchsorted(
13561355
sorter=sorter,
13571356
)
13581357

1359-
def drop_duplicates(self, *, keep: DropKeep = "first"):
1358+
def drop_duplicates(self, *, keep: DropKeep = "first") -> Self:
13601359
duplicated = self._duplicated(keep=keep)
13611360
# error: Value of type "IndexOpsMixin" is not indexable
13621361
return self[~duplicated] # type: ignore[index]

pandas/core/dtypes/missing.py

+14
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,20 @@ def infer_fill_value(val):
646646
return np.nan
647647

648648

649+
def construct_1d_array_from_inferred_fill_value(
650+
value: object, length: int
651+
) -> ArrayLike:
652+
# Find our empty_value dtype by constructing an array
653+
# from our value and doing a .take on it
654+
from pandas.core.algorithms import take_nd
655+
from pandas.core.construction import sanitize_array
656+
from pandas.core.indexes.base import Index
657+
658+
arr = sanitize_array(value, Index(range(1)), copy=False)
659+
taker = -1 * np.ones(length, dtype=np.intp)
660+
return take_nd(arr, taker)
661+
662+
649663
def maybe_fill(arr: np.ndarray) -> np.ndarray:
650664
"""
651665
Fill numpy.ndarray with NaN, unless we have an integer or boolean dtype.

0 commit comments

Comments
 (0)