Skip to content

Commit 1460052

Browse files
authored
Merge branch 'main' into outofbounds
2 parents 6da730d + 23c53bb commit 1460052

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+817
-138
lines changed

.github/workflows/32-bit-linux.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
/opt/python/cp38-cp38/bin/python -m venv ~/virtualenvs/pandas-dev && \
3939
. ~/virtualenvs/pandas-dev/bin/activate && \
4040
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
41-
pip install cython numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio>=0.17 hypothesis && \
41+
pip install cython==0.29.30 numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio>=0.17 hypothesis && \
4242
python setup.py build_ext -q -j2 && \
4343
python -m pip install --no-build-isolation --no-use-pep517 -e . && \
4444
export PANDAS_CI=1 && \

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ repos:
230230
language: python
231231
additional_dependencies:
232232
- flake8==4.0.1
233-
- flake8-pyi==22.5.1
233+
- flake8-pyi==22.7.0
234234
- id: future-annotations
235235
name: import annotations from __future__
236236
entry: 'from __future__ import annotations'

doc/source/conf.py

+15-11
Original file line numberDiff line numberDiff line change
@@ -50,23 +50,25 @@
5050
# sphinxext.
5151

5252
extensions = [
53-
"sphinx.ext.autodoc",
54-
"sphinx.ext.autosummary",
55-
"sphinx.ext.doctest",
56-
"sphinx.ext.extlinks",
57-
"sphinx.ext.todo",
58-
"numpydoc", # handle NumPy documentation formatted docstrings
53+
"contributors", # custom pandas extension
5954
"IPython.sphinxext.ipython_directive",
6055
"IPython.sphinxext.ipython_console_highlighting",
6156
"matplotlib.sphinxext.plot_directive",
62-
"sphinx.ext.intersphinx",
57+
"numpydoc",
58+
"sphinx_copybutton",
59+
"sphinx_panels",
60+
"sphinx_toggleprompt",
61+
"sphinx.ext.autodoc",
62+
"sphinx.ext.autosummary",
6363
"sphinx.ext.coverage",
64-
"sphinx.ext.mathjax",
64+
"sphinx.ext.doctest",
65+
"sphinx.ext.extlinks",
6566
"sphinx.ext.ifconfig",
67+
"sphinx.ext.intersphinx",
6668
"sphinx.ext.linkcode",
69+
"sphinx.ext.mathjax",
70+
"sphinx.ext.todo",
6771
"nbsphinx",
68-
"sphinx_panels",
69-
"contributors", # custom pandas extension
7072
]
7173

7274
exclude_patterns = [
@@ -144,6 +146,9 @@
144146
# already loads it
145147
panels_add_bootstrap_css = False
146148

149+
# https://sphinx-toggleprompt.readthedocs.io/en/stable/#offset
150+
toggleprompt_offset_right = 35
151+
147152
# Add any paths that contain templates here, relative to this directory.
148153
templates_path = ["../_templates"]
149154

@@ -453,7 +458,6 @@
453458
# extlinks alias
454459
extlinks = {
455460
"issue": ("https://github.com/pandas-dev/pandas/issues/%s", "GH"),
456-
"wiki": ("https://github.com/pandas-dev/pandas/wiki/%s", "wiki "),
457461
}
458462

459463

doc/source/ecosystem.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,10 @@ A good implementation for Python users is `has2k1/plotnine <https://github.com/h
161161
`IPython Vega <https://github.com/vega/ipyvega>`__ leverages `Vega
162162
<https://github.com/vega/vega>`__ to create plots within Jupyter Notebook.
163163

164-
`Plotly <https://poltly.com/python>`__
164+
`Plotly <https://plotly.com/python>`__
165165
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
166166

167-
`Plotly’s <https://poltly.com/>`__ `Python API <https://poltly.com/python/>`__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js <https://d3js.org/>`__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn <https://poltly.com/python/matplotlib-to-plotly-tutorial/>`__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks <https://plotly.com/ipython-notebooks/>`__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `offline <https://poltly.com/python/offline/>`__, or `on-premise <https://poltly.com/product/enterprise/>`__ accounts for private use.
167+
`Plotly’s <https://plotly.com/>`__ `Python API <https://plotly.com/python/>`__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js <https://d3js.org/>`__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn <https://plotly.com/python/matplotlib-to-plotly-tutorial/>`__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks <https://plotly.com/ipython-notebooks/>`__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `offline <https://plotly.com/python/offline/>`__, or `on-premise <https://plotly.com/product/enterprise/>`__ accounts for private use.
168168

169169
`Lux <https://github.com/lux-org/lux>`__
170170
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -591,12 +591,12 @@ Library Accessor Classes Description
591591
Development tools
592592
-----------------
593593

594-
`pandas-stubs <https://github.com/VirtusLab/pandas-stubs>`__
595-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
594+
`pandas-stubs <https://github.com/pandas-dev/pandas-stubs>`__
595+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
596596

597597
While the pandas repository is partially typed, the package itself doesn't expose this information for external use.
598598
Install pandas-stubs to enable basic type coverage of pandas API.
599599

600600
Learn more by reading through :issue:`14468`, :issue:`26766`, :issue:`28142`.
601601

602-
See installation and usage instructions on the `github page <https://github.com/VirtusLab/pandas-stubs>`__.
602+
See installation and usage instructions on the `github page <https://github.com/pandas-dev/pandas-stubs>`__.

doc/source/getting_started/intro_tutorials/10_text_data.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ applied to integers, so no ``str`` is used.
179179
180180
Based on the index name of the row (``307``) and the column (``Name``),
181181
we can do a selection using the ``loc`` operator, introduced in the
182-
`tutorial on subsetting <3_subset_data.ipynb>`__.
182+
:ref:`tutorial on subsetting <10min_tut_03_subset>`.
183183

184184
.. raw:: html
185185

doc/source/user_guide/io.rst

+19-13
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,10 @@ index_col : int, str, sequence of int / str, or False, optional, default ``None`
107107
string name or column index. If a sequence of int / str is given, a
108108
MultiIndex is used.
109109

110-
Note: ``index_col=False`` can be used to force pandas to *not* use the first
111-
column as the index, e.g. when you have a malformed file with delimiters at
112-
the end of each line.
110+
.. note::
111+
``index_col=False`` can be used to force pandas to *not* use the first
112+
column as the index, e.g. when you have a malformed file with delimiters at
113+
the end of each line.
113114

114115
The default value of ``None`` instructs pandas to guess. If the number of
115116
fields in the column header row is equal to the number of fields in the body
@@ -182,15 +183,16 @@ General parsing configuration
182183
+++++++++++++++++++++++++++++
183184

184185
dtype : Type name or dict of column -> type, default ``None``
185-
Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32}``
186-
(unsupported with ``engine='python'``). Use ``str`` or ``object`` together
187-
with suitable ``na_values`` settings to preserve and
188-
not interpret dtype.
186+
Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32, 'c': 'Int64'}``.
187+
Use ``str`` or ``object`` together with suitable ``na_values`` settings to preserve
188+
and not interpret dtype. If converters are specified, they will be applied INSTEAD
189+
of dtype conversion.
190+
189191
.. versionadded:: 1.5.0
190192

191-
Support for defaultdict was added. Specify a defaultdict as input where
192-
the default determines the dtype of the columns which are not explicitly
193-
listed.
193+
Support for defaultdict was added. Specify a defaultdict as input where
194+
the default determines the dtype of the columns which are not explicitly
195+
listed.
194196
engine : {``'c'``, ``'python'``, ``'pyarrow'``}
195197
Parser engine to use. The C and pyarrow engines are faster, while the python engine
196198
is currently more feature-complete. Multithreading is currently only supported by
@@ -283,7 +285,9 @@ parse_dates : boolean or list of ints or names or list of lists or dict, default
283285
* If ``[[1, 3]]`` -> combine columns 1 and 3 and parse as a single date
284286
column.
285287
* If ``{'foo': [1, 3]}`` -> parse columns 1, 3 as date and call result 'foo'.
286-
A fast-path exists for iso8601-formatted dates.
288+
289+
.. note::
290+
A fast-path exists for iso8601-formatted dates.
287291
infer_datetime_format : boolean, default ``False``
288292
If ``True`` and parse_dates is enabled for a column, attempt to infer the
289293
datetime format to speed up the processing.
@@ -1593,8 +1597,10 @@ of multi-columns indices.
15931597
15941598
pd.read_csv("mi2.csv", header=[0, 1], index_col=0)
15951599
1596-
Note: If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
1597-
with ``df.to_csv(..., index=False)``, then any ``names`` on the columns index will be *lost*.
1600+
.. note::
1601+
If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
1602+
with ``df.to_csv(..., index=False)``), then any ``names`` on the columns index will
1603+
be *lost*.
15981604

15991605
.. ipython:: python
16001606
:suppress:

doc/source/whatsnew/v1.5.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,7 @@ Other Deprecations
773773
- Deprecated :class:`Series` and :class:`Resampler` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) raising a ``NotImplementedError`` when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
774774
- Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
775775
- Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on these methods (:issue:`47728`)
776+
- Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. (:issue:`47836`)
776777

777778
.. ---------------------------------------------------------------------------
778779
.. _whatsnew_150.performance:
@@ -802,6 +803,7 @@ Performance improvements
802803
- Performance improvement in datetime arrays string formatting when one of the default strftime formats ``"%Y-%m-%d %H:%M:%S"`` or ``"%Y-%m-%d %H:%M:%S.%f"`` is used. (:issue:`44764`)
803804
- Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays. (:issue:`44764`)
804805
- Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47404`, :issue:`47405`)
806+
- Performance improvement in ``argmax`` and ``argmin`` for :class:`arrays.SparseArray` (:issue:`34197`)
805807
-
806808

807809
.. ---------------------------------------------------------------------------
@@ -1023,6 +1025,7 @@ Reshaping
10231025
- Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`)
10241026
- Bug in :meth:`concat` when ``axis=1`` and ``sort=False`` where the resulting Index was a :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`46675`)
10251027
- Bug in :meth:`wide_to_long` raises when ``stubnames`` is missing in columns and ``i`` contains string dtype column (:issue:`46044`)
1028+
- Bug in :meth:`DataFrame.join` with categorical index results in unexpected reordering (:issue:`47812`)
10261029

10271030
Sparse
10281031
^^^^^^

environment.yml

+2
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ dependencies:
103103
- pytest-cython # doctest
104104
- sphinx
105105
- sphinx-panels
106+
- sphinx-copybutton
106107
- types-python-dateutil
107108
- types-PyMySQL
108109
- types-pytz
@@ -128,3 +129,4 @@ dependencies:
128129
- jupyterlab >=3.4,<4
129130
- pip:
130131
- jupyterlite==0.1.0b10
132+
- sphinx-toggleprompt

pandas/_libs/algos.pyi

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import annotations
2-
31
from typing import Any
42

53
import numpy as np

pandas/_libs/interval.pyi

+9-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import annotations
2-
31
from typing import (
42
Any,
53
Generic,
@@ -84,7 +82,7 @@ class Interval(IntervalMixin, Generic[_OrderableT]):
8482
self: Interval[_OrderableTimesT], key: _OrderableTimesT
8583
) -> bool: ...
8684
@overload
87-
def __contains__(self: Interval[_OrderableScalarT], key: int | float) -> bool: ...
85+
def __contains__(self: Interval[_OrderableScalarT], key: float) -> bool: ...
8886
@overload
8987
def __add__(
9088
self: Interval[_OrderableTimesT], y: Timedelta
@@ -94,7 +92,7 @@ class Interval(IntervalMixin, Generic[_OrderableT]):
9492
self: Interval[int], y: _OrderableScalarT
9593
) -> Interval[_OrderableScalarT]: ...
9694
@overload
97-
def __add__(self: Interval[float], y: int | float) -> Interval[float]: ...
95+
def __add__(self: Interval[float], y: float) -> Interval[float]: ...
9896
@overload
9997
def __radd__(
10098
self: Interval[_OrderableTimesT], y: Timedelta
@@ -104,7 +102,7 @@ class Interval(IntervalMixin, Generic[_OrderableT]):
104102
self: Interval[int], y: _OrderableScalarT
105103
) -> Interval[_OrderableScalarT]: ...
106104
@overload
107-
def __radd__(self: Interval[float], y: int | float) -> Interval[float]: ...
105+
def __radd__(self: Interval[float], y: float) -> Interval[float]: ...
108106
@overload
109107
def __sub__(
110108
self: Interval[_OrderableTimesT], y: Timedelta
@@ -114,7 +112,7 @@ class Interval(IntervalMixin, Generic[_OrderableT]):
114112
self: Interval[int], y: _OrderableScalarT
115113
) -> Interval[_OrderableScalarT]: ...
116114
@overload
117-
def __sub__(self: Interval[float], y: int | float) -> Interval[float]: ...
115+
def __sub__(self: Interval[float], y: float) -> Interval[float]: ...
118116
@overload
119117
def __rsub__(
120118
self: Interval[_OrderableTimesT], y: Timedelta
@@ -124,31 +122,31 @@ class Interval(IntervalMixin, Generic[_OrderableT]):
124122
self: Interval[int], y: _OrderableScalarT
125123
) -> Interval[_OrderableScalarT]: ...
126124
@overload
127-
def __rsub__(self: Interval[float], y: int | float) -> Interval[float]: ...
125+
def __rsub__(self: Interval[float], y: float) -> Interval[float]: ...
128126
@overload
129127
def __mul__(
130128
self: Interval[int], y: _OrderableScalarT
131129
) -> Interval[_OrderableScalarT]: ...
132130
@overload
133-
def __mul__(self: Interval[float], y: int | float) -> Interval[float]: ...
131+
def __mul__(self: Interval[float], y: float) -> Interval[float]: ...
134132
@overload
135133
def __rmul__(
136134
self: Interval[int], y: _OrderableScalarT
137135
) -> Interval[_OrderableScalarT]: ...
138136
@overload
139-
def __rmul__(self: Interval[float], y: int | float) -> Interval[float]: ...
137+
def __rmul__(self: Interval[float], y: float) -> Interval[float]: ...
140138
@overload
141139
def __truediv__(
142140
self: Interval[int], y: _OrderableScalarT
143141
) -> Interval[_OrderableScalarT]: ...
144142
@overload
145-
def __truediv__(self: Interval[float], y: int | float) -> Interval[float]: ...
143+
def __truediv__(self: Interval[float], y: float) -> Interval[float]: ...
146144
@overload
147145
def __floordiv__(
148146
self: Interval[int], y: _OrderableScalarT
149147
) -> Interval[_OrderableScalarT]: ...
150148
@overload
151-
def __floordiv__(self: Interval[float], y: int | float) -> Interval[float]: ...
149+
def __floordiv__(self: Interval[float], y: float) -> Interval[float]: ...
152150
def overlaps(self: Interval[_OrderableT], other: Interval[_OrderableT]) -> bool: ...
153151

154152
def intervals_to_interval_bounds(

pandas/_libs/join.pyi

+3-3
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def asof_join_backward_on_X_by_Y(
5555
left_by_values: np.ndarray, # by_t[:]
5656
right_by_values: np.ndarray, # by_t[:]
5757
allow_exact_matches: bool = ...,
58-
tolerance: np.number | int | float | None = ...,
58+
tolerance: np.number | float | None = ...,
5959
use_hashtable: bool = ...,
6060
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
6161
def asof_join_forward_on_X_by_Y(
@@ -64,7 +64,7 @@ def asof_join_forward_on_X_by_Y(
6464
left_by_values: np.ndarray, # by_t[:]
6565
right_by_values: np.ndarray, # by_t[:]
6666
allow_exact_matches: bool = ...,
67-
tolerance: np.number | int | float | None = ...,
67+
tolerance: np.number | float | None = ...,
6868
use_hashtable: bool = ...,
6969
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
7070
def asof_join_nearest_on_X_by_Y(
@@ -73,6 +73,6 @@ def asof_join_nearest_on_X_by_Y(
7373
left_by_values: np.ndarray, # by_t[:]
7474
right_by_values: np.ndarray, # by_t[:]
7575
allow_exact_matches: bool = ...,
76-
tolerance: np.number | int | float | None = ...,
76+
tolerance: np.number | float | None = ...,
7777
use_hashtable: bool = ...,
7878
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...

pandas/_libs/json.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def dumps(
1212
date_unit: str = ...,
1313
iso_dates: bool = ...,
1414
default_handler: None
15-
| Callable[[Any], str | int | float | bool | list | dict | None] = ...,
15+
| Callable[[Any], str | float | bool | list | dict | None] = ...,
1616
) -> str: ...
1717
def loads(
1818
s: str,

pandas/_libs/tslibs/offsets.pyi

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import annotations
2-
31
from datetime import (
42
datetime,
53
timedelta,

pandas/_libs/tslibs/timedeltas.pyi

+2-2
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ class Timedelta(timedelta):
8686
cls: type[_S],
8787
value=...,
8888
unit: str = ...,
89-
**kwargs: int | float | np.integer | np.floating,
89+
**kwargs: float | np.integer | np.floating,
9090
) -> _S: ...
9191
# GH 46171
9292
# While Timedelta can return pd.NaT, having the constructor return
@@ -123,7 +123,7 @@ class Timedelta(timedelta):
123123
@overload # type: ignore[override]
124124
def __floordiv__(self, other: timedelta) -> int: ...
125125
@overload
126-
def __floordiv__(self, other: int | float) -> Timedelta: ...
126+
def __floordiv__(self, other: float) -> Timedelta: ...
127127
@overload
128128
def __floordiv__(
129129
self, other: npt.NDArray[np.timedelta64]

pandas/_libs/tslibs/timestamps.pyi

+1-7
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,7 @@ class Timestamp(datetime):
3333
value: int # np.int64
3434
def __new__(
3535
cls: type[_DatetimeT],
36-
ts_input: int
37-
| np.integer
38-
| float
39-
| str
40-
| _date
41-
| datetime
42-
| np.datetime64 = ...,
36+
ts_input: np.integer | float | str | _date | datetime | np.datetime64 = ...,
4337
freq: int | None | str | BaseOffset = ...,
4438
tz: str | _tzinfo | None | int = ...,
4539
unit: str | int | None = ...,

pandas/_libs/writers.pyi

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import annotations
2-
31
import numpy as np
42

53
from pandas._typing import ArrayLike

0 commit comments

Comments
 (0)