Skip to content

Commit 485615c

Browse files
authored
Merge branch 'main' into today_now_error
2 parents 5582fd8 + 6e1a040 commit 485615c

File tree

113 files changed

+1172
-443
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

113 files changed

+1172
-443
lines changed

.github/workflows/32-bit-linux.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
/opt/python/cp38-cp38/bin/python -m venv ~/virtualenvs/pandas-dev && \
3939
. ~/virtualenvs/pandas-dev/bin/activate && \
4040
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
41-
pip install cython numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio>=0.17 hypothesis && \
41+
pip install cython==0.29.30 numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio>=0.17 hypothesis && \
4242
python setup.py build_ext -q -j2 && \
4343
python -m pip install --no-build-isolation --no-use-pep517 -e . && \
4444
export PANDAS_CI=1 && \

.github/workflows/codeql.yml

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
name: CodeQL
2+
on:
3+
schedule:
4+
# every day at midnight
5+
- cron: "0 0 * * *"
6+
7+
concurrency:
8+
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
9+
cancel-in-progress: true
10+
11+
jobs:
12+
analyze:
13+
runs-on: ubuntu-latest
14+
permissions:
15+
actions: read
16+
contents: read
17+
security-events: write
18+
19+
strategy:
20+
fail-fast: false
21+
matrix:
22+
language:
23+
- python
24+
25+
steps:
26+
- uses: actions/checkout@v3
27+
- uses: github/codeql-action/init@v2
28+
with:
29+
languages: ${{ matrix.language }}
30+
- uses: github/codeql-action/autobuild@v2
31+
- uses: github/codeql-action/analyze@v2

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ repos:
230230
language: python
231231
additional_dependencies:
232232
- flake8==4.0.1
233-
- flake8-pyi==22.5.1
233+
- flake8-pyi==22.7.0
234234
- id: future-annotations
235235
name: import annotations from __future__
236236
entry: 'from __future__ import annotations'

ci/code_checks.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ fi
7878
### DOCSTRINGS ###
7979
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8080

81-
MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05)' ; echo $MSG
82-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05
81+
MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
82+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
8383
RET=$(($RET + $?)) ; echo $MSG "DONE"
8484

8585
fi

doc/source/conf.py

+15-11
Original file line numberDiff line numberDiff line change
@@ -50,23 +50,25 @@
5050
# sphinxext.
5151

5252
extensions = [
53-
"sphinx.ext.autodoc",
54-
"sphinx.ext.autosummary",
55-
"sphinx.ext.doctest",
56-
"sphinx.ext.extlinks",
57-
"sphinx.ext.todo",
58-
"numpydoc", # handle NumPy documentation formatted docstrings
53+
"contributors", # custom pandas extension
5954
"IPython.sphinxext.ipython_directive",
6055
"IPython.sphinxext.ipython_console_highlighting",
6156
"matplotlib.sphinxext.plot_directive",
62-
"sphinx.ext.intersphinx",
57+
"numpydoc",
58+
"sphinx_copybutton",
59+
"sphinx_panels",
60+
"sphinx_toggleprompt",
61+
"sphinx.ext.autodoc",
62+
"sphinx.ext.autosummary",
6363
"sphinx.ext.coverage",
64-
"sphinx.ext.mathjax",
64+
"sphinx.ext.doctest",
65+
"sphinx.ext.extlinks",
6566
"sphinx.ext.ifconfig",
67+
"sphinx.ext.intersphinx",
6668
"sphinx.ext.linkcode",
69+
"sphinx.ext.mathjax",
70+
"sphinx.ext.todo",
6771
"nbsphinx",
68-
"sphinx_panels",
69-
"contributors", # custom pandas extension
7072
]
7173

7274
exclude_patterns = [
@@ -144,6 +146,9 @@
144146
# already loads it
145147
panels_add_bootstrap_css = False
146148

149+
# https://sphinx-toggleprompt.readthedocs.io/en/stable/#offset
150+
toggleprompt_offset_right = 35
151+
147152
# Add any paths that contain templates here, relative to this directory.
148153
templates_path = ["../_templates"]
149154

@@ -453,7 +458,6 @@
453458
# extlinks alias
454459
extlinks = {
455460
"issue": ("https://github.com/pandas-dev/pandas/issues/%s", "GH"),
456-
"wiki": ("https://github.com/pandas-dev/pandas/wiki/%s", "wiki "),
457461
}
458462

459463

doc/source/ecosystem.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,10 @@ A good implementation for Python users is `has2k1/plotnine <https://github.com/h
161161
`IPython Vega <https://github.com/vega/ipyvega>`__ leverages `Vega
162162
<https://github.com/vega/vega>`__ to create plots within Jupyter Notebook.
163163

164-
`Plotly <https://poltly.com/python>`__
164+
`Plotly <https://plotly.com/python>`__
165165
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
166166

167-
`Plotly’s <https://poltly.com/>`__ `Python API <https://poltly.com/python/>`__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js <https://d3js.org/>`__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn <https://poltly.com/python/matplotlib-to-plotly-tutorial/>`__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks <https://plotly.com/ipython-notebooks/>`__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `offline <https://poltly.com/python/offline/>`__, or `on-premise <https://poltly.com/product/enterprise/>`__ accounts for private use.
167+
`Plotly’s <https://plotly.com/>`__ `Python API <https://plotly.com/python/>`__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js <https://d3js.org/>`__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn <https://plotly.com/python/matplotlib-to-plotly-tutorial/>`__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks <https://plotly.com/ipython-notebooks/>`__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `offline <https://plotly.com/python/offline/>`__, or `on-premise <https://plotly.com/product/enterprise/>`__ accounts for private use.
168168

169169
`Lux <https://github.com/lux-org/lux>`__
170170
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -591,12 +591,12 @@ Library Accessor Classes Description
591591
Development tools
592592
-----------------
593593

594-
`pandas-stubs <https://github.com/VirtusLab/pandas-stubs>`__
595-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
594+
`pandas-stubs <https://github.com/pandas-dev/pandas-stubs>`__
595+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
596596

597597
While pandas repository is partially typed, the package itself doesn't expose this information for external use.
598598
Install pandas-stubs to enable basic type coverage of pandas API.
599599

600600
Learn more by reading through :issue:`14468`, :issue:`26766`, :issue:`28142`.
601601

602-
See installation and usage instructions on the `github page <https://github.com/VirtusLab/pandas-stubs>`__.
602+
See installation and usage instructions on the `github page <https://github.com/pandas-dev/pandas-stubs>`__.

doc/source/getting_started/intro_tutorials/10_text_data.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ applied to integers, so no ``str`` is used.
179179
180180
Based on the index name of the row (``307``) and the column (``Name``),
181181
we can do a selection using the ``loc`` operator, introduced in the
182-
`tutorial on subsetting <3_subset_data.ipynb>`__.
182+
:ref:`tutorial on subsetting <10min_tut_03_subset>`.
183183

184184
.. raw:: html
185185

doc/source/reference/general_functions.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,4 @@ Importing from other DataFrame libraries
8585
.. autosummary::
8686
:toctree: api/
8787

88-
api.exchange.from_dataframe
88+
api.interchange.from_dataframe

doc/source/user_guide/io.rst

+19-13
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,10 @@ index_col : int, str, sequence of int / str, or False, optional, default ``None`
107107
string name or column index. If a sequence of int / str is given, a
108108
MultiIndex is used.
109109

110-
Note: ``index_col=False`` can be used to force pandas to *not* use the first
111-
column as the index, e.g. when you have a malformed file with delimiters at
112-
the end of each line.
110+
.. note::
111+
``index_col=False`` can be used to force pandas to *not* use the first
112+
column as the index, e.g. when you have a malformed file with delimiters at
113+
the end of each line.
113114

114115
The default value of ``None`` instructs pandas to guess. If the number of
115116
fields in the column header row is equal to the number of fields in the body
@@ -182,15 +183,16 @@ General parsing configuration
182183
+++++++++++++++++++++++++++++
183184

184185
dtype : Type name or dict of column -> type, default ``None``
185-
Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32}``
186-
(unsupported with ``engine='python'``). Use ``str`` or ``object`` together
187-
with suitable ``na_values`` settings to preserve and
188-
not interpret dtype.
186+
Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32, 'c': 'Int64'}``
187+
Use ``str`` or ``object`` together with suitable ``na_values`` settings to preserve
188+
and not interpret dtype. If converters are specified, they will be applied INSTEAD
189+
of dtype conversion.
190+
189191
.. versionadded:: 1.5.0
190192

191-
Support for defaultdict was added. Specify a defaultdict as input where
192-
the default determines the dtype of the columns which are not explicitly
193-
listed.
193+
Support for defaultdict was added. Specify a defaultdict as input where
194+
the default determines the dtype of the columns which are not explicitly
195+
listed.
194196
engine : {``'c'``, ``'python'``, ``'pyarrow'``}
195197
Parser engine to use. The C and pyarrow engines are faster, while the python engine
196198
is currently more feature-complete. Multithreading is currently only supported by
@@ -283,7 +285,9 @@ parse_dates : boolean or list of ints or names or list of lists or dict, default
283285
* If ``[[1, 3]]`` -> combine columns 1 and 3 and parse as a single date
284286
column.
285287
* If ``{'foo': [1, 3]}`` -> parse columns 1, 3 as date and call result 'foo'.
286-
A fast-path exists for iso8601-formatted dates.
288+
289+
.. note::
290+
A fast-path exists for iso8601-formatted dates.
287291
infer_datetime_format : boolean, default ``False``
288292
If ``True`` and parse_dates is enabled for a column, attempt to infer the
289293
datetime format to speed up the processing.
@@ -1593,8 +1597,10 @@ of multi-columns indices.
15931597
15941598
pd.read_csv("mi2.csv", header=[0, 1], index_col=0)
15951599
1596-
Note: If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
1597-
with ``df.to_csv(..., index=False)``, then any ``names`` on the columns index will be *lost*.
1600+
.. note::
1601+
If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
1602+
with ``df.to_csv(..., index=False)``, then any ``names`` on the columns index will
1603+
be *lost*.
15981604

15991605
.. ipython:: python
16001606
:suppress:

doc/source/whatsnew/v1.5.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,7 @@ Other Deprecations
773773
- Deprecated :class:`Series` and :class:`Resampler` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) raising a ``NotImplementedError`` when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
774774
- Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
775775
- Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on these methods (:issue:`47728`)
776+
- Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. (:issue:`47836`)
776777

777778
.. ---------------------------------------------------------------------------
778779
.. _whatsnew_150.performance:
@@ -802,6 +803,7 @@ Performance improvements
802803
- Performance improvement in datetime arrays string formatting when one of the default strftime formats ``"%Y-%m-%d %H:%M:%S"`` or ``"%Y-%m-%d %H:%M:%S.%f"`` is used. (:issue:`44764`)
803804
- Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays. (:issue:`44764`)
804805
- Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47404`, :issue:`47405`)
806+
- Performance improvement in ``argmax`` and ``argmin`` for :class:`arrays.SparseArray` (:issue:`34197`)
805807
-
806808

807809
.. ---------------------------------------------------------------------------
@@ -1023,6 +1025,7 @@ Reshaping
10231025
- Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`)
10241026
- Bug in :meth:`concat` when ``axis=1`` and ``sort=False`` where the resulting Index was a :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`46675`)
10251027
- Bug in :meth:`wide_to_long` raises when ``stubnames`` is missing in columns and ``i`` contains string dtype column (:issue:`46044`)
1028+
- Bug in :meth:`DataFrame.join` with categorical index results in unexpected reordering (:issue:`47812`)
10261029

10271030
Sparse
10281031
^^^^^^

environment.yml

+2
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ dependencies:
103103
- pytest-cython # doctest
104104
- sphinx
105105
- sphinx-panels
106+
- sphinx-copybutton
106107
- types-python-dateutil
107108
- types-PyMySQL
108109
- types-pytz
@@ -128,3 +129,4 @@ dependencies:
128129
- jupyterlab >=3.4,<4
129130
- pip:
130131
- jupyterlite==0.1.0b10
132+
- sphinx-toggleprompt

pandas/_config/config.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,9 @@ class RegisteredOption(NamedTuple):
102102

103103
class OptionError(AttributeError, KeyError):
104104
"""
105-
Exception for pandas.options, backwards compatible with KeyError
106-
checks.
105+
Exception raised for pandas.options.
106+
107+
Backwards compatible with KeyError checks.
107108
"""
108109

109110

pandas/_libs/algos.pyi

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from __future__ import annotations
2-
31
from typing import Any
42

53
import numpy as np

pandas/_libs/groupby.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def group_any_all(
5050
val_test: Literal["any", "all"],
5151
skipna: bool,
5252
) -> None: ...
53-
def group_add(
53+
def group_sum(
5454
out: np.ndarray, # complexfloating_t[:, ::1]
5555
counts: np.ndarray, # int64_t[::1]
5656
values: np.ndarray, # ndarray[complexfloating_t, ndim=2]

pandas/_libs/groupby.pyx

+15-15
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def group_median_float64(
124124
ndarray[intp_t] indexer
125125
float64_t* ptr
126126

127-
assert min_count == -1, "'min_count' only used in add and prod"
127+
assert min_count == -1, "'min_count' only used in sum and prod"
128128

129129
ngroups = len(counts)
130130
N, K = (<object>values).shape
@@ -502,7 +502,7 @@ def group_any_all(
502502

503503

504504
# ----------------------------------------------------------------------
505-
# group_add, group_prod, group_var, group_mean, group_ohlc
505+
# group_sum, group_prod, group_var, group_mean, group_ohlc
506506
# ----------------------------------------------------------------------
507507

508508
ctypedef fused mean_t:
@@ -511,17 +511,17 @@ ctypedef fused mean_t:
511511
complex64_t
512512
complex128_t
513513

514-
ctypedef fused add_t:
514+
ctypedef fused sum_t:
515515
mean_t
516516
object
517517

518518

519519
@cython.wraparound(False)
520520
@cython.boundscheck(False)
521-
def group_add(
522-
add_t[:, ::1] out,
521+
def group_sum(
522+
sum_t[:, ::1] out,
523523
int64_t[::1] counts,
524-
ndarray[add_t, ndim=2] values,
524+
ndarray[sum_t, ndim=2] values,
525525
const intp_t[::1] labels,
526526
Py_ssize_t min_count=0,
527527
bint is_datetimelike=False,
@@ -531,8 +531,8 @@ def group_add(
531531
"""
532532
cdef:
533533
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
534-
add_t val, t, y
535-
add_t[:, ::1] sumx, compensation
534+
sum_t val, t, y
535+
sum_t[:, ::1] sumx, compensation
536536
int64_t[:, ::1] nobs
537537
Py_ssize_t len_values = len(values), len_labels = len(labels)
538538

@@ -546,7 +546,7 @@ def group_add(
546546

547547
N, K = (<object>values).shape
548548

549-
if add_t is object:
549+
if sum_t is object:
550550
# NB: this does not use 'compensation' like the non-object track does.
551551
for i in range(N):
552552
lab = labels[i]
@@ -588,10 +588,10 @@ def group_add(
588588

589589
# not nan
590590
# With dt64/td64 values, values have been cast to float64
591-
# instead if int64 for group_add, but the logic
591+
# instead of int64 for group_sum, but the logic
592592
# is otherwise the same as in _treat_as_na
593593
if val == val and not (
594-
add_t is float64_t
594+
sum_t is float64_t
595595
and is_datetimelike
596596
and val == <float64_t>NPY_NAT
597597
):
@@ -677,7 +677,7 @@ def group_var(
677677
int64_t[:, ::1] nobs
678678
Py_ssize_t len_values = len(values), len_labels = len(labels)
679679

680-
assert min_count == -1, "'min_count' only used in add and prod"
680+
assert min_count == -1, "'min_count' only used in sum and prod"
681681

682682
if len_values != len_labels:
683683
raise ValueError("len(index) != len(labels)")
@@ -745,7 +745,7 @@ def group_mean(
745745
Array containing unique label for each group, with its
746746
ordering matching up to the corresponding record in `values`.
747747
min_count : Py_ssize_t
748-
Only used in add and prod. Always -1.
748+
Only used in sum and prod. Always -1.
749749
is_datetimelike : bool
750750
True if `values` contains datetime-like entries.
751751
mask : ndarray[bool, ndim=2], optional
@@ -766,7 +766,7 @@ def group_mean(
766766
int64_t[:, ::1] nobs
767767
Py_ssize_t len_values = len(values), len_labels = len(labels)
768768

769-
assert min_count == -1, "'min_count' only used in add and prod"
769+
assert min_count == -1, "'min_count' only used in sum and prod"
770770

771771
if len_values != len_labels:
772772
raise ValueError("len(index) != len(labels)")
@@ -821,7 +821,7 @@ def group_ohlc(
821821
Py_ssize_t i, j, N, K, lab
822822
floating val
823823

824-
assert min_count == -1, "'min_count' only used in add and prod"
824+
assert min_count == -1, "'min_count' only used in sum and prod"
825825

826826
if len(labels) == 0:
827827
return

0 commit comments

Comments
 (0)