Skip to content

Commit 49551db

Browse files
Merge remote-tracking branch 'upstream/main' into bisect
2 parents f35ceca + 49937b9 commit 49551db

File tree

122 files changed

+915
-757
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

122 files changed

+915
-757
lines changed

azure-pipelines.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
/opt/python/cp38-cp38/bin/python -m venv ~/virtualenvs/pandas-dev && \
4646
. ~/virtualenvs/pandas-dev/bin/activate && \
4747
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
48-
pip install cython numpy python-dateutil pytz pytest pytest-xdist hypothesis pytest-azurepipelines && \
48+
pip install cython numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio hypothesis && \
4949
python setup.py build_ext -q -j2 && \
5050
python -m pip install --no-build-isolation -e . && \
5151
pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml"

ci/deps/actions-310-numpydev.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- pytest-cov
1010
- pytest-xdist>=1.31
1111
- hypothesis>=5.5.3
12+
- pytest-asyncio
1213

1314
# pandas dependencies
1415
- python-dateutil

ci/deps/actions-310.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@ dependencies:
4242
- pytables
4343
- pyarrow
4444
- pyreadstat
45+
- python-snappy
4546
- pyxlsb
4647
- s3fs
4748
- scipy
48-
- snappy
4949
- sqlalchemy
5050
- tabulate
5151
- xarray

ci/deps/actions-38-downstream_compat.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,14 @@ dependencies:
3838
- odfpy
3939
- pandas-gbq
4040
- psycopg2
41-
- pymysql
42-
- pytables
4341
- pyarrow
42+
- pymysql
4443
- pyreadstat
44+
- pytables
45+
- python-snappy
4546
- pyxlsb
4647
- s3fs
4748
- scipy
48-
- snappy
4949
- sqlalchemy
5050
- tabulate
5151
- xarray

ci/deps/actions-38-minimum_versions.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,14 @@ dependencies:
4040
- openpyxl=3.0.3
4141
- pandas-gbq=0.14.0
4242
- psycopg2=2.8.4
43-
- pymysql=0.10.1
44-
- pytables=3.6.1
4543
- pyarrow=1.0.1
44+
- pymysql=0.10.1
4645
- pyreadstat=1.1.0
46+
- pytables=3.6.1
47+
- python-snappy=0.6.0
4748
- pyxlsb=1.0.6
4849
- s3fs=0.4.0
4950
- scipy=1.4.1
50-
- snappy=1.1.8
5151
- sqlalchemy=1.4.0
5252
- tabulate=0.8.7
5353
- xarray=0.15.1

ci/deps/actions-38.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,14 @@ dependencies:
3737
- odfpy
3838
- pandas-gbq
3939
- psycopg2
40-
- pymysql
41-
- pytables
4240
- pyarrow
41+
- pymysql
4342
- pyreadstat
43+
- pytables
44+
- python-snappy
4445
- pyxlsb
4546
- s3fs
4647
- scipy
47-
- snappy
4848
- sqlalchemy
4949
- tabulate
5050
- xarray

ci/deps/actions-39.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ dependencies:
3838
- pandas-gbq
3939
- psycopg2
4040
- pymysql
41-
- pytables
4241
- pyarrow
4342
- pyreadstat
43+
- pytables
44+
- python-snappy
4445
- pyxlsb
4546
- s3fs
4647
- scipy
47-
- snappy
4848
- sqlalchemy
4949
- tabulate
5050
- xarray

ci/deps/circle-38-arm64.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- pytest>=6.0
1010
- pytest-xdist>=1.31
1111
- hypothesis>=5.5.3
12+
- pytest-asyncio
1213

1314
# pandas dependencies
1415
- botocore>=1.11

doc/source/getting_started/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,6 @@ Compression
411411
Dependency Minimum Version Notes
412412
========================= ================== =============================================================
413413
brotli 0.7.0 Brotli compression
414-
snappy 1.1.8 Snappy compression
414+
python-snappy 0.6.0 Snappy compression
415415
Zstandard 0.15.2 Zstandard compression
416416
========================= ================== =============================================================

doc/source/user_guide/enhancingperf.rst

+22
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,28 @@ a larger amount of data points (e.g. 1+ million).
350350
In [6]: %timeit roll.apply(f, engine='cython', raw=True)
351351
3.92 s ± 59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
352352
353+
If your compute hardware contains multiple CPUs, the largest performance gain can be realized by setting ``parallel`` to ``True``
354+
to leverage more than 1 CPU. Internally, pandas leverages numba to parallelize computations over the columns of a :class:`DataFrame`;
355+
therefore, this performance gain is only realized for a :class:`DataFrame` with a large number of columns.
356+
357+
.. code-block:: ipython
358+
359+
In [1]: import numba
360+
361+
In [2]: numba.set_num_threads(1)
362+
363+
In [3]: df = pd.DataFrame(np.random.randn(10_000, 100))
364+
365+
In [4]: roll = df.rolling(100)
366+
367+
In [5]: %timeit roll.mean(engine="numba", engine_kwargs={"parallel": True})
368+
347 ms ± 26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
369+
370+
In [6]: numba.set_num_threads(2)
371+
372+
In [7]: %timeit roll.mean(engine="numba", engine_kwargs={"parallel": True})
373+
201 ms ± 2.97 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
374+
353375
Custom Function Examples
354376
~~~~~~~~~~~~~~~~~~~~~~~~
355377

doc/source/whatsnew/v1.4.2.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ Fixed regressions
1818
- Fixed regression in :func:`read_csv` killing python process when invalid file input was given for ``engine="c"`` (:issue:`45957`)
1919
- Fixed memory performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`)
2020
- Provided an alternative solution for passing custom Excel formats in :meth:`.Styler.to_excel`, which was a regression based on stricter CSS validation. Examples available in the documentation for :meth:`.Styler.format` (:issue:`46152`)
21-
- Fixed regression in :meth:`DataFrame.loc.__setitem__` losing :class:`MultiIndex` names if :class:`DataFrame` was empty before (:issue:`46317`)
21+
- Fixed regression in :meth:`DataFrame.replace` when a replacement value was also a target for replacement (:issue:`46306`)
22+
- Fixed regression when setting values with :meth:`DataFrame.loc` losing :class:`MultiIndex` names if :class:`DataFrame` was empty before (:issue:`46317`)
2223
-
2324

2425
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.5.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ Performance improvements
311311
- Performance improvement in :meth:`.GroupBy.transform` when broadcasting values for user-defined functions (:issue:`45708`)
312312
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions when only a single group exists (:issue:`44977`)
313313
- Performance improvement in :meth:`MultiIndex.get_locs` (:issue:`45681`, :issue:`46040`)
314+
- Performance improvement in :attr:`MultiIndex.values` when the MultiIndex contains levels of type DatetimeIndex, TimedeltaIndex or ExtensionDtypes (:issue:`46288`)
314315
- Performance improvement in :func:`merge` when left and/or right are empty (:issue:`45838`)
315316
- Performance improvement in :meth:`DataFrame.join` when left and/or right are empty (:issue:`46015`)
316317
- Performance improvement in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when target is a :class:`MultiIndex` (:issue:`46235`)
@@ -394,6 +395,7 @@ Indexing
394395
- Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`)
395396
- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`)
396397
- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
398+
- Bug in :meth:`DataFrame.__getattribute__` raising ``AttributeError`` if columns have ``"string"`` dtype (:issue:`46185`)
397399
- Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`)
398400
- Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`)
399401
- Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`)
@@ -453,6 +455,7 @@ Groupby/resample/rolling
453455
- Bug in :meth:`DataFrameGroupby.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`)
454456
- Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`)
455457
- Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`)
458+
- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype giving incorrect results when the leading value is the smallest possible int64 (:issue:`46382`)
456459
-
457460

458461
Reshaping

pandas/_libs/groupby.pyx

+9-4
Original file line numberDiff line numberDiff line change
@@ -989,13 +989,18 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil:
989989
return False
990990

991991

992-
cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max):
992+
cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max, bint is_datetimelike):
993993
"""
994994
Find either the min or the max supported by numeric_t; 'val' is a placeholder
995995
to effectively make numeric_t an argument.
996996
"""
997997
if numeric_t is int64_t:
998-
return -_int64_max if compute_max else util.INT64_MAX
998+
if compute_max and is_datetimelike:
999+
return -_int64_max
1000+
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
1001+
# instead of NPY_NAT here causes build warnings and failure in
1002+
# test_cummax_i8_at_implementation_bound
1003+
return NPY_NAT if compute_max else util.INT64_MAX
9991004
elif numeric_t is int32_t:
10001005
return util.INT32_MIN if compute_max else util.INT32_MAX
10011006
elif numeric_t is int16_t:
@@ -1395,7 +1400,7 @@ cdef group_min_max(
13951400
nobs = np.zeros((<object>out).shape, dtype=np.int64)
13961401

13971402
group_min_or_max = np.empty_like(out)
1398-
group_min_or_max[:] = _get_min_or_max(<iu_64_floating_t>0, compute_max)
1403+
group_min_or_max[:] = _get_min_or_max(<iu_64_floating_t>0, compute_max, is_datetimelike)
13991404

14001405
if iu_64_floating_t is int64_t:
14011406
# TODO: only if is_datetimelike?
@@ -1564,7 +1569,7 @@ cdef group_cummin_max(
15641569
bint isna_entry
15651570

15661571
accum = np.empty((ngroups, (<object>values).shape[1]), dtype=values.dtype)
1567-
accum[:] = _get_min_or_max(<iu_64_floating_t>0, compute_max)
1572+
accum[:] = _get_min_or_max(<iu_64_floating_t>0, compute_max, is_datetimelike)
15681573

15691574
na_val = _get_na_val(<iu_64_floating_t>0, is_datetimelike)
15701575

0 commit comments

Comments
 (0)