Skip to content

Commit 1d187ca

Browse files
Merge remote-tracking branch 'upstream/main' into td-construction
2 parents e63b8fd + c4027ad commit 1d187ca

File tree

87 files changed

+1032
-558
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+1032
-558
lines changed

.github/workflows/posix.yml

-13
Original file line numberDiff line numberDiff line change
@@ -155,24 +155,11 @@ jobs:
155155
channel-priority: flexible
156156
environment-file: ${{ env.ENV_FILE }}
157157
use-only-tar-bz2: true
158-
if: ${{ env.IS_PYPY == 'false' }} # No pypy3.8 support
159158

160159
- name: Upgrade Arrow version
161160
run: conda install -n pandas-dev -c conda-forge --no-update-deps pyarrow=${{ matrix.pyarrow_version }}
162161
if: ${{ matrix.pyarrow_version }}
163162

164-
- name: Setup PyPy
165-
uses: actions/setup-python@v3
166-
with:
167-
python-version: "pypy-3.8"
168-
if: ${{ env.IS_PYPY == 'true' }}
169-
170-
- name: Setup PyPy dependencies
171-
run: |
172-
# TODO: re-enable cov, its slowing the tests down though
173-
pip install Cython numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 pytest-asyncio>=0.17 hypothesis>=5.5.3
174-
if: ${{ env.IS_PYPY == 'true' }}
175-
176163
- name: Build Pandas
177164
uses: ./.github/actions/build_pandas
178165

asv_bench/benchmarks/strings.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ def time_get_dummies(self, dtype):
268268

269269
class Encode:
270270
def setup(self):
271-
self.ser = Series(tm.makeUnicodeIndex())
271+
self.ser = Series(tm.makeStringIndex())
272272

273273
def time_encode_decode(self):
274274
self.ser.str.encode("utf-8").str.decode("utf-8")

ci/deps/actions-38-minimum_versions.yaml

+23-24
Original file line numberDiff line numberDiff line change
@@ -20,38 +20,37 @@ dependencies:
2020
- numpy=1.18.5
2121
- pytz=2020.1
2222

23-
# optional dependencies, markupsafe for jinja2
24-
- beautifulsoup4=4.8.2
25-
- blosc=1.20.1
26-
- bottleneck=1.3.1
23+
# optional dependencies
24+
- beautifulsoup4=4.9.3
25+
- blosc=1.21.0
26+
- bottleneck=1.3.2
2727
- brotlipy=0.7.0
2828
- fastparquet=0.4.0
29-
- fsspec=0.7.4
29+
- fsspec=2021.05.0
3030
- html5lib=1.1
31-
- hypothesis=5.5.3
32-
- gcsfs=0.6.0
33-
- jinja2=2.11
34-
- lxml=4.5.0
35-
- markupsafe=2.0.1
31+
- hypothesis=6.13.0
32+
- gcsfs=2021.05.0
33+
- jinja2=3.0.0
34+
- lxml=4.6.3
3635
- matplotlib=3.3.2
37-
- numba=0.50.1
38-
- numexpr=2.7.1
36+
- numba=0.53.1
37+
- numexpr=2.7.3
3938
- odfpy=1.4.1
40-
- openpyxl=3.0.3
41-
- pandas-gbq=0.14.0
42-
- psycopg2=2.8.4
39+
- openpyxl=3.0.7
40+
- pandas-gbq=0.15.0
41+
- psycopg2=2.8.6
4342
- pyarrow=1.0.1
44-
- pymysql=0.10.1
45-
- pyreadstat=1.1.0
43+
- pymysql=1.0.2
44+
- pyreadstat=1.1.2
4645
- pytables=3.6.1
4746
- python-snappy=0.6.0
48-
- pyxlsb=1.0.6
49-
- s3fs=0.4.0
50-
- scipy=1.4.1
51-
- sqlalchemy=1.4.0
52-
- tabulate=0.8.7
53-
- xarray=0.15.1
47+
- pyxlsb=1.0.8
48+
- s3fs=2021.05.0
49+
- scipy=1.7.1
50+
- sqlalchemy=1.4.16
51+
- tabulate=0.8.9
52+
- xarray=0.19.0
5453
- xlrd=2.0.1
55-
- xlsxwriter=1.2.2
54+
- xlsxwriter=1.4.3
5655
- xlwt=1.3.0
5756
- zstandard=0.15.2

ci/deps/actions-pypy-38.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ dependencies:
1111
- cython>=0.29.24
1212
- pytest>=6.0
1313
- pytest-cov
14+
- pytest-asyncio
1415
- pytest-xdist>=1.31
1516
- hypothesis>=5.5.3
1617

doc/source/whatsnew/v1.4.3.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`DataFrame.nsmallest` that led to wrong results when ``np.nan`` was in the sorting column (:issue:`46589`)
1818
- Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
19-
-
19+
- Fixed regression in :meth:`.GroupBy.transform` and :meth:`.GroupBy.agg` failing with ``engine="numba"`` when the index was a :class:`MultiIndex` (:issue:`46867`)
20+
- Fixed regression in :meth:`.Styler.to_latex` and :meth:`.Styler.to_html` where ``buf`` failed in combination with ``encoding`` (:issue:`47053`)
2021

2122
.. ---------------------------------------------------------------------------
2223

doc/source/whatsnew/v1.5.0.rst

+53-2
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ Other enhancements
150150
- Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`)
151151
- A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
152152
- Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`)
153+
- ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`)
153154

154155
.. ---------------------------------------------------------------------------
155156
.. _whatsnew_150.notable_bug_fixes:
@@ -362,7 +363,50 @@ If installed, we now require:
362363
+=================+=================+==========+=========+
363364
| mypy (dev) | 0.950 | | X |
364365
+-----------------+-----------------+----------+---------+
365-
366+
| beautifulsoup4 | 4.9.3 | | X |
367+
+-----------------+-----------------+----------+---------+
368+
| blosc | 1.21.0 | | X |
369+
+-----------------+-----------------+----------+---------+
370+
| bottleneck | 1.3.2 | | X |
371+
+-----------------+-----------------+----------+---------+
372+
| fsspec | 2021.05.0 | | X |
373+
+-----------------+-----------------+----------+---------+
374+
| hypothesis | 6.13.0 | | X |
375+
+-----------------+-----------------+----------+---------+
376+
| gcsfs | 2021.05.0 | | X |
377+
+-----------------+-----------------+----------+---------+
378+
| jinja2 | 3.0.0 | | X |
379+
+-----------------+-----------------+----------+---------+
380+
| lxml | 4.6.3 | | X |
381+
+-----------------+-----------------+----------+---------+
382+
| numba | 0.53.1 | | X |
383+
+-----------------+-----------------+----------+---------+
384+
| numexpr | 2.7.3 | | X |
385+
+-----------------+-----------------+----------+---------+
386+
| openpyxl | 3.0.7 | | X |
387+
+-----------------+-----------------+----------+---------+
388+
| pandas-gbq | 0.15.0 | | X |
389+
+-----------------+-----------------+----------+---------+
390+
| psycopg2 | 2.8.6 | | X |
391+
+-----------------+-----------------+----------+---------+
392+
| pymysql | 1.0.2 | | X |
393+
+-----------------+-----------------+----------+---------+
394+
| pyreadstat | 1.1.2 | | X |
395+
+-----------------+-----------------+----------+---------+
396+
| pyxlsb | 1.0.8 | | X |
397+
+-----------------+-----------------+----------+---------+
398+
| s3fs | 2021.05.0 | | X |
399+
+-----------------+-----------------+----------+---------+
400+
| scipy | 1.7.1 | | X |
401+
+-----------------+-----------------+----------+---------+
402+
| sqlalchemy | 1.4.16 | | X |
403+
+-----------------+-----------------+----------+---------+
404+
| tabulate | 0.8.9 | | X |
405+
+-----------------+-----------------+----------+---------+
406+
| xarray | 0.19.0 | | X |
407+
+-----------------+-----------------+----------+---------+
408+
| xlsxwriter | 1.4.3 | | X |
409+
+-----------------+-----------------+----------+---------+
366410

367411
For `optional libraries <https://pandas.pydata.org/docs/getting_started/install.html>`_ the general recommendation is to use the latest version.
368412
The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
@@ -493,7 +537,8 @@ retained by specifying ``group_keys=False``.
493537
``numeric_only`` default value
494538
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
495539

496-
Across the DataFrame operations such as ``min``, ``sum``, and ``idxmax``, the default
540+
Across the DataFrame and DataFrameGroupBy operations such as
541+
``min``, ``sum``, and ``idxmax``, the default
497542
value of the ``numeric_only`` argument, if it exists at all, was inconsistent.
498543
Furthermore, operations with the default value ``None`` can lead to surprising
499544
results. (:issue:`46560`)
@@ -523,6 +568,8 @@ gained the ``numeric_only`` argument.
523568
- :meth:`DataFrame.cov`
524569
- :meth:`DataFrame.idxmin`
525570
- :meth:`DataFrame.idxmax`
571+
- :meth:`.DataFrameGroupBy.cummin`
572+
- :meth:`.DataFrameGroupBy.cummax`
526573
- :meth:`.DataFrameGroupBy.idxmin`
527574
- :meth:`.DataFrameGroupBy.idxmax`
528575
- :meth:`.GroupBy.var`
@@ -553,6 +600,8 @@ Other Deprecations
553600
- Deprecated the ``closed`` argument in :meth:`interval_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
554601
- Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`)
555602
- Deprecated positional arguments to :meth:`Index.join` except for ``other``, use keyword-only arguments instead of positional arguments (:issue:`46518`)
603+
- Deprecated indexing on a timezone-naive :class:`DatetimeIndex` using a string representing a timezone-aware datetime (:issue:`46903`, :issue:`36148`)
604+
-
556605

557606
.. ---------------------------------------------------------------------------
558607
.. _whatsnew_150.performance:
@@ -594,6 +643,7 @@ Datetimelike
594643
- Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`)
595644
- Bug in :meth:`SeriesGroupBy.value_counts` index when passing categorical column (:issue:`44324`)
596645
- Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`)
646+
- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`)
597647
-
598648

599649
Timedelta
@@ -745,6 +795,7 @@ Reshaping
745795
- Bug in concatenation with ``IntegerDtype`` or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`)
746796
- Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`)
747797
- Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`)
798+
- Bug in :meth:`DataFrame.pivot_table` where passing ``sort=False`` still resulted in a sorted index (:issue:`17041`)
748799
-
749800

750801
Sparse

pandas/_libs/src/ujson/lib/ultrajson.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
2929
https://github.com/client9/stringencoders
3030
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.
3131
32-
Numeric decoder derived from from TCL library
32+
Numeric decoder derived from TCL library
3333
https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
3434
* Copyright (c) 1988-1993 The Regents of the University of California.
3535
* Copyright (c) 1994 Sun Microsystems, Inc.

pandas/_libs/src/ujson/lib/ultrajsondec.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
3232
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights
3333
reserved.
3434
35-
Numeric decoder derived from from TCL library
35+
Numeric decoder derived from TCL library
3636
https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
3737
* Copyright (c) 1988-1993 The Regents of the University of California.
3838
* Copyright (c) 1994 Sun Microsystems, Inc.

pandas/_libs/src/ujson/lib/ultrajsonenc.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
3232
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights
3333
reserved.
3434
35-
Numeric decoder derived from from TCL library
35+
Numeric decoder derived from TCL library
3636
https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
3737
* Copyright (c) 1988-1993 The Regents of the University of California.
3838
* Copyright (c) 1994 Sun Microsystems, Inc.

pandas/_libs/src/ujson/python/JSONtoObj.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
2929
https://github.com/client9/stringencoders
3030
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.
3131
32-
Numeric decoder derived from from TCL library
32+
Numeric decoder derived from TCL library
3333
https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
3434
* Copyright (c) 1988-1993 The Regents of the University of California.
3535
* Copyright (c) 1994 Sun Microsystems, Inc.

pandas/_libs/src/ujson/python/objToJSON.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
3030
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights
3131
reserved.
3232
33-
Numeric decoder derived from from TCL library
33+
Numeric decoder derived from TCL library
3434
https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
3535
* Copyright (c) 1988-1993 The Regents of the University of California.
3636
* Copyright (c) 1994 Sun Microsystems, Inc.

pandas/_libs/src/ujson/python/ujson.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
2929
https://github.com/client9/stringencoders
3030
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.
3131
32-
Numeric decoder derived from from TCL library
32+
Numeric decoder derived from TCL library
3333
https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
3434
* Copyright (c) 1988-1993 The Regents of the University of California.
3535
* Copyright (c) 1994 Sun Microsystems, Inc.

pandas/_libs/src/ujson/python/version.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
2929
https://github.com/client9/stringencoders
3030
Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.
3131
32-
Numeric decoder derived from from TCL library
32+
Numeric decoder derived from TCL library
3333
https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
3434
* Copyright (c) 1988-1993 The Regents of the University of California.
3535
* Copyright (c) 1994 Sun Microsystems, Inc.

pandas/_libs/tslib.pyx

+11-12
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ from pandas._libs.tslibs.nattype cimport (
6060
)
6161
from pandas._libs.tslibs.timestamps cimport _Timestamp
6262

63+
from pandas._libs.tslibs import (
64+
Resolution,
65+
get_resolution,
66+
)
6367
from pandas._libs.tslibs.timestamps import Timestamp
6468

6569
# Note: this is the only non-tslibs intra-pandas dependency here
@@ -122,11 +126,11 @@ def format_array_from_datetime(
122126
"""
123127
cdef:
124128
int64_t val, ns, N = len(values)
125-
ndarray[int64_t] consider_values
126129
bint show_ms = False, show_us = False, show_ns = False
127130
bint basic_format = False
128131
ndarray[object] result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
129-
object ts, res
132+
_Timestamp ts
133+
str res
130134
npy_datetimestruct dts
131135

132136
if na_rep is None:
@@ -136,16 +140,10 @@ def format_array_from_datetime(
136140
# a format based on precision
137141
basic_format = format is None and tz is None
138142
if basic_format:
139-
consider_values = values[values != NPY_NAT]
140-
show_ns = (consider_values % 1000).any()
141-
142-
if not show_ns:
143-
consider_values //= 1000
144-
show_us = (consider_values % 1000).any()
145-
146-
if not show_ms:
147-
consider_values //= 1000
148-
show_ms = (consider_values % 1000).any()
143+
reso_obj = get_resolution(values)
144+
show_ns = reso_obj == Resolution.RESO_NS
145+
show_us = reso_obj == Resolution.RESO_US
146+
show_ms = reso_obj == Resolution.RESO_MS
149147

150148
for i in range(N):
151149
val = values[i]
@@ -178,6 +176,7 @@ def format_array_from_datetime(
178176
# invalid format string
179177
# requires dates > 1900
180178
try:
179+
# Note: dispatches to pydatetime
181180
result[i] = ts.strftime(format)
182181
except ValueError:
183182
result[i] = str(ts)

pandas/_libs/tslibs/fields.pyi

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def get_start_end_field(
2222
def get_date_field(
2323
dtindex: npt.NDArray[np.int64], # const int64_t[:]
2424
field: str,
25+
reso: int = ..., # NPY_DATETIMEUNIT
2526
) -> npt.NDArray[np.int32]: ...
2627
def get_timedelta_field(
2728
tdindex: npt.NDArray[np.int64], # const int64_t[:]
@@ -32,6 +33,7 @@ def isleapyear_arr(
3233
) -> npt.NDArray[np.bool_]: ...
3334
def build_isocalendar_sarray(
3435
dtindex: npt.NDArray[np.int64], # const int64_t[:]
36+
reso: int = ..., # NPY_DATETIMEUNIT
3537
) -> np.ndarray: ...
3638
def _get_locale_names(name_type: str, locale: str | None = ...): ...
3739

0 commit comments

Comments
 (0)