Skip to content

Commit c3d5b77

Browse files
committed
Merge branch 'main' into 37715-remove-mypy-ignore-V
2 parents 97531ec + be170fc commit c3d5b77

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+602
-564
lines changed

.github/workflows/ubuntu.yml

+1-6
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,6 @@ jobs:
6060
env_file: actions-310.yaml
6161
pattern: "not slow and not network and not single_cpu"
6262
pandas_copy_on_write: "1"
63-
- name: "Data Manager"
64-
env_file: actions-38.yaml
65-
pattern: "not slow and not network and not single_cpu"
66-
pandas_data_manager: "array"
6763
- name: "Pypy"
6864
env_file: actions-pypy-38.yaml
6965
pattern: "not slow and not network and not single_cpu"
@@ -86,7 +82,6 @@ jobs:
8682
EXTRA_APT: ${{ matrix.extra_apt || '' }}
8783
LANG: ${{ matrix.lang || '' }}
8884
LC_ALL: ${{ matrix.lc_all || '' }}
89-
PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }}
9085
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
9186
PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
9287
TEST_ARGS: ${{ matrix.test_args || '' }}
@@ -97,7 +92,7 @@ jobs:
9792
COVERAGE: ${{ !contains(matrix.env_file, 'pypy') }}
9893
concurrency:
9994
# https://github.community/t/concurrecy-not-work-for-push/183068/7
100-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_data_manager || '' }}
95+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}
10196
cancel-in-progress: true
10297

10398
services:

ci/fix_wheels.py

+35-32
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
1+
"""
2+
This file "repairs" our Windows wheels by copying the necessary DLLs for pandas to run
3+
on a barebones Windows installation into the wheel.
4+
5+
NOTE: The paths for the DLLs are hard-coded to the location of the Visual Studio
6+
redistributables
7+
"""
18
import os
29
import shutil
10+
import subprocess
11+
from subprocess import CalledProcessError
312
import sys
413
import zipfile
514

@@ -18,41 +27,35 @@
1827
raise ValueError(
1928
"User must pass the path to the wheel and the destination directory."
2029
)
21-
# Wheels are zip files
2230
if not os.path.isdir(dest_dir):
2331
print(f"Created directory {dest_dir}")
2432
os.mkdir(dest_dir)
25-
shutil.copy(wheel_path, dest_dir) # Remember to delete if process fails
33+
2634
wheel_name = os.path.basename(wheel_path)
2735
success = True
28-
exception = None
29-
repaired_wheel_path = os.path.join(dest_dir, wheel_name)
30-
with zipfile.ZipFile(repaired_wheel_path, "a") as zipf:
31-
try:
32-
# TODO: figure out how licensing works for the redistributables
33-
base_redist_dir = (
34-
f"C:/Program Files (x86)/Microsoft Visual Studio/2019/"
35-
f"Enterprise/VC/Redist/MSVC/14.29.30133/{PYTHON_ARCH}/"
36-
f"Microsoft.VC142.CRT/"
37-
)
38-
zipf.write(
39-
os.path.join(base_redist_dir, "msvcp140.dll"),
40-
"pandas/_libs/window/msvcp140.dll",
41-
)
42-
zipf.write(
43-
os.path.join(base_redist_dir, "concrt140.dll"),
44-
"pandas/_libs/window/concrt140.dll",
45-
)
46-
if not is_32:
47-
zipf.write(
48-
os.path.join(base_redist_dir, "vcruntime140_1.dll"),
49-
"pandas/_libs/window/vcruntime140_1.dll",
50-
)
51-
except Exception as e:
52-
success = False
53-
exception = e
5436

55-
if not success:
56-
os.remove(repaired_wheel_path)
57-
raise exception
58-
print(f"Successfully repaired wheel was written to {repaired_wheel_path}")
37+
try:
38+
# Use the wheel CLI for zipping up the wheel since the CLI will
39+
# take care of rebuilding the hashes found in the record file
40+
tmp_dir = os.path.join(dest_dir, "tmp")
41+
with zipfile.ZipFile(wheel_path, "r") as f:
42+
# Extracting all the members of the zip
43+
# into a specific location.
44+
f.extractall(path=tmp_dir)
45+
base_redist_dir = (
46+
f"C:/Program Files (x86)/Microsoft Visual Studio/2019/"
47+
f"Enterprise/VC/Redist/MSVC/14.29.30133/{PYTHON_ARCH}/"
48+
f"Microsoft.VC142.CRT/"
49+
)
50+
required_dlls = ["msvcp140.dll", "concrt140.dll"]
51+
if not is_32:
52+
required_dlls += ["vcruntime140_1.dll"]
53+
dest_dll_dir = os.path.join(tmp_dir, "pandas/_libs/window")
54+
for dll in required_dlls:
55+
src = os.path.join(base_redist_dir, dll)
56+
shutil.copy(src, dest_dll_dir)
57+
subprocess.run(["wheel", "pack", tmp_dir, "-d", dest_dir], check=True)
58+
except CalledProcessError:
59+
print("Failed to add DLLS to wheel.")
60+
sys.exit(1)
61+
print("Successfully repaired wheel")

ci/run_tests.sh

-15
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,3 @@ fi
3232

3333
echo $PYTEST_CMD
3434
sh -c "$PYTEST_CMD"
35-
36-
if [[ "$PANDAS_DATA_MANAGER" != "array" && "$PYTEST_TARGET" == "pandas" ]]; then
37-
# The ArrayManager tests should have already been run by PYTEST_CMD if PANDAS_DATA_MANAGER was already set to array
38-
# If we're targeting specific files, e.g. test_downstream.py, don't run.
39-
PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE pandas"
40-
41-
if [[ "$PATTERN" ]]; then
42-
PYTEST_AM_CMD="$PYTEST_AM_CMD -m \"$PATTERN and arraymanager\""
43-
else
44-
PYTEST_AM_CMD="$PYTEST_AM_CMD -m \"arraymanager\""
45-
fi
46-
47-
echo $PYTEST_AM_CMD
48-
sh -c "$PYTEST_AM_CMD"
49-
fi

ci/test_wheels.py

+12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33
import shutil
44
import subprocess
5+
from subprocess import CalledProcessError
56
import sys
67

78
if os.name == "nt":
@@ -15,6 +16,17 @@
1516
wheel_path = None
1617
print(f"IS_32_BIT is {is_32_bit}")
1718
print(f"Path to built wheel is {wheel_path}")
19+
20+
print("Verifying file hashes in wheel RECORD file")
21+
try:
22+
tmp_dir = "tmp"
23+
subprocess.run(["wheel", "unpack", wheel_path, "-d", tmp_dir], check=True)
24+
except CalledProcessError:
25+
print("wheel RECORD file hash verification failed.")
26+
sys.exit(1)
27+
finally:
28+
shutil.rmtree(tmp_dir)
29+
1830
if is_32_bit:
1931
sys.exit(0) # No way to test Windows 32-bit(no docker image)
2032
if wheel_path is None:

doc/source/development/extending.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,7 @@ Below is an example to define two original properties, "internal_cache" as a tem
450450
Plotting backends
451451
-----------------
452452

453-
Starting in 0.25 pandas can be extended with third-party plotting backends. The
453+
pandas can be extended with third-party plotting backends. The
454454
main idea is letting users select a plotting backend different than the provided
455455
one based on Matplotlib. For example:
456456

doc/source/getting_started/install.rst

-8
Original file line numberDiff line numberDiff line change
@@ -149,14 +149,6 @@ to install pandas with the optional dependencies to read Excel files.
149149

150150
The full list of extras that can be installed can be found in the :ref:`dependency section.<install.optional_dependencies>`
151151

152-
Installing with ActivePython
153-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
154-
155-
Installation instructions for
156-
`ActivePython <https://www.activestate.com/products/python/>`__ can be found
157-
`here <https://www.activestate.com/products/python/>`__. Versions
158-
2.7, 3.5 and 3.6 include pandas.
159-
160152
Installing using your Linux distribution's package manager.
161153
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
162154

doc/source/user_guide/advanced.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -918,7 +918,7 @@ If you select a label *contained* within an interval, this will also select the
918918
df.loc[2.5]
919919
df.loc[[2.5, 3.5]]
920920
921-
Selecting using an ``Interval`` will only return exact matches (starting from pandas 0.25.0).
921+
Selecting using an ``Interval`` will only return exact matches.
922922

923923
.. ipython:: python
924924

doc/source/user_guide/io.rst

+2-7
Original file line numberDiff line numberDiff line change
@@ -3999,7 +3999,7 @@ any pickled pandas object (or any other pickled object) from file:
39993999

40004000
.. warning::
40014001

4002-
:func:`read_pickle` is only guaranteed backwards compatible back to pandas version 0.20.3
4002+
:func:`read_pickle` is only guaranteed backwards compatible back to a few minor releases.
40034003

40044004
.. _io.pickle.compression:
40054005

@@ -5922,11 +5922,6 @@ And then issue the following queries:
59225922
Google BigQuery
59235923
---------------
59245924

5925-
.. warning::
5926-
5927-
Starting in 0.20.0, pandas has split off Google BigQuery support into the
5928-
separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it.
5929-
59305925
The ``pandas-gbq`` package provides functionality to read/write from Google BigQuery.
59315926

59325927
pandas integrates with this external package. If ``pandas-gbq`` is installed, you can
@@ -6114,7 +6109,7 @@ SAS formats
61146109
-----------
61156110

61166111
The top-level function :func:`read_sas` can read (but not write) SAS
6117-
XPORT (.xpt) and (since *v0.18.0*) SAS7BDAT (.sas7bdat) format files.
6112+
XPORT (.xpt) and SAS7BDAT (.sas7bdat) format files.
61186113

61196114
SAS files only contain two value types: ASCII text and floating point
61206115
values (usually 8 bytes but sometimes truncated). For xport files,

doc/source/user_guide/merging.rst

-6
Original file line numberDiff line numberDiff line change
@@ -510,12 +510,6 @@ all standard database join operations between ``DataFrame`` or named ``Series``
510510
dataset.
511511
* "many_to_many" or "m:m": allowed, but does not result in checks.
512512

513-
.. note::
514-
515-
Support for specifying index levels as the ``on``, ``left_on``, and
516-
``right_on`` parameters was added in version 0.23.0.
517-
Support for merging named ``Series`` objects was added in version 0.24.0.
518-
519513
The return type will be the same as ``left``. If ``left`` is a ``DataFrame`` or named ``Series``
520514
and ``right`` is a subclass of ``DataFrame``, the return type will still be ``DataFrame``.
521515

doc/source/user_guide/missing_data.rst

-5
Original file line numberDiff line numberDiff line change
@@ -182,11 +182,6 @@ account for missing data. For example:
182182
Sum/prod of empties/nans
183183
~~~~~~~~~~~~~~~~~~~~~~~~
184184

185-
.. warning::
186-
187-
This behavior is now standard as of v0.22.0 and is consistent with the default in ``numpy``; previously sum/prod of all-NA or empty Series/DataFrames would return NaN.
188-
See :ref:`v0.22.0 whatsnew <whatsnew_0220>` for more.
189-
190185
The sum of an empty or all-NA Series or column of a DataFrame is 0.
191186

192187
.. ipython:: python

doc/source/user_guide/text.rst

+1-16
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,7 @@ and replacing any remaining whitespaces with underscores:
206206

207207
.. warning::
208208

209-
Before v.0.25.0, the ``.str``-accessor did only the most rudimentary type checks. Starting with
210-
v.0.25.0, the type of the Series is inferred and the allowed types (i.e. strings) are enforced more rigorously.
209+
The type of the Series is inferred and the allowed types (i.e. strings) are enforced more rigorously.
211210

212211
Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few
213212
exceptions, other uses are not supported, and may be disabled at a later point.
@@ -423,11 +422,6 @@ the ``join``-keyword.
423422
s.str.cat(u)
424423
s.str.cat(u, join="left")
425424
426-
.. warning::
427-
428-
If the ``join`` keyword is not passed, the method :meth:`~Series.str.cat` will currently fall back to the behavior before version 0.23.0 (i.e. no alignment),
429-
but a ``FutureWarning`` will be raised if any of the involved indexes differ, since this default will change to ``join='left'`` in a future version.
430-
431425
The usual options are available for ``join`` (one of ``'left', 'outer', 'inner', 'right'``).
432426
In particular, alignment also means that the different lengths do not need to coincide anymore.
433427

@@ -503,15 +497,6 @@ Extracting substrings
503497
Extract first match in each subject (extract)
504498
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
505499

506-
.. warning::
507-
508-
Before version 0.23, argument ``expand`` of the ``extract`` method defaulted to
509-
``False``. When ``expand=False``, ``expand`` returns a ``Series``, ``Index``, or
510-
``DataFrame``, depending on the subject and regular expression
511-
pattern. When ``expand=True``, it always returns a ``DataFrame``,
512-
which is more consistent and less confusing from the perspective of a user.
513-
``expand=True`` has been the default since version 0.23.0.
514-
515500
The ``extract`` method accepts a `regular expression
516501
<https://docs.python.org/3/library/re.html>`__ with at least one
517502
capture group.

doc/source/user_guide/visualization.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1794,7 +1794,7 @@ when plotting a large number of points.
17941794
Plotting backends
17951795
-----------------
17961796

1797-
Starting in version 0.25, pandas can be extended with third-party plotting backends. The
1797+
pandas can be extended with third-party plotting backends. The
17981798
main idea is letting users select a plotting backend different than the provided
17991799
one based on Matplotlib.
18001800

doc/source/whatsnew/v0.14.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ See also issues (:issue:`6134`, :issue:`4036`, :issue:`3057`, :issue:`2598`, :is
506506

507507
You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and
508508
for the **columns**. There are some ambiguous cases where the passed indexer could be mis-interpreted
509-
as indexing *both* axes, rather than into say the MuliIndex for the rows.
509+
as indexing *both* axes, rather than into say the MultiIndex for the rows.
510510

511511
You should do this:
512512

doc/source/whatsnew/v2.0.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Fixed regressions
1717
- Fixed regression in :meth:`Series.describe` showing ``RuntimeWarning`` for extension dtype :class:`Series` with one element (:issue:`52515`)
1818
- Fixed regression in :meth:`DataFrame.sort_values` not resetting index when :class:`DataFrame` is already sorted and ``ignore_index=True`` (:issue:`52553`)
1919
- Fixed regression in :meth:`MultiIndex.isin` raising ``TypeError`` for ``Generator`` (:issue:`52568`)
20+
- Fixed regression in :meth:`DataFrame.pivot` changing :class:`Index` name of input object (:issue:`52629`)
2021

2122
.. ---------------------------------------------------------------------------
2223
.. _whatsnew_201.bug_fixes:

doc/source/whatsnew/v2.1.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,10 @@ Deprecations
230230
- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)
231231
- Deprecated :func:`is_categorical_dtype`, use ``isinstance(obj.dtype, pd.CategoricalDtype)`` instead (:issue:`52527`)
232232
- Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`)
233+
- Deprecated :func:`is_interval_dtype`, check ``isinstance(dtype, pd.IntervalDtype)`` instead (:issue:`52607`)
234+
- Deprecated :func:`is_datetime64tz_dtype`, check ``isinstance(dtype, pd.DatetimeTZDtype)`` instead (:issue:`52607`)
235+
- Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`)
236+
- Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`)
233237
-
234238

235239
.. ---------------------------------------------------------------------------

doc/sphinxext/announce.py

+14
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939

4040
from git import Repo
4141

42+
# Contributors to be renamed.
43+
CONTRIBUTOR_MAPPING = {"znkjnffrezna": "znetbgcubravk"}
44+
4245
UTF8Writer = codecs.getwriter("utf8")
4346
this_repo = Repo(os.path.join(os.path.dirname(__file__), "..", ".."))
4447

@@ -87,6 +90,17 @@ def get_authors(revision_range):
8790
cur.discard("Homu")
8891
pre.discard("Homu")
8992

93+
# Rename contributors according to mapping.
94+
for old_name, new_name in CONTRIBUTOR_MAPPING.items():
95+
old_name_decoded = codecs.decode(old_name, "rot13")
96+
new_name_decoded = codecs.decode(new_name, "rot13")
97+
if old_name_decoded in pre:
98+
pre.discard(old_name_decoded)
99+
pre.add(new_name_decoded)
100+
if old_name_decoded in cur:
101+
cur.discard(old_name_decoded)
102+
cur.add(new_name_decoded)
103+
90104
# Append '+' to new authors.
91105
authors = [s + " +" for s in cur - pre] + list(cur & pre)
92106
authors.sort()

pandas/_libs/lib.pyi

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ NoDefault = Literal[_NoDefault.no_default]
3636
i8max: int
3737
u8max: int
3838

39+
def is_np_dtype(dtype: object, kinds: str | None = ...) -> bool: ...
3940
def item_from_zerodim(val: object) -> object: ...
4041
def infer_dtype(value: object, skipna: bool = ...) -> str: ...
4142
def is_iterator(obj: object) -> bool: ...

pandas/_libs/lib.pyx

+27
Original file line numberDiff line numberDiff line change
@@ -3070,3 +3070,30 @@ def dtypes_all_equal(list types not None) -> bool:
30703070
return False
30713071
else:
30723072
return True
3073+
3074+
3075+
def is_np_dtype(object dtype, str kinds=None) -> bool:
3076+
"""
3077+
Optimized check for `isinstance(dtype, np.dtype)` with
3078+
optional `and dtype.kind in kinds`.
3079+
3080+
dtype = np.dtype("m8[ns]")
3081+
3082+
In [7]: %timeit isinstance(dtype, np.dtype)
3083+
117 ns ± 1.91 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
3084+
3085+
In [8]: %timeit is_np_dtype(dtype)
3086+
64 ns ± 1.51 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
3087+
3088+
In [9]: %timeit is_timedelta64_dtype(dtype)
3089+
209 ns ± 6.96 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
3090+
3091+
In [10]: %timeit is_np_dtype(dtype, "m")
3092+
93.4 ns ± 1.11 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
3093+
"""
3094+
if not cnp.PyArray_DescrCheck(dtype):
3095+
# i.e. not isinstance(dtype, np.dtype)
3096+
return False
3097+
if kinds is None:
3098+
return True
3099+
return dtype.kind in kinds

0 commit comments

Comments
 (0)