Skip to content

Commit 3146aae

Browse files
committed
Merge remote-tracking branch 'upstream/master' into typ_c_parser
2 parents de71573 + ff84f69 commit 3146aae

File tree

143 files changed

+1350
-1074
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

143 files changed

+1350
-1074
lines changed

.github/workflows/python-dev.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ jobs:
5151
python -m pip install --upgrade pip setuptools wheel
5252
pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
5353
pip install git+https://github.com/nedbat/coveragepy.git
54-
pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov
54+
pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov pytest-timeout
5555
pip list
5656
5757
- name: Build Pandas

.pre-commit-config.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ repos:
111111
# Incorrect code-block / IPython directives
112112
|\.\.\ code-block\ ::
113113
|\.\.\ ipython\ ::
114+
# directive should not have a space before ::
115+
|\.\.\ \w+\ ::
114116
115117
# Check for deprecated messages without sphinx directive
116118
|(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)

azure-pipelines.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
/opt/python/cp38-cp38/bin/python -m venv ~/virtualenvs/pandas-dev && \
4444
. ~/virtualenvs/pandas-dev/bin/activate && \
4545
python -m pip install --no-deps -U pip wheel setuptools && \
46-
pip install cython numpy python-dateutil pytz pytest pytest-xdist hypothesis pytest-azurepipelines && \
46+
pip install cython numpy python-dateutil pytz pytest pytest-xdist hypothesis pytest-azurepipelines pytest-timeout && \
4747
python setup.py build_ext -q -j2 && \
4848
python -m pip install --no-build-isolation -e . && \
4949
pytest -m 'not slow and not network and not clipboard' pandas --junitxml=test-data.xml"

ci/code_checks.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,8 @@ fi
9393
### DOCSTRINGS ###
9494
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9595

96-
MSG='Validate docstrings (GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS01, SS02, SS03, SS04, SS05, PR03, PR04, PR05, PR08, PRO9, PR10, EX04, RT01, RT04, RT05, SA02, SA03)' ; echo $MSG
97-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS02,SS03,SS04,SS05,PR03,PR04,PR05,PR08,PR09,PR10,EX04,RT01,RT04,RT05,SA02,SA03
96+
MSG='Validate docstrings (GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS01, SS02, SS03, SS04, SS05, PR03, PR04, PR05, PR06, PR08, PR09, PR10, EX04, RT01, RT04, RT05, SA02, SA03)' ; echo $MSG
97+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS02,SS03,SS04,SS05,PR03,PR04,PR05,PR06,PR08,PR09,PR10,EX04,RT01,RT04,RT05,SA02,SA03
9898
RET=$(($RET + $?)) ; echo $MSG "DONE"
9999

100100
fi

ci/deps/actions-38-db-min.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31
12+
- pytest-timeout
1213
- hypothesis>=5.5.3
1314

1415
# required

ci/deps/actions-38-db.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
- cython>=0.29.24
99
- pytest>=6.0
1010
- pytest-xdist>=1.31
11+
- pytest-timeout
1112
- hypothesis>=5.5.3
1213
- pytest-cov>=2.10.1 # this is only needed in the coverage build, ref: GH 35737
1314

ci/deps/actions-38-locale.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31
12+
- pytest-timeout
1213
- pytest-asyncio>=0.12.0
1314
- hypothesis>=5.5.3
1415

ci/deps/actions-38-locale_slow.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ dependencies:
1010
- pytest>=6.0
1111
- pytest-cov
1212
- pytest-xdist>=1.31
13+
- pytest-timeout
1314
- hypothesis>=5.5.3
1415

1516
# pandas dependencies

ci/deps/actions-38-minimum_versions.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31
12+
- pytest-timeout
1213
- hypothesis>=5.5.3
1314
- psutil
1415

ci/deps/actions-38-slow.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31
12+
- pytest-timeout
1213
- hypothesis>=5.5.3
1314

1415
# pandas dependencies

ci/deps/actions-38.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ dependencies:
1010
- pytest>=6.0
1111
- pytest-cov
1212
- pytest-xdist>=1.31
13+
- pytest-timeout
1314
- hypothesis>=5.5.3
1415

1516
# pandas dependencies

ci/deps/actions-39-numpydev.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
- pytest>=6.0
99
- pytest-cov
1010
- pytest-xdist>=1.31
11+
- pytest-timeout
1112
- hypothesis>=5.5.3
1213

1314
# pandas dependencies

ci/deps/actions-39-slow.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ dependencies:
1010
- pytest>=6.0
1111
- pytest-cov
1212
- pytest-xdist>=1.31
13+
- pytest-timeout
1314
- hypothesis>=5.5.3
1415

1516
# pandas dependencies

ci/deps/actions-39.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31
12+
- pytest-timeout
1213
- hypothesis>=5.5.3
1314

1415
# pandas dependencies

ci/deps/azure-macos-38.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
# tools
99
- pytest>=6.0
1010
- pytest-xdist>=1.31
11+
- pytest-timeout
1112
- hypothesis>=5.5.3
1213
- pytest-azurepipelines
1314

ci/deps/azure-windows-38.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- cython>=0.29.24
1010
- pytest>=6.0
1111
- pytest-xdist>=1.31
12+
- pytest-timeout
1213
- hypothesis>=5.5.3
1314
- pytest-azurepipelines
1415

ci/deps/azure-windows-39.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- cython>=0.29.24
1010
- pytest>=6.0
1111
- pytest-xdist>=1.31
12+
- pytest-timeout
1213
- hypothesis>=5.5.3
1314
- pytest-azurepipelines
1415

ci/deps/circle-38-arm64.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
- cython>=0.29.24
99
- pytest>=6.0
1010
- pytest-xdist>=1.31
11+
- pytest-timeout
1112
- hypothesis>=5.5.3
1213

1314
# pandas dependencies

doc/source/user_guide/io.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -3546,9 +3546,9 @@ with ``on_demand=True``.
35463546
Specifying sheets
35473547
+++++++++++++++++
35483548

3549-
.. note :: The second argument is ``sheet_name``, not to be confused with ``ExcelFile.sheet_names``.
3549+
.. note:: The second argument is ``sheet_name``, not to be confused with ``ExcelFile.sheet_names``.
35503550

3551-
.. note :: An ExcelFile's attribute ``sheet_names`` provides access to a list of sheets.
3551+
.. note:: An ExcelFile's attribute ``sheet_names`` provides access to a list of sheets.
35523552

35533553
* The argument ``sheet_name`` allows specifying the sheet or sheets to read.
35543554
* The default value for ``sheet_name`` is 0, indicating to read the first sheet

doc/source/whatsnew/v1.3.5.rst

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`Series.equals` when comparing floats with dtype object to None (:issue:`44190`)
1818
- Fixed regression in :func:`merge_asof` raising error when array was supplied as join key (:issue:`42844`)
19+
- Fixed regression when resampling :class:`DataFrame` with :class:`DatetimeIndex` with empty groups and ``uint8``, ``uint16`` or ``uint32`` columns incorrectly raising ``RuntimeError`` (:issue:`43329`)
1920
- Fixed regression in creating a :class:`DataFrame` from a timezone-aware :class:`Timestamp` scalar near a Daylight Savings Time transition (:issue:`42505`)
2021
- Fixed performance regression in :func:`read_csv` (:issue:`44106`)
2122
- Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`)

doc/source/whatsnew/v1.4.0.rst

+10-2
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ representation of :class:`DataFrame` objects (:issue:`4889`).
198198

199199
Other enhancements
200200
^^^^^^^^^^^^^^^^^^
201+
- :meth:`concat` will preserve the ``attrs`` when it is the same for all objects and discard the ``attrs`` when they are different. (:issue:`41828`)
201202
- :class:`DataFrameGroupBy` operations with ``as_index=False`` now correctly retain ``ExtensionDtype`` dtypes for columns being grouped on (:issue:`41373`)
202203
- Add support for assigning values to ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`)
203204
- :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)
@@ -226,6 +227,9 @@ Other enhancements
226227
``USFederalHolidayCalendar``. See also `Other API changes`_.
227228
- :meth:`.Rolling.var`, :meth:`.Expanding.var`, :meth:`.Rolling.std`, :meth:`.Expanding.std` now support `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`44461`)
228229
- :meth:`Series.info` has been added, for compatibility with :meth:`DataFrame.info` (:issue:`5167`)
230+
- Implemented :meth:`IntervalArray.min`, :meth:`IntervalArray.max`, as a result of which ``min`` and ``max`` now work for :class:`IntervalIndex`, :class:`Series` and :class:`DataFrame` with ``IntervalDtype`` (:issue:`44746`)
231+
- :meth:`UInt64Index.map` now retains ``dtype`` where possible (:issue:`44609`)
232+
-
229233

230234

231235
.. ---------------------------------------------------------------------------
@@ -543,7 +547,7 @@ Performance improvements
543547
- Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`)
544548
- Performance improvement when converting non-string arrays to string arrays (:issue:`34483`)
545549
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`)
546-
- Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`, :issue:`43142`, :issue:`43147`, :issue:`43307`, :issue:`43144`)
550+
- Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`, :issue:`43142`, :issue:`43147`, :issue:`43307`, :issue:`43144`, :issue:`44826`)
547551
- Performance improvement in :meth:`GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`)
548552
- Performance improvement in :meth:`DataFrame.corr` for ``method=pearson`` on data without missing values (:issue:`40956`)
549553
- Performance improvement in some :meth:`GroupBy.apply` operations (:issue:`42992`, :issue:`43578`)
@@ -640,6 +644,7 @@ Numeric
640644
- Bug in arithmetic operations involving :class:`RangeIndex` where the result would have the incorrect ``name`` (:issue:`43962`)
641645
- Bug in arithmetic operations involving :class:`Series` where the result could have the incorrect ``name`` when the operands have matching NA or matching tuple names (:issue:`44459`)
642646
- Bug in division with ``IntegerDtype`` or ``BooleanDtype`` array and NA scalar incorrectly raising (:issue:`44685`)
647+
- Bug in multiplying a :class:`Series` with ``FloatingDtype`` with a timedelta-like scalar incorrectly raising (:issue:`44772`)
643648
-
644649

645650
Conversion
@@ -649,7 +654,7 @@ Conversion
649654
- Bug in :class:`IntegerDtype` not allowing coercion from string dtype (:issue:`25472`)
650655
- Bug in :func:`to_datetime` with ``arg:xr.DataArray`` and ``unit="ns"`` specified raises TypeError (:issue:`44053`)
651656
- Bug in :meth:`DataFrame.convert_dtypes` not returning the correct type when a subclass does not overload :meth:`_constructor_sliced` (:issue:`43201`)
652-
-
657+
- Bug in :meth:`DataFrame.astype` not propagating ``attrs`` from the original :class:`DataFrame` (:issue:`44414`)
653658

654659
Strings
655660
^^^^^^^
@@ -701,6 +706,7 @@ Missing
701706
- Bug in :meth:`DataFrame.fillna` with limit and no method ignores ``axis='columns'`` or ``axis=1`` (:issue:`40989`)
702707
- Bug in :meth:`DataFrame.fillna` not replacing missing values when using a dict-like ``value`` and duplicate column names (:issue:`43476`)
703708
- Bug in constructing a :class:`DataFrame` with a dictionary ``np.datetime64`` as a value and ``dtype='timedelta64[ns]'``, or vice-versa, incorrectly casting instead of raising (:issue:`??`)
709+
- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with ``inplace=True`` not writing to the underlying array(s) in-place (:issue:`44749`)
704710
-
705711

706712
MultiIndex
@@ -742,6 +748,8 @@ I/O
742748
- Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer index column dtype from a nullable integer type (:issue:`44079`)
743749
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
744750
- Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)
751+
- Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`)
752+
-
745753

746754
Period
747755
^^^^^^

environment.yml

+1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ dependencies:
6161
- pytest>=6.0
6262
- pytest-cov
6363
- pytest-xdist>=1.31
64+
- pytest-timeout
6465
- pytest-asyncio
6566
- pytest-instafail
6667

pandas/_config/config.py

-1
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,6 @@ def _warn_if_deprecated(key: str) -> bool:
642642
d = _get_deprecated_option(key)
643643
if d:
644644
if d.msg:
645-
print(d.msg)
646645
warnings.warn(d.msg, FutureWarning)
647646
else:
648647
msg = f"'{key}' is deprecated"

pandas/_testing/__init__.py

+53
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,15 @@
110110
UInt64Index,
111111
)
112112
from pandas.core.arrays import (
113+
BaseMaskedArray,
113114
DatetimeArray,
115+
ExtensionArray,
114116
PandasArray,
115117
PeriodArray,
116118
TimedeltaArray,
117119
period_array,
118120
)
121+
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
119122

120123
if TYPE_CHECKING:
121124
from pandas import (
@@ -1050,3 +1053,53 @@ def at(x):
10501053

10511054
def iat(x):
10521055
return x.iat
1056+
1057+
1058+
# -----------------------------------------------------------------------------
1059+
1060+
1061+
def shares_memory(left, right) -> bool:
1062+
"""
1063+
Pandas-compat for np.shares_memory.
1064+
"""
1065+
if isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
1066+
return np.shares_memory(left, right)
1067+
elif isinstance(left, np.ndarray):
1068+
# Call with reversed args to get to unpacking logic below.
1069+
return shares_memory(right, left)
1070+
1071+
if isinstance(left, RangeIndex):
1072+
return False
1073+
if isinstance(left, MultiIndex):
1074+
return shares_memory(left._codes, right)
1075+
if isinstance(left, (Index, Series)):
1076+
return shares_memory(left._values, right)
1077+
1078+
if isinstance(left, NDArrayBackedExtensionArray):
1079+
return shares_memory(left._ndarray, right)
1080+
if isinstance(left, pd.core.arrays.SparseArray):
1081+
return shares_memory(left.sp_values, right)
1082+
1083+
if isinstance(left, ExtensionArray) and left.dtype == "string[pyarrow]":
1084+
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
1085+
if isinstance(right, ExtensionArray) and right.dtype == "string[pyarrow]":
1086+
# error: "ExtensionArray" has no attribute "_data"
1087+
left_pa_data = left._data # type: ignore[attr-defined]
1088+
# error: "ExtensionArray" has no attribute "_data"
1089+
right_pa_data = right._data # type: ignore[attr-defined]
1090+
left_buf1 = left_pa_data.chunk(0).buffers()[1]
1091+
right_buf1 = right_pa_data.chunk(0).buffers()[1]
1092+
return left_buf1 == right_buf1
1093+
1094+
if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
1095+
# By convention, we'll say these share memory if they share *either*
1096+
# the _data or the _mask
1097+
return np.shares_memory(left._data, right._data) or np.shares_memory(
1098+
left._mask, right._mask
1099+
)
1100+
1101+
if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
1102+
arr = left._mgr.arrays[0]
1103+
return shares_memory(arr, right)
1104+
1105+
raise NotImplementedError(type(left), type(right))

pandas/_testing/_warnings.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,16 @@ def _assert_caught_no_extra_warnings(
147147

148148
for actual_warning in caught_warnings:
149149
if _is_unexpected_warning(actual_warning, expected_warning):
150-
unclosed = "unclosed transport <asyncio.sslproto._SSLProtocolTransport"
151-
if actual_warning.category == ResourceWarning and unclosed in str(
152-
actual_warning.message
150+
# GH 44732: Don't make the CI flaky by filtering SSL-related
151+
# ResourceWarning from dependencies
152+
# GH#38630 pytest.filterwarnings does not suppress these.
153+
unclosed_ssl = (
154+
"unclosed transport <asyncio.sslproto._SSLProtocolTransport",
155+
"unclosed <ssl.SSLSocket",
156+
)
157+
if actual_warning.category == ResourceWarning and any(
158+
msg in str(actual_warning.message) for msg in unclosed_ssl
153159
):
154-
# FIXME(GH#38630): kludge because pytest.filterwarnings does not
155-
# suppress these
156160
continue
157161

158162
extra_warnings.append(

pandas/_testing/asserters.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1385,7 +1385,8 @@ def assert_equal(left, right, **kwargs):
13851385
assert kwargs == {}
13861386
assert left == right
13871387
else:
1388-
raise NotImplementedError(type(left))
1388+
assert kwargs == {}
1389+
assert_almost_equal(left, right)
13891390

13901391

13911392
def assert_sp_array_equal(left, right):

pandas/core/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,7 @@ def factorize(
633633
is a Categorical. When `values` is some other pandas object, an
634634
`Index` is returned. Otherwise, a 1-D ndarray is returned.
635635
636-
.. note ::
636+
.. note::
637637
638638
Even if there's a missing value in `values`, `uniques` will
639639
*not* contain an entry for it.

pandas/core/arraylike.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -326,13 +326,16 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
326326
reconstruct_kwargs = {}
327327

328328
def reconstruct(result):
329+
if ufunc.nout > 1:
330+
# np.modf, np.frexp, np.divmod
331+
return tuple(_reconstruct(x) for x in result)
332+
333+
return _reconstruct(result)
334+
335+
def _reconstruct(result):
329336
if lib.is_scalar(result):
330337
return result
331338

332-
if isinstance(result, tuple):
333-
# np.modf, np.frexp, np.divmod
334-
return tuple(reconstruct(x) for x in result)
335-
336339
if result.ndim != self.ndim:
337340
if method == "outer":
338341
if self.ndim == 2:
@@ -367,10 +370,12 @@ def reconstruct(result):
367370
return result
368371

369372
if "out" in kwargs:
373+
# e.g. test_multiindex_get_loc
370374
result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
371375
return reconstruct(result)
372376

373377
if method == "reduce":
378+
# e.g. test.series.test_ufunc.test_reduce
374379
result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
375380
if result is not NotImplemented:
376381
return result

0 commit comments

Comments
 (0)