Skip to content

Commit 5af3690

Browse files
committed
Merge branch 'main' into bug-range
2 parents 64a591a + 490c586 commit 5af3690

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+748
-525
lines changed

.github/workflows/code-checks.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ jobs:
7474

7575
- name: Install pyright
7676
# note: keep version in sync with .pre-commit-config.yaml
77-
run: npm install -g pyright@1.1.202
77+
run: npm install -g pyright@1.1.212
7878

7979
- name: Build Pandas
8080
id: build

.pre-commit-config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ repos:
8484
pass_filenames: false
8585
types: [python]
8686
stages: [manual]
87-
# note: keep version in sync with .github/workflows/ci.yml
88-
additional_dependencies: ['pyright@1.1.202']
87+
# note: keep version in sync with .github/workflows/code-checks.yml
88+
additional_dependencies: ['pyright@1.1.212']
8989
- repo: local
9090
hooks:
9191
- id: flake8-rst

asv_bench/benchmarks/frame_methods.py

+3
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,9 @@ def time_frame_duplicated(self):
611611
def time_frame_duplicated_wide(self):
612612
self.df2.duplicated()
613613

614+
def time_frame_duplicated_subset(self):
615+
self.df.duplicated(subset=["a"])
616+
614617

615618
class XS:
616619

doc/source/development/contributing_environment.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ Consult the docs for setting up pyenv `here <https://github.com/pyenv/pyenv>`__.
222222
pyenv virtualenv <version> <name-to-give-it>
223223
224224
# For instance:
225-
pyenv virtualenv 3.7.6 pandas-dev
225+
pyenv virtualenv 3.9.10 pandas-dev
226226
227227
# Activate the virtualenv
228228
pyenv activate pandas-dev

doc/source/getting_started/comparison/comparison_with_sql.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ structure.
1818
1919
url = (
2020
"https://raw.github.com/pandas-dev"
21-
"/pandas/master/pandas/tests/io/data/csv/tips.csv"
21+
"/pandas/main/pandas/tests/io/data/csv/tips.csv"
2222
)
2323
tips = pd.read_csv(url)
2424
tips

doc/source/whatsnew/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 1.4
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v1.4.1
2728
v1.4.0
2829

2930
Version 1.3

doc/source/whatsnew/v1.3.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -811,7 +811,7 @@ Other Deprecations
811811
- Deprecated allowing scalars to be passed to the :class:`Categorical` constructor (:issue:`38433`)
812812
- Deprecated constructing :class:`CategoricalIndex` without passing list-like data (:issue:`38944`)
813813
- Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor, use the specific subclass directly instead (:issue:`14093`, :issue:`21311`, :issue:`22315`, :issue:`26974`)
814-
- Deprecated the :meth:`astype` method of datetimelike (``timedelta64[ns]``, ``datetime64[ns]``, ``Datetime64TZDtype``, ``PeriodDtype``) to convert to integer dtypes, use ``values.view(...)`` instead (:issue:`38544`)
814+
- Deprecated the :meth:`astype` method of datetimelike (``timedelta64[ns]``, ``datetime64[ns]``, ``Datetime64TZDtype``, ``PeriodDtype``) to convert to integer dtypes, use ``values.view(...)`` instead (:issue:`38544`). This deprecation was later reverted in pandas 1.4.0.
815815
- Deprecated :meth:`MultiIndex.is_lexsorted` and :meth:`MultiIndex.lexsort_depth`, use :meth:`MultiIndex.is_monotonic_increasing` instead (:issue:`32259`)
816816
- Deprecated keyword ``try_cast`` in :meth:`Series.where`, :meth:`Series.mask`, :meth:`DataFrame.where`, :meth:`DataFrame.mask`; cast results manually if desired (:issue:`38836`)
817817
- Deprecated comparison of :class:`Timestamp` objects with ``datetime.date`` objects. Instead of e.g. ``ts <= mydate`` use ``ts <= pd.Timestamp(mydate)`` or ``ts.date() <= mydate`` (:issue:`36131`)

doc/source/whatsnew/v1.4.0.rst

+177-146
Large diffs are not rendered by default.

doc/source/whatsnew/v1.4.1.rst

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
.. _whatsnew_141:
2+
3+
What's new in 1.4.1 (February ??, 2022)
4+
---------------------------------------
5+
6+
These are the changes in pandas 1.4.1. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
13+
.. _whatsnew_141.regressions:
14+
15+
Fixed regressions
16+
~~~~~~~~~~~~~~~~~
17+
-
18+
-
19+
20+
.. ---------------------------------------------------------------------------
21+
22+
.. _whatsnew_141.bug_fixes:
23+
24+
Bug fixes
25+
~~~~~~~~~
26+
-
27+
-
28+
29+
.. ---------------------------------------------------------------------------
30+
31+
.. _whatsnew_141.other:
32+
33+
Other
34+
~~~~~
35+
-
36+
-
37+
38+
.. ---------------------------------------------------------------------------
39+
40+
.. _whatsnew_141.contributors:
41+
42+
Contributors
43+
~~~~~~~~~~~~
44+
45+
.. contributors:: v1.4.0..v1.4.1|HEAD

doc/source/whatsnew/v1.5.0.rst

+10-5
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ enhancement2
3131

3232
Other enhancements
3333
^^^^^^^^^^^^^^^^^^
34+
- :meth:`MultiIndex.to_frame` now supports the argument ``allow_duplicates`` and raises on duplicate labels if it is missing or False (:issue:`45245`)
3435
- :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`)
3536
- Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`)
3637
- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`)
38+
- :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now support `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`45428`)
3739
-
3840

3941
.. ---------------------------------------------------------------------------
@@ -70,9 +72,10 @@ If installed, we now require:
7072
+-----------------+-----------------+----------+---------+
7173
| Package | Minimum Version | Required | Changed |
7274
+=================+=================+==========+=========+
73-
| | | X | X |
75+
| mypy (dev) | 0.931 | | X |
7476
+-----------------+-----------------+----------+---------+
7577

78+
7679
For `optional libraries <https://pandas.pydata.org/docs/getting_started/install.html>`_ the general recommendation is to use the latest version.
7780
The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
7881
Optional libraries below the lowest tested version may still work, but are not considered supported.
@@ -145,6 +148,7 @@ Other Deprecations
145148
- Deprecated behavior of :meth:`SparseArray.astype`, :meth:`Series.astype`, and :meth:`DataFrame.astype` with :class:`SparseDtype` when passing a non-sparse ``dtype``. In a future version, this will cast to that non-sparse dtype instead of wrapping it in a :class:`SparseDtype` (:issue:`34457`)
146149
- Deprecated behavior of :meth:`DatetimeIndex.intersection` and :meth:`DatetimeIndex.symmetric_difference` (``union`` behavior was already deprecated in version 1.3.0) with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`, :issue:`45357`)
147150
- Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`)
151+
- Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`)
148152
-
149153

150154

@@ -153,7 +157,7 @@ Other Deprecations
153157

154158
Performance improvements
155159
~~~~~~~~~~~~~~~~~~~~~~~~
156-
-
160+
- Performance improvement in :meth:`DataFrame.duplicated` when subset consists of only one column (:issue:`45236`)
157161
-
158162

159163
.. ---------------------------------------------------------------------------
@@ -203,7 +207,7 @@ Strings
203207

204208
Interval
205209
^^^^^^^^
206-
-
210+
- Bug in :meth:`IntervalArray.__setitem__` when setting ``np.nan`` into an integer-backed array raising ``ValueError`` instead of ``TypeError`` (:issue:`45484`)
207211
-
208212

209213
Indexing
@@ -213,6 +217,7 @@ Indexing
213217
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
214218
- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`)
215219
- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`)
220+
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)
216221
-
217222

218223
Missing
@@ -228,7 +233,7 @@ MultiIndex
228233
I/O
229234
^^^
230235
- Bug in :meth:`DataFrame.to_stata` where no error is raised if the :class:`DataFrame` contains ``-np.inf`` (:issue:`45350`)
231-
-
236+
- Bug in :meth:`DataFrame.info` where a new line at the end of the output is omitted when called on an empty :class:`DataFrame` (:issue:`45494`)
232237

233238
Period
234239
^^^^^^
@@ -242,7 +247,7 @@ Plotting
242247

243248
Groupby/resample/rolling
244249
^^^^^^^^^^^^^^^^^^^^^^^^
245-
-
250+
- Bug in :meth:`DataFrame.resample` ignoring ``closed="right"`` on :class:`TimedeltaIndex` (:issue:`45414`)
246251
-
247252

248253
Reshaping

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ dependencies:
2424
- flake8-bugbear=21.3.2 # used by flake8, find likely bugs
2525
- flake8-comprehensions=3.7.0 # used by flake8, linting of unnecessary comprehensions
2626
- isort>=5.2.1 # check that imports are in the right order
27-
- mypy=0.930
27+
- mypy=0.931
2828
- pre-commit>=2.9.2
2929
- pycodestyle # used by flake8
3030
- pyupgrade

pandas/_libs/src/ujson/python/objToJSON.c

+1
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ static PyObject *get_values(PyObject *obj) {
228228
PyErr_Clear();
229229
} else if (PyObject_HasAttrString(values, "__array__")) {
230230
// We may have gotten a Categorical or Sparse array so call np.array
231+
Py_DECREF(values);
231232
values = PyObject_CallMethod(values, "__array__", NULL);
232233
} else if (!PyArray_CheckExact(values)) {
233234
// Didn't get a numpy array, so keep trying

pandas/_version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
199199
# refs/heads/ and refs/tags/ prefixes that would let us distinguish
200200
# between branches and tags. By ignoring refnames without digits, we
201201
# filter out many common branch names like "release" and
202-
# "stabilization", as well as "HEAD" and "master".
202+
# "stabilization", as well as "HEAD" and "main".
203203
tags = {r for r in refs if re.search(r"\d", r)}
204204
if verbose:
205205
print("discarding '%s', no digits" % ",".join(refs - tags))

pandas/core/array_algos/putmask.py

+2
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ def putmask_without_repeat(
126126
mask : np.ndarray[bool]
127127
new : Any
128128
"""
129+
new = setitem_datetimelike_compat(values, mask.sum(), new)
130+
129131
if getattr(new, "ndim", 0) >= 1:
130132
new = new.astype(values.dtype, copy=False)
131133

pandas/core/arrays/base.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ def __contains__(self, item: object) -> bool | np.bool_:
427427
if not self._can_hold_na:
428428
return False
429429
elif item is self.dtype.na_value or isinstance(item, self.dtype.type):
430-
return self._hasnans
430+
return self._hasna
431431
else:
432432
return False
433433
else:
@@ -606,7 +606,7 @@ def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
606606
raise AbstractMethodError(self)
607607

608608
@property
609-
def _hasnans(self) -> bool:
609+
def _hasna(self) -> bool:
610610
# GH#22680
611611
"""
612612
Equivalent to `self.isna().any()`.
@@ -628,6 +628,16 @@ def _values_for_argsort(self) -> np.ndarray:
628628
See Also
629629
--------
630630
ExtensionArray.argsort : Return the indices that would sort this array.
631+
632+
Notes
633+
-----
634+
The caller is responsible for *not* modifying these values in-place, so
635+
it is safe for implementors to give views on `self`.
636+
637+
Functions that use this (e.g. ExtensionArray.argsort) should ignore
638+
entries with missing values in the original array (according to `self.isna()`).
639+
This means that the corresponding entries in the returned array don't need to
640+
be modified to sort correctly.
631641
"""
632642
# Note: this is used in `ExtensionArray.argsort`.
633643
return np.array(self)
@@ -698,7 +708,7 @@ def argmin(self, skipna: bool = True) -> int:
698708
ExtensionArray.argmax
699709
"""
700710
validate_bool_kwarg(skipna, "skipna")
701-
if not skipna and self._hasnans:
711+
if not skipna and self._hasna:
702712
raise NotImplementedError
703713
return nargminmax(self, "argmin")
704714

@@ -722,7 +732,7 @@ def argmax(self, skipna: bool = True) -> int:
722732
ExtensionArray.argmin
723733
"""
724734
validate_bool_kwarg(skipna, "skipna")
725-
if not skipna and self._hasnans:
735+
if not skipna and self._hasna:
726736
raise NotImplementedError
727737
return nargminmax(self, "argmax")
728738

@@ -1534,6 +1544,9 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
15341544
ExtensionDtype.empty
15351545
ExtensionDtype.empty is the 'official' public version of this API.
15361546
"""
1547+
# Implementer note: while ExtensionDtype.empty is the public way to
1548+
# call this method, it is still required to implement this `_empty`
1549+
# method as well (it is called internally in pandas)
15371550
obj = cls._from_sequence([], dtype=dtype)
15381551

15391552
taker = np.broadcast_to(np.intp(-1), shape)

pandas/core/arrays/boolean.py

-18
Original file line numberDiff line numberDiff line change
@@ -421,24 +421,6 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
421421
# coerce
422422
return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
423423

424-
def _values_for_argsort(self) -> np.ndarray:
425-
"""
426-
Return values for sorting.
427-
428-
Returns
429-
-------
430-
ndarray
431-
The transformed values should maintain the ordering between values
432-
within the array.
433-
434-
See Also
435-
--------
436-
ExtensionArray.argsort : Return the indices that would sort this array.
437-
"""
438-
data = self._data.copy()
439-
data[self._mask] = -1
440-
return data
441-
442424
def _logical_method(self, other, op):
443425

444426
assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}

pandas/core/arrays/datetimelike.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -430,14 +430,6 @@ def astype(self, dtype, copy: bool = True):
430430
elif is_integer_dtype(dtype):
431431
# we deliberately ignore int32 vs. int64 here.
432432
# See https://github.com/pandas-dev/pandas/issues/24381 for more.
433-
warnings.warn(
434-
f"casting {self.dtype} values to int64 with .astype(...) is "
435-
"deprecated and will raise in a future version. "
436-
"Use .view(...) instead.",
437-
FutureWarning,
438-
stacklevel=find_stack_level(),
439-
)
440-
441433
values = self.asi8
442434

443435
if is_unsigned_integer_dtype(dtype):
@@ -849,7 +841,7 @@ def _isnan(self) -> npt.NDArray[np.bool_]:
849841
return self.asi8 == iNaT
850842

851843
@property # NB: override with cache_readonly in immutable subclasses
852-
def _hasnans(self) -> bool:
844+
def _hasna(self) -> bool:
853845
"""
854846
return if I have any nans; enables various perf speedups
855847
"""
@@ -874,7 +866,7 @@ def _maybe_mask_results(
874866
875867
This is an internal routine.
876868
"""
877-
if self._hasnans:
869+
if self._hasna:
878870
if convert:
879871
result = result.astype(convert)
880872
if fill_value is None:
@@ -1133,7 +1125,7 @@ def _add_timedelta_arraylike(self, other):
11331125
new_values = checked_add_with_arr(
11341126
self_i8, other_i8, arr_mask=self._isnan, b_mask=other._isnan
11351127
)
1136-
if self._hasnans or other._hasnans:
1128+
if self._hasna or other._hasna:
11371129
mask = self._isnan | other._isnan
11381130
np.putmask(new_values, mask, iNaT)
11391131

pandas/core/arrays/datetimes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ def _sub_datetime_arraylike(self, other):
738738
other_i8 = other.asi8
739739
arr_mask = self._isnan | other._isnan
740740
new_values = checked_add_with_arr(self_i8, -other_i8, arr_mask=arr_mask)
741-
if self._hasnans or other._hasnans:
741+
if self._hasna or other._hasna:
742742
np.putmask(new_values, arr_mask, iNaT)
743743
return new_values.view("timedelta64[ns]")
744744

@@ -1356,7 +1356,7 @@ def isocalendar(self) -> DataFrame:
13561356
iso_calendar_df = DataFrame(
13571357
sarray, columns=["year", "week", "day"], dtype="UInt32"
13581358
)
1359-
if self._hasnans:
1359+
if self._hasna:
13601360
iso_calendar_df.iloc[self._isnan] = None
13611361
return iso_calendar_df
13621362

pandas/core/arrays/floating.py

-3
Original file line numberDiff line numberDiff line change
@@ -244,9 +244,6 @@ def _coerce_to_array(
244244
) -> tuple[np.ndarray, np.ndarray]:
245245
return coerce_to_array(value, dtype=dtype, copy=copy)
246246

247-
def _values_for_argsort(self) -> np.ndarray:
248-
return self._data
249-
250247

251248
_dtype_docstring = """
252249
An ExtensionDtype for {dtype} data.

0 commit comments

Comments
 (0)