Skip to content

Commit 54c0465

Browse files
committed
Merge remote-tracking branch 'upstream/master' into GH_37544
2 parents eb6905c + a057135 commit 54c0465

File tree

102 files changed

+1689
-1331
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

102 files changed

+1689
-1331
lines changed

.pre-commit-config.yaml

+30
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,36 @@ repos:
119119
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
120120
types: [python]
121121
exclude: ^(asv_bench|pandas/tests|doc)/
122+
- id: FrameOrSeriesUnion
123+
name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
124+
entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
125+
language: pygrep
126+
types: [python]
127+
exclude: ^pandas/_typing\.py$
128+
- id: type-not-class
129+
name: Check for use of foo.__class__ instead of type(foo)
130+
entry: \.__class__
131+
language: pygrep
132+
files: \.(py|pyx)$
133+
- id: unwanted-typing
134+
name: Check for use of comment-based annotation syntax and missing error codes
135+
entry: |
136+
(?x)
137+
\#\ type:\ (?!ignore)|
138+
\#\ type:\s?ignore(?!\[)
139+
language: pygrep
140+
types: [python]
141+
- id: no-os-remove
142+
name: Check code for instances of os.remove
143+
entry: os\.remove
144+
language: pygrep
145+
types: [python]
146+
files: ^pandas/tests/
147+
exclude: |
148+
(?x)^
149+
pandas/tests/io/excel/test_writers\.py|
150+
pandas/tests/io/pytables/common\.py|
151+
pandas/tests/io/pytables/test_store\.py$
122152
- repo: https://github.com/asottile/yesqa
123153
rev: v1.2.2
124154
hooks:

ci/code_checks.sh

-23
Original file line numberDiff line numberDiff line change
@@ -122,29 +122,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
122122
RET=$(($RET + $?)) ; echo $MSG "DONE"
123123

124124
# -------------------------------------------------------------------------
125-
# Type annotations
126-
127-
MSG='Check for use of comment-based annotation syntax' ; echo $MSG
128-
invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
129-
RET=$(($RET + $?)) ; echo $MSG "DONE"
130-
131-
MSG='Check for missing error codes with # type: ignore' ; echo $MSG
132-
invgrep -R --include="*.py" -P '# type:\s?ignore(?!\[)' pandas
133-
RET=$(($RET + $?)) ; echo $MSG "DONE"
134-
135-
MSG='Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias' ; echo $MSG
136-
invgrep -R --include="*.py" --exclude=_typing.py -E 'Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]' pandas
137-
RET=$(($RET + $?)) ; echo $MSG "DONE"
138-
139-
# -------------------------------------------------------------------------
140-
MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
141-
invgrep -R --include=*.{py,pyx} '\.__class__' pandas
142-
RET=$(($RET + $?)) ; echo $MSG "DONE"
143-
144-
MSG='Check code for instances of os.remove' ; echo $MSG
145-
invgrep -R --include="*.py*" --exclude "common.py" --exclude "test_writers.py" --exclude "test_store.py" -E "os\.remove" pandas/tests/
146-
RET=$(($RET + $?)) ; echo $MSG "DONE"
147-
148125
MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
149126
for class in "Series" "DataFrame" "Index" "MultiIndex" "Timestamp" "Timedelta" "TimedeltaIndex" "DatetimeIndex" "Categorical"; do
150127
check_namespace ${class}

doc/source/whatsnew/v1.1.5.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Fixed regressions
2323

2424
Bug fixes
2525
~~~~~~~~~
26-
-
26+
- Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`)
2727
-
2828

2929
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.2.0.rst

+11
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ Other enhancements
217217
- ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
218218
- :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
219219
- :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
220+
-
220221
- Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
221222
- Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`)
222223
- Added :meth:`Rolling.sem()` and :meth:`Expanding.sem()` to compute the standard error of mean (:issue:`26476`).
@@ -341,6 +342,8 @@ Deprecations
341342
- Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
342343
- :class:`Index` methods ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. Use the named set methods instead (:issue:`36758`)
343344
- :meth:`Categorical.is_dtype_equal` and :meth:`CategoricalIndex.is_dtype_equal` are deprecated, will be removed in a future version (:issue:`37545`)
345+
- :meth:`Series.slice_shift` and :meth:`DataFrame.slice_shift` are deprecated, use :meth:`Series.shift` or :meth:`DataFrame.shift` instead (:issue:`37601`)
346+
344347

345348
.. ---------------------------------------------------------------------------
346349
@@ -391,14 +394,18 @@ Datetimelike
391394
- Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`)
392395
- :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`)
393396
- Bug in :meth:`DatetimeIndex.equals` and :meth:`TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`)
397+
- :meth:`to_json` and :meth:`read_json` now implement timezone parsing when orient structure is 'table'.
398+
- :meth:`astype` now attempts to convert to 'datetime64[ns, tz]' directly from 'object' with inferred timezone from string (:issue:`35973`).
394399
- Bug in :meth:`TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`)
395400
- Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`)
396401
- Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`)
402+
- Bug in :func:`to_datetime` with a read-only array incorrectly raising (:issue:`34857`)
397403

398404
Timedelta
399405
^^^^^^^^^
400406
- Bug in :class:`TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`)
401407
- Bug in parsing of ISO 8601 durations in :class:`Timedelta`, :meth:`pd.to_datetime` (:issue:`37159`, fixes :issue:`29773` and :issue:`36204`)
408+
- Bug in :func:`to_timedelta` with a read-only array incorrectly raising (:issue:`34857`)
402409

403410
Timezones
404411
^^^^^^^^^
@@ -469,6 +476,7 @@ MultiIndex
469476

470477
- Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`)
471478
- Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`)
479+
- Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`)
472480

473481
I/O
474482
^^^
@@ -492,6 +500,8 @@ I/O
492500
- Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`)
493501
- Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`)
494502
- Bug in :class:`HDFStore` was dropping timezone information when exporting :class:`Series` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`)
503+
- Bug in :func:`read_csv` closing user-provided binary file handles when ``engine="c"`` and an ``encoding`` was requested (:issue:`36980`)
504+
- Bug in :meth:`DataFrame.to_hdf` not dropping missing rows with ``dropna=True`` (:issue:`35719`)
495505

496506
Plotting
497507
^^^^^^^^
@@ -523,6 +533,7 @@ Groupby/resample/rolling
523533
- Using :meth:`Rolling.var()` instead of :meth:`Rolling.std()` avoids numerical issues for :meth:`Rolling.corr()` when :meth:`Rolling.var()` is still within floating point precision while :meth:`Rolling.std()` is not (:issue:`31286`)
524534
- Bug in :meth:`df.groupby(..).quantile() <pandas.core.groupby.DataFrameGroupBy.quantile>` and :meth:`df.resample(..).quantile() <pandas.core.resample.Resampler.quantile>` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`)
525535
- Bug in :meth:`Rolling.median` and :meth:`Rolling.quantile` returned wrong values for :class:`BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`)
536+
- Bug in :meth:`DataFrame.groupby` dropping ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`)
526537

527538
Reshaping
528539
^^^^^^^^^

pandas/_libs/join.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ ctypedef fused join_t:
268268

269269
@cython.wraparound(False)
270270
@cython.boundscheck(False)
271-
def left_join_indexer_unique(join_t[:] left, join_t[:] right):
271+
def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right):
272272
cdef:
273273
Py_ssize_t i, j, nleft, nright
274274
ndarray[int64_t] indexer

pandas/_libs/lib.pyx

+18-11
Original file line numberDiff line numberDiff line change
@@ -896,21 +896,28 @@ def indices_fast(ndarray index, const int64_t[:] labels, list keys,
896896

897897
if lab != cur:
898898
if lab != -1:
899-
tup = PyTuple_New(k)
900-
for j in range(k):
901-
val = keys[j][sorted_labels[j][i - 1]]
902-
PyTuple_SET_ITEM(tup, j, val)
903-
Py_INCREF(val)
904-
899+
if k == 1:
900+
# When k = 1 we do not want to return a tuple as key
901+
tup = keys[0][sorted_labels[0][i - 1]]
902+
else:
903+
tup = PyTuple_New(k)
904+
for j in range(k):
905+
val = keys[j][sorted_labels[j][i - 1]]
906+
PyTuple_SET_ITEM(tup, j, val)
907+
Py_INCREF(val)
905908
result[tup] = index[start:i]
906909
start = i
907910
cur = lab
908911

909-
tup = PyTuple_New(k)
910-
for j in range(k):
911-
val = keys[j][sorted_labels[j][n - 1]]
912-
PyTuple_SET_ITEM(tup, j, val)
913-
Py_INCREF(val)
912+
if k == 1:
913+
# When k = 1 we do not want to return a tuple as key
914+
tup = keys[0][sorted_labels[0][n - 1]]
915+
else:
916+
tup = PyTuple_New(k)
917+
for j in range(k):
918+
val = keys[j][sorted_labels[j][n - 1]]
919+
PyTuple_SET_ITEM(tup, j, val)
920+
Py_INCREF(val)
914921
result[tup] = index[start:]
915922

916923
return result

0 commit comments

Comments
 (0)