Skip to content

Commit cb757e4

Browse files
committed
Merge remote-tracking branch 'upstream/master' into move-metadata-to-cfg
2 parents 674e728 + d558bce commit cb757e4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+2355
-1747
lines changed

.pre-commit-config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ repos:
2424
hooks:
2525
- id: isort
2626
- repo: https://github.com/asottile/pyupgrade
27-
rev: v2.7.4
27+
rev: v2.9.0
2828
hooks:
2929
- id: pyupgrade
30-
args: [--py37-plus]
30+
args: [--py37-plus, --keep-runtime-typing]
3131
- repo: https://github.com/pre-commit/pygrep-hooks
3232
rev: v1.7.0
3333
hooks:

asv_bench/benchmarks/rolling.py

+10
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,11 @@ class Pairwise:
140140

141141
def setup(self, window, method, pairwise):
142142
N = 10 ** 4
143+
n_groups = 20
144+
groups = [i for _ in range(N // n_groups) for i in range(n_groups)]
143145
arr = np.random.random(N)
144146
self.df = pd.DataFrame(arr)
147+
self.df_group = pd.DataFrame({"A": groups, "B": arr}).groupby("A")
145148

146149
def time_pairwise(self, window, method, pairwise):
147150
if window is None:
@@ -150,6 +153,13 @@ def time_pairwise(self, window, method, pairwise):
150153
r = self.df.rolling(window=window)
151154
getattr(r, method)(self.df, pairwise=pairwise)
152155

156+
def time_groupby(self, window, method, pairwise):
157+
if window is None:
158+
r = self.df_group.expanding()
159+
else:
160+
r = self.df_group.rolling(window=window)
161+
getattr(r, method)(self.df, pairwise=pairwise)
162+
153163

154164
class Quantile:
155165
params = (

doc/source/user_guide/style.ipynb

+5-6
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,7 @@
180180
"\n",
181181
"styles = [\n",
182182
" hover(),\n",
183-
" {'selector': \"th\", 'props': [(\"font-size\", \"150%\"),\n",
184-
" (\"text-align\", \"center\")]}\n",
183+
" {'selector': \"th\", 'props': [(\"font-size\", \"150%\"), (\"text-align\", \"center\")]}\n",
185184
"]\n",
186185
"\n",
187186
"df.style.set_table_styles(styles)"
@@ -224,7 +223,7 @@
224223
"cell_type": "markdown",
225224
"metadata": {},
226225
"source": [
227-
"We can also chain all of the above by setting the `overwrite` argument to `False` so that it preserves previous settings."
226+
"We can also chain all of the above by setting the `overwrite` argument to `False` so that it preserves previous settings. We also show the CSS string input rather than the list of tuples."
228227
]
229228
},
230229
{
@@ -238,13 +237,13 @@
238237
" set_table_styles(styles).\\\n",
239238
" set_table_styles({\n",
240239
" 'A': [{'selector': '',\n",
241-
" 'props': [('color', 'red')]}],\n",
240+
" 'props': 'color:red;'}],\n",
242241
" 'B': [{'selector': 'td',\n",
243-
" 'props': [('color', 'blue')]}]\n",
242+
" 'props': 'color:blue;'}]\n",
244243
" }, axis=0, overwrite=False).\\\n",
245244
" set_table_styles({\n",
246245
" 3: [{'selector': 'td',\n",
247-
" 'props': [('color', 'green')]}]\n",
246+
" 'props': 'color:green;font-weight:bold;'}]\n",
248247
" }, axis=1, overwrite=False)\n",
249248
"s"
250249
]

doc/source/whatsnew/v1.2.2.rst

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Fixed regressions
2121
- Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
2222
- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
2323
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
24+
- Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`)
2425
-
2526

2627
.. ---------------------------------------------------------------------------
@@ -31,6 +32,7 @@ Bug fixes
3132
~~~~~~~~~
3233

3334
- :func:`pandas.read_excel` error message when a specified ``sheetname`` does not exist is now uniform across engines (:issue:`39250`)
35+
- Fixed bug in :func:`pandas.read_excel` producing incorrect results when the engine ``openpyxl`` is used and the excel file is missing or has incorrect dimension information; the fix requires ``openpyxl`` >= 3.0.0, prior versions may still fail (:issue:`38956`, :issue:`39001`)
3436
-
3537

3638
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.3.0.rst

+14-2
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,12 @@ Other enhancements
5353
- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
5454
- :meth:`Series.apply` can now accept list-like or dictionary-like arguments that aren't lists or dictionaries, e.g. ``ser.apply(np.array(["sum", "mean"]))``, which was already the case for :meth:`DataFrame.apply` (:issue:`39140`)
5555
- :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`)
56-
- :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes.
56+
- :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes (:issue:`35643`)
57+
- :meth:`.Styler.set_tooltips_class` and :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`)
5758
- :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`)
5859
- :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.
5960

61+
6062
.. ---------------------------------------------------------------------------
6163
6264
.. _whatsnew_130.notable_bug_fixes:
@@ -251,7 +253,8 @@ Performance improvements
251253
- Performance improvement in :meth:`Series.mean` for nullable data types (:issue:`34814`)
252254
- Performance improvement in :meth:`Series.isin` for nullable data types (:issue:`38340`)
253255
- Performance improvement in :meth:`DataFrame.corr` for method=kendall (:issue:`28329`)
254-
- Performance improvement in :meth:`core.window.Rolling.corr` and :meth:`core.window.Rolling.cov` (:issue:`39388`)
256+
- Performance improvement in :meth:`core.window.rolling.Rolling.corr` and :meth:`core.window.rolling.Rolling.cov` (:issue:`39388`)
257+
- Performance improvement in :meth:`core.window.rolling.RollingGroupby.corr`, :meth:`core.window.rolling.RollingGroupby.cov`, :meth:`core.window.expanding.ExpandingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.cov` (:issue:`39591`)
255258

256259
.. ---------------------------------------------------------------------------
257260
@@ -281,6 +284,7 @@ Datetimelike
281284
- Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`)
282285
- Bug in comparisons between :class:`Timestamp` object and ``datetime64`` objects just outside the implementation bounds for nanosecond ``datetime64`` (:issue:`39221`)
283286
- Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`)
287+
- Bug in :meth:`Timedelta.round`, :meth:`Timedelta.floor`, :meth:`Timedelta.ceil` for values near the implementation bounds of :class:`Timedelta` (:issue:`38964`)
284288
- Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`)
285289

286290
Timedelta
@@ -343,6 +347,8 @@ Indexing
343347
- Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`)
344348
- Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`)
345349
- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
350+
- Bug in setting numeric values into a boolean-dtype :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`)
351+
-
346352

347353
Missing
348354
^^^^^^^
@@ -406,6 +412,8 @@ Groupby/resample/rolling
406412
- Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`)
407413
- Bug in :meth:`.DataFrameGroupBy.idxmin` and :meth:`.DataFrameGroupBy.idxmax` with ``ExtensionDtype`` columns (:issue:`38733`)
408414
- Bug in :meth:`Series.resample` would raise when the index was a :class:`PeriodIndex` consisting of ``NaT`` (:issue:`39227`)
415+
- Bug in :meth:`core.window.rolling.RollingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.corr` where the groupby column would return 0 instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`)
416+
- Bug in :meth:`core.window.expanding.ExpandingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.cov` where 1 would be returned instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`)
409417

410418
Reshaping
411419
^^^^^^^^^
@@ -415,7 +423,9 @@ Reshaping
415423
- :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
416424
- Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`)
417425
- :meth:`Series.value_counts` and :meth:`Series.mode` return consistent keys in original order (:issue:`12679`, :issue:`11227` and :issue:`39007`)
426+
- Bug in :meth:`DataFrame.stack` not handling ``NaN`` in :class:`MultiIndex` columns correctly (:issue:`39481`)
418427
- Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`)
428+
- Bug in :meth:`DataFrame.sort_values` not reshaping index correctly after sorting on columns, when ``ignore_index=True`` (:issue:`39464`)
419429
- Bug in :meth:`DataFrame.append` returning incorrect dtypes with combinations of ``ExtensionDtype`` dtypes (:issue:`39454`)
420430

421431
Sparse
@@ -439,6 +449,8 @@ Other
439449
- Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`)
440450
- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`)
441451
- Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`)
452+
- Bug in :class:`Styler` where ``subset`` arg in methods raised an error for some valid multiindex slices (:issue:`33562`)
453+
-
442454
-
443455

444456
.. ---------------------------------------------------------------------------

pandas/_libs/tslibs/fields.pyx

+151
Original file line numberDiff line numberDiff line change
@@ -636,3 +636,154 @@ def get_locale_names(name_type: str, locale: object = None):
636636
"""
637637
with set_locale(locale, LC_TIME):
638638
return getattr(LocaleTime(), name_type)
639+
640+
641+
# ---------------------------------------------------------------------
642+
# Rounding
643+
644+
645+
class RoundTo:
646+
"""
647+
enumeration defining the available rounding modes
648+
649+
Attributes
650+
----------
651+
MINUS_INFTY
652+
round towards -∞, or floor [2]_
653+
PLUS_INFTY
654+
round towards +∞, or ceil [3]_
655+
NEAREST_HALF_EVEN
656+
round to nearest, tie-break half to even [6]_
657+
NEAREST_HALF_MINUS_INFTY
658+
round to nearest, tie-break half to -∞ [5]_
659+
NEAREST_HALF_PLUS_INFTY
660+
round to nearest, tie-break half to +∞ [4]_
661+
662+
663+
References
664+
----------
665+
.. [1] "Rounding - Wikipedia"
666+
https://en.wikipedia.org/wiki/Rounding
667+
.. [2] "Rounding down"
668+
https://en.wikipedia.org/wiki/Rounding#Rounding_down
669+
.. [3] "Rounding up"
670+
https://en.wikipedia.org/wiki/Rounding#Rounding_up
671+
.. [4] "Round half up"
672+
https://en.wikipedia.org/wiki/Rounding#Round_half_up
673+
.. [5] "Round half down"
674+
https://en.wikipedia.org/wiki/Rounding#Round_half_down
675+
.. [6] "Round half to even"
676+
https://en.wikipedia.org/wiki/Rounding#Round_half_to_even
677+
"""
678+
@property
679+
def MINUS_INFTY(self) -> int:
680+
return 0
681+
682+
@property
683+
def PLUS_INFTY(self) -> int:
684+
return 1
685+
686+
@property
687+
def NEAREST_HALF_EVEN(self) -> int:
688+
return 2
689+
690+
@property
691+
def NEAREST_HALF_PLUS_INFTY(self) -> int:
692+
return 3
693+
694+
@property
695+
def NEAREST_HALF_MINUS_INFTY(self) -> int:
696+
return 4
697+
698+
699+
cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit):
700+
cdef:
701+
Py_ssize_t i, n = len(values)
702+
ndarray[int64_t] result = np.empty(n, dtype="i8")
703+
int64_t res, value
704+
705+
with cython.overflowcheck(True):
706+
for i in range(n):
707+
value = values[i]
708+
if value == NPY_NAT:
709+
res = NPY_NAT
710+
else:
711+
res = value - value % unit
712+
result[i] = res
713+
714+
return result
715+
716+
717+
cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit):
718+
cdef:
719+
Py_ssize_t i, n = len(values)
720+
ndarray[int64_t] result = np.empty(n, dtype="i8")
721+
int64_t res, value
722+
723+
with cython.overflowcheck(True):
724+
for i in range(n):
725+
value = values[i]
726+
727+
if value == NPY_NAT:
728+
res = NPY_NAT
729+
else:
730+
remainder = value % unit
731+
if remainder == 0:
732+
res = value
733+
else:
734+
res = value + (unit - remainder)
735+
736+
result[i] = res
737+
738+
return result
739+
740+
741+
cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit):
742+
return _ceil_int64(values - unit // 2, unit)
743+
744+
745+
cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit):
746+
return _floor_int64(values + unit // 2, unit)
747+
748+
749+
def round_nsint64(values: np.ndarray, mode: RoundTo, nanos) -> np.ndarray:
750+
"""
751+
Applies rounding mode at given frequency
752+
753+
Parameters
754+
----------
755+
values : np.ndarray[int64_t]
756+
mode : instance of `RoundTo` enumeration
757+
nanos : np.int64
758+
Freq to round to, expressed in nanoseconds
759+
760+
Returns
761+
-------
762+
np.ndarray[int64_t]
763+
"""
764+
cdef:
765+
int64_t unit = nanos
766+
767+
if mode == RoundTo.MINUS_INFTY:
768+
return _floor_int64(values, unit)
769+
elif mode == RoundTo.PLUS_INFTY:
770+
return _ceil_int64(values, unit)
771+
elif mode == RoundTo.NEAREST_HALF_MINUS_INFTY:
772+
return _rounddown_int64(values, unit)
773+
elif mode == RoundTo.NEAREST_HALF_PLUS_INFTY:
774+
return _roundup_int64(values, unit)
775+
elif mode == RoundTo.NEAREST_HALF_EVEN:
776+
# for odd unit there is no need of a tie break
777+
if unit % 2:
778+
return _rounddown_int64(values, unit)
779+
quotient, remainder = np.divmod(values, unit)
780+
mask = np.logical_or(
781+
remainder > (unit // 2),
782+
np.logical_and(remainder == (unit // 2), quotient % 2)
783+
)
784+
quotient[mask] += 1
785+
return quotient * unit
786+
787+
# if/elif above should catch all rounding modes defined in enum 'RoundTo':
788+
# if flow of control arrives here, it is a bug
789+
raise ValueError("round_nsint64 called with an unrecognized rounding mode")

pandas/_libs/tslibs/timedeltas.pyx

+12-7
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ from pandas._libs.tslibs.util cimport (
4747
is_integer_object,
4848
is_timedelta64_object,
4949
)
50+
from pandas._libs.tslibs.fields import RoundTo, round_nsint64
5051

5152
# ----------------------------------------------------------------------
5253
# Constants
@@ -1297,14 +1298,18 @@ class Timedelta(_Timedelta):
12971298
object_state = self.value,
12981299
return (Timedelta, object_state)
12991300

1300-
def _round(self, freq, rounder):
1301+
@cython.cdivision(True)
1302+
def _round(self, freq, mode):
13011303
cdef:
1302-
int64_t result, unit
1304+
int64_t result, unit, remainder
1305+
ndarray[int64_t] arr
13031306

13041307
from pandas._libs.tslibs.offsets import to_offset
13051308
unit = to_offset(freq).nanos
1306-
result = unit * rounder(self.value / float(unit))
1307-
return Timedelta(result, unit='ns')
1309+
1310+
arr = np.array([self.value], dtype="i8")
1311+
result = round_nsint64(arr, mode, unit)[0]
1312+
return Timedelta(result, unit="ns")
13081313

13091314
def round(self, freq):
13101315
"""
@@ -1323,7 +1328,7 @@ class Timedelta(_Timedelta):
13231328
------
13241329
ValueError if the freq cannot be converted
13251330
"""
1326-
return self._round(freq, np.round)
1331+
return self._round(freq, RoundTo.NEAREST_HALF_EVEN)
13271332

13281333
def floor(self, freq):
13291334
"""
@@ -1334,7 +1339,7 @@ class Timedelta(_Timedelta):
13341339
freq : str
13351340
Frequency string indicating the flooring resolution.
13361341
"""
1337-
return self._round(freq, np.floor)
1342+
return self._round(freq, RoundTo.MINUS_INFTY)
13381343

13391344
def ceil(self, freq):
13401345
"""
@@ -1345,7 +1350,7 @@ class Timedelta(_Timedelta):
13451350
freq : str
13461351
Frequency string indicating the ceiling resolution.
13471352
"""
1348-
return self._round(freq, np.ceil)
1353+
return self._round(freq, RoundTo.PLUS_INFTY)
13491354

13501355
# ----------------------------------------------------------------
13511356
# Arithmetic Methods

0 commit comments

Comments
 (0)