Skip to content

Commit 4a52b9b

Browse files
author
Mark Graham
committed
merge with master
1 parent 73f87ce commit 4a52b9b

File tree

180 files changed

+3874
-4050
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

180 files changed

+3874
-4050
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
*.log
1313
*.swp
1414
*.pdb
15+
*.zip
1516
.project
1617
.pydevproject
1718
.settings

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ repos:
2626
name: isort (cython)
2727
types: [cython]
2828
- repo: https://github.com/asottile/pyupgrade
29-
rev: v2.7.3
29+
rev: v2.7.4
3030
hooks:
3131
- id: pyupgrade
3232
args: [--py37-plus]

asv_bench/benchmarks/categoricals.py

+43
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import string
2+
import sys
13
import warnings
24

35
import numpy as np
@@ -67,6 +69,47 @@ def time_existing_series(self):
6769
pd.Categorical(self.series)
6870

6971

72+
class AsType:
    """Benchmark fixture for casting categorical columns to other dtypes."""

    def setup(self):
        """Populate ``self.df`` with categorical columns of several kinds."""
        n_rows = 10 ** 5

        sample = np.random.default_rng().choice

        # Pools of candidate values; every column is sampled from its pool.
        pools = {
            "str": list(string.ascii_letters),
            "int": np.random.randint(2 ** 16, size=154),
            "float": sys.maxsize * np.random.random((38,)),
            "timestamp": [
                pd.Timestamp(seconds, unit="s")
                for seconds in np.random.randint(2 ** 18, size=578)
            ],
        }

        sampled = {}
        for name, pool in pools.items():
            sampled[name] = sample(pool, n_rows)
        self.df = pd.DataFrame(sampled)

        # Add a string-rendered twin of each non-string column.
        for name in ("int", "float", "timestamp"):
            self.df[name + "_as_str"] = self.df[name].astype(str)

        # Finally convert every column (twins included) to categorical.
        for name in self.df.columns:
            self.df[name] = self.df[name].astype("category")

    # NOTE(review): the methods below lack the ``time_`` prefix that asv
    # benchmark methods normally carry -- presumably they are never
    # collected; confirm before relying on their timings.

    def astype_str(self):
        """Cast the int, float and timestamp categoricals to ``str``."""
        for name in "int float timestamp".split():
            self.df[name].astype("str")

    def astype_int(self):
        """Cast the ``int_as_str`` and ``timestamp`` categoricals to ``int``."""
        for name in "int_as_str timestamp".split():
            self.df[name].astype("int")

    def astype_float(self):
        """Cast four of the categorical columns to ``float``."""
        for name in "float_as_str int int_as_str timestamp".split():
            self.df[name].astype("float")

    def astype_datetime(self):
        """Cast the ``float`` categorical to a tz-aware datetime dtype."""
        target = pd.DatetimeTZDtype(tz="US/Pacific")
        self.df["float"].astype(target)
111+
112+
70113
class Concat:
71114
def setup(self):
72115
N = 10 ** 5

asv_bench/benchmarks/rolling.py

+13
Original file line numberDiff line numberDiff line change
@@ -225,4 +225,17 @@ def time_rolling_offset(self, method):
225225
getattr(self.groupby_roll_offset, method)()
226226

227227

228+
class GroupbyEWM:
    """Time ``mean`` on a grouped exponentially weighted window, per engine."""

    param_names = ["engine"]
    params = ["cython", "numba"]

    def setup(self, engine):
        """Create the grouped EWM object (``com=1.0``) reused by the benchmark."""
        values = range(50)
        frame = pd.DataFrame({"A": values, "B": values})
        grouped = frame.groupby("A")
        self.gb_ewm = grouped.ewm(com=1.0)

    def time_groupby_mean(self, engine):
        """Run ``mean`` with the parametrized ``engine``."""
        ewm = self.gb_ewm
        ewm.mean(engine=engine)
239+
240+
228241
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/series_methods.py

+49
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,55 @@ def time_isin_long_series_long_values_floats(self):
9090
self.s_long_floats.isin(self.vals_long_floats)
9191

9292

93+
class IsInLongSeriesLookUpDominates:
    """Time ``Series.isin`` on a long series against few lookup values."""

    params = [
        ["int64", "int32", "float64", "float32", "object"],
        [5, 1000],
        ["random_hits", "random_misses", "monotone_hits", "monotone_misses"],
    ]
    param_names = ["dtype", "MaxNumber", "series_type"]

    def setup(self, dtype, MaxNumber, series_type):
        """Build a 10**7-element series and the ``MaxNumber`` lookup values."""
        length = 10 ** 7
        if series_type in ("random_hits", "random_misses"):
            np.random.seed(42)
            data = np.random.randint(0, MaxNumber, length)
            if series_type == "random_misses":
                # Shift past the lookup range so nothing can match.
                data = data + MaxNumber
        elif series_type == "monotone_hits":
            data = np.repeat(np.arange(MaxNumber), length // MaxNumber)
        elif series_type == "monotone_misses":
            data = np.arange(length) + MaxNumber
        self.series = Series(data).astype(dtype)
        # Lookup values are 0 .. MaxNumber - 1 in the benchmarked dtype.
        self.values = np.arange(MaxNumber).astype(dtype)

    def time_isin(self, dtypes, MaxNumber, series_type):
        """Run the membership test prepared by ``setup``."""
        haystack = self.values
        self.series.isin(haystack)
118+
119+
120+
class IsInLongSeriesValuesDominate:
    """Time ``Series.isin`` when the lookup values outnumber the series."""

    params = [
        ["int64", "int32", "float64", "float32", "object"],
        ["random", "monotone"],
    ]
    param_names = ["dtype", "series_type"]

    def setup(self, dtype, series_type):
        """Build 10**7 lookup values and a series of 10**6 + 1 elements."""
        n_values = 10 ** 7
        if series_type == "random":
            np.random.seed(42)
            lookup = np.random.randint(0, 10 * n_values, n_values)
        elif series_type == "monotone":
            lookup = np.arange(n_values)
        self.values = lookup.astype(dtype)

        n_series = 10 ** 6 + 1
        base = Series(np.arange(n_series))
        self.series = base.astype(dtype)

    def time_isin(self, dtypes, series_type):
        """Run the membership test prepared by ``setup``."""
        haystack = self.values
        self.series.isin(haystack)
140+
141+
93142
class NSort:
94143

95144
params = ["first", "last", "all"]

ci/code_checks.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ fi
225225
### DOCSTRINGS ###
226226
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
227227

228-
MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS02, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA02, SA03)' ; echo $MSG
228+
MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS01, SS02, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA02, SA03)' ; echo $MSG
229229
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS02,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA02,SA03
230230
RET=$(($RET + $?)) ; echo $MSG "DONE"
231231

ci/run_tests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile -s
2525
if [[ $(uname) != "Linux" && $(uname) != "Darwin" ]]; then
2626
# GH#37455 windows py38 build appears to be running out of memory
2727
# skip collection of window tests
28-
PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/window/"
28+
PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/window/ --ignore=pandas/tests/plotting/"
2929
fi
3030

3131
echo $PYTEST_CMD

doc/source/ecosystem.rst

+10
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,16 @@ D-Tale integrates seamlessly with Jupyter notebooks, Python terminals, Kaggle
178178
& Google Colab. Here are some demos of the `grid <http://alphatechadmin.pythonanywhere.com/>`__
179179
and `chart-builder <http://alphatechadmin.pythonanywhere.com/charts/4?chart_type=surface&query=&x=date&z=Col0&agg=raw&cpg=false&y=%5B%22security_id%22%5D>`__.
180180

181+
`hvplot <https://hvplot.holoviz.org/index.html>`__
182+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
183+
184+
hvPlot is a high-level plotting API for the PyData ecosystem built on `HoloViews <https://holoviews.org/>`__.
185+
It can be loaded as a native pandas plotting backend via:
186+
187+
.. code:: python
188+
189+
pd.set_option("plotting.backend", "hvplot")
190+
181191
.. _ecosystem.ide:
182192

183193
IDE

doc/source/reference/window.rst

+22-6
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@ Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.roll
1010
Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc.
1111
ExponentialMovingWindow objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc.
1212

13-
Standard moving window functions
14-
--------------------------------
13+
.. _api.functions_rolling:
14+
15+
Rolling window functions
16+
------------------------
1517
.. currentmodule:: pandas.core.window.rolling
1618

1719
.. autosummary::
@@ -33,15 +35,25 @@ Standard moving window functions
3335
Rolling.aggregate
3436
Rolling.quantile
3537
Rolling.sem
38+
39+
.. _api.functions_window:
40+
41+
Weighted window functions
42+
-------------------------
43+
.. currentmodule:: pandas.core.window.rolling
44+
45+
.. autosummary::
46+
:toctree: api/
47+
3648
Window.mean
3749
Window.sum
3850
Window.var
3951
Window.std
4052

4153
.. _api.functions_expanding:
4254

43-
Standard expanding window functions
44-
-----------------------------------
55+
Expanding window functions
56+
--------------------------
4557
.. currentmodule:: pandas.core.window.expanding
4658

4759
.. autosummary::
@@ -64,8 +76,10 @@ Standard expanding window functions
6476
Expanding.quantile
6577
Expanding.sem
6678

67-
Exponentially-weighted moving window functions
68-
----------------------------------------------
79+
.. _api.functions_ewm:
80+
81+
Exponentially-weighted window functions
82+
---------------------------------------
6983
.. currentmodule:: pandas.core.window.ewm
7084

7185
.. autosummary::
@@ -77,6 +91,8 @@ Exponentially-weighted moving window functions
7791
ExponentialMovingWindow.corr
7892
ExponentialMovingWindow.cov
7993

94+
.. _api.indexers_window:
95+
8096
Window indexer
8197
--------------
8298
.. currentmodule:: pandas

doc/source/user_guide/basics.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -538,8 +538,8 @@ standard deviation of 1), very concisely:
538538
539539
Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod`
540540
preserve the location of ``NaN`` values. This is somewhat different from
541-
:meth:`~DataFrame.expanding` and :meth:`~DataFrame.rolling`.
542-
For more details please see :ref:`this note <stats.moments.expanding.note>`.
541+
:meth:`~DataFrame.expanding` and :meth:`~DataFrame.rolling`, where ``NaN`` behavior
542+
is additionally governed by the ``min_periods`` parameter.
543543

544544
.. ipython:: python
545545
@@ -945,7 +945,7 @@ Aggregation API
945945

946946
The aggregation API allows one to express possibly multiple aggregation operations in a single concise way.
947947
This API is similar across pandas objects, see :ref:`groupby API <groupby.aggregate>`, the
948-
:ref:`window functions API <stats.aggregate>`, and the :ref:`resample API <timeseries.aggregate>`.
948+
:ref:`window API <window.overview>`, and the :ref:`resample API <timeseries.aggregate>`.
949949
The entry point for aggregation is :meth:`DataFrame.aggregate`, or the alias
950950
:meth:`DataFrame.agg`.
951951

0 commit comments

Comments
 (0)