Skip to content

Commit 76cd5c6

Browse files
committed
Merge branch 'master' into 42916
2 parents ee7cc16 + e7e7b40 commit 76cd5c6

File tree

135 files changed

+3409
-2221
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

135 files changed

+3409
-2221
lines changed

.circleci/config.yml

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
version: 2.1
2+
3+
jobs:
4+
test-arm:
5+
machine:
6+
image: ubuntu-2004:202101-01
7+
resource_class: arm.medium
8+
environment:
9+
ENV_FILE: ci/deps/circle-38-arm64.yaml
10+
PYTEST_WORKERS: auto
11+
PATTERN: "not slow and not network and not clipboard and not arm_slow"
12+
PYTEST_TARGET: "pandas"
13+
steps:
14+
- checkout
15+
- run: ci/setup_env.sh
16+
- run: PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH ci/run_tests.sh
17+
18+
workflows:
19+
test:
20+
jobs:
21+
- test-arm

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
[![License](https://img.shields.io/pypi/l/pandas.svg)](https://github.com/pandas-dev/pandas/blob/master/LICENSE)
1313
[![Azure Build Status](https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master)](https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master)
1414
[![Coverage](https://codecov.io/github/pandas-dev/pandas/coverage.svg?branch=master)](https://codecov.io/gh/pandas-dev/pandas)
15-
[![Downloads](https://anaconda.org/conda-forge/pandas/badges/downloads.svg)](https://pandas.pydata.org)
15+
[![Downloads](https://static.pepy.tech/personalized-badge/pandas?period=month&units=international_system&left_color=black&right_color=orange&left_text=PyPI%20downloads%20per%20month)](https://pepy.tech/project/pandas)
1616
[![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas)
1717
[![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org)
1818
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

asv_bench/benchmarks/groupby.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,16 @@ def setup(self, dtype, method, application, ncols):
454454
# DataFrameGroupBy doesn't have these methods
455455
raise NotImplementedError
456456

457+
if application == "transformation" and method in [
458+
"head",
459+
"tail",
460+
"unique",
461+
"value_counts",
462+
"size",
463+
]:
464+
# DataFrameGroupBy doesn't have these methods
465+
raise NotImplementedError
466+
457467
ngroups = 1000
458468
size = ngroups * 2
459469
rng = np.arange(ngroups).reshape(-1, 1)
@@ -480,7 +490,7 @@ def setup(self, dtype, method, application, ncols):
480490
if len(cols) == 1:
481491
cols = cols[0]
482492

483-
if application == "transform":
493+
if application == "transformation":
484494
if method == "describe":
485495
raise NotImplementedError
486496

asv_bench/benchmarks/inference.py

+5
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ def setup(self):
173173
self.strings_tz_space = [
174174
x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng
175175
]
176+
self.strings_zero_tz = [x.strftime("%Y-%m-%d %H:%M:%S") + "Z" for x in rng]
176177

177178
def time_iso8601(self):
178179
to_datetime(self.strings)
@@ -189,6 +190,10 @@ def time_iso8601_format_no_sep(self):
189190
def time_iso8601_tz_spaceformat(self):
190191
to_datetime(self.strings_tz_space)
191192

193+
def time_iso8601_infer_zero_tz_fromat(self):
194+
# GH 41047
195+
to_datetime(self.strings_zero_tz, infer_datetime_format=True)
196+
192197

193198
class ToDatetimeNONISO8601:
194199
def setup(self):

asv_bench/benchmarks/rolling.py

+27
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,33 @@ def time_quantile(self, constructor, window, dtype, percentile, interpolation):
180180
self.roll.quantile(percentile, interpolation=interpolation)
181181

182182

183+
class Rank:
184+
params = (
185+
["DataFrame", "Series"],
186+
[10, 1000],
187+
["int", "float"],
188+
[True, False],
189+
[True, False],
190+
["min", "max", "average"],
191+
)
192+
param_names = [
193+
"constructor",
194+
"window",
195+
"dtype",
196+
"percentile",
197+
"ascending",
198+
"method",
199+
]
200+
201+
def setup(self, constructor, window, dtype, percentile, ascending, method):
202+
N = 10 ** 5
203+
arr = np.random.random(N).astype(dtype)
204+
self.roll = getattr(pd, constructor)(arr).rolling(window)
205+
206+
def time_rank(self, constructor, window, dtype, percentile, ascending, method):
207+
self.roll.rank(pct=percentile, ascending=ascending, method=method)
208+
209+
183210
class PeakMemFixedWindowMinMax:
184211

185212
params = ["min", "max"]

asv_bench/benchmarks/series_methods.py

+13
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,19 @@ def time_constructor(self, data):
2727
Series(data=self.data, index=self.idx)
2828

2929

30+
class ToFrame:
31+
params = [["int64", "datetime64[ns]", "category", "Int64"], [None, "foo"]]
32+
param_names = ["dtype", "name"]
33+
34+
def setup(self, dtype, name):
35+
arr = np.arange(10 ** 5)
36+
ser = Series(arr, dtype=dtype)
37+
self.ser = ser
38+
39+
def time_to_frame(self, dtype, name):
40+
self.ser.to_frame(name)
41+
42+
3043
class NSort:
3144

3245
params = ["first", "last", "all"]

asv_bench/benchmarks/sparse.py

+28
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,20 @@ def time_sparse_series_to_coo_single_level(self, sort_labels):
9191
self.ss_two_lvl.sparse.to_coo(sort_labels=sort_labels)
9292

9393

94+
class ToCooFrame:
95+
def setup(self):
96+
N = 10000
97+
k = 10
98+
arr = np.full((N, k), np.nan)
99+
arr[0, 0] = 3.0
100+
arr[12, 7] = -1.0
101+
arr[0, 9] = 11.2
102+
self.df = pd.DataFrame(arr, dtype=pd.SparseDtype("float"))
103+
104+
def time_to_coo(self):
105+
self.df.sparse.to_coo()
106+
107+
94108
class Arithmetic:
95109

96110
params = ([0.1, 0.01], [0, np.nan])
@@ -152,4 +166,18 @@ def time_division(self, fill_value):
152166
self.arr1 / self.arr2
153167

154168

169+
class MinMax:
170+
171+
params = (["min", "max"], [0.0, np.nan])
172+
param_names = ["func", "fill_value"]
173+
174+
def setup(self, func, fill_value):
175+
N = 1_000_000
176+
arr = make_array(N, 1e-5, fill_value, np.float64)
177+
self.sp_arr = SparseArray(arr, fill_value=fill_value)
178+
179+
def time_min_max(self, func, fill_value):
180+
getattr(self.sp_arr, func)()
181+
182+
155183
from .pandas_vb_common import setup # noqa: F401 isort:skip

ci/azure/posix.yml

+7-1
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,16 @@ jobs:
99
strategy:
1010
matrix:
1111
${{ if eq(parameters.name, 'macOS') }}:
12-
py38_macos:
12+
py38_macos_1:
1313
ENV_FILE: ci/deps/azure-macos-38.yaml
1414
CONDA_PY: "38"
1515
PATTERN: "not slow and not network"
16+
PYTEST_TARGET: "pandas/tests/[a-h]*"
17+
py38_macos_2:
18+
ENV_FILE: ci/deps/azure-macos-38.yaml
19+
CONDA_PY: "38"
20+
PATTERN: "not slow and not network"
21+
PYTEST_TARGET: "pandas/tests/[i-z]*"
1622

1723
steps:
1824
- script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'

ci/azure/windows.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -13,28 +13,28 @@ jobs:
1313
CONDA_PY: "38"
1414
PATTERN: "not slow and not network"
1515
PYTEST_WORKERS: 2 # GH-42236
16-
PYTEST_TARGET: "pandas/tests/[a-i]*"
16+
PYTEST_TARGET: "pandas/tests/[a-h]*"
1717

1818
py38_np18_2:
1919
ENV_FILE: ci/deps/azure-windows-38.yaml
2020
CONDA_PY: "38"
2121
PATTERN: "not slow and not network"
2222
PYTEST_WORKERS: 2 # GH-42236
23-
PYTEST_TARGET: "pandas/tests/[j-z]*"
23+
PYTEST_TARGET: "pandas/tests/[i-z]*"
2424

2525
py39_1:
2626
ENV_FILE: ci/deps/azure-windows-39.yaml
2727
CONDA_PY: "39"
2828
PATTERN: "not slow and not network and not high_memory"
2929
PYTEST_WORKERS: 2 # GH-42236
30-
PYTEST_TARGET: "pandas/tests/[a-i]*"
30+
PYTEST_TARGET: "pandas/tests/[a-h]*"
3131

3232
py39_2:
3333
ENV_FILE: ci/deps/azure-windows-39.yaml
3434
CONDA_PY: "39"
3535
PATTERN: "not slow and not network and not high_memory"
3636
PYTEST_WORKERS: 2 # GH-42236
37-
PYTEST_TARGET: "pandas/tests/[j-z]*"
37+
PYTEST_TARGET: "pandas/tests/[i-z]*"
3838

3939
steps:
4040
- powershell: |

doc/source/reference/style.rst

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ Style application
3939
Styler.apply_index
4040
Styler.applymap_index
4141
Styler.format
42+
Styler.format_index
4243
Styler.hide_index
4344
Styler.hide_columns
4445
Styler.set_td_classes

doc/source/reference/window.rst

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Rolling window functions
3535
Rolling.aggregate
3636
Rolling.quantile
3737
Rolling.sem
38+
Rolling.rank
3839

3940
.. _api.functions_window:
4041

@@ -75,6 +76,7 @@ Expanding window functions
7576
Expanding.aggregate
7677
Expanding.quantile
7778
Expanding.sem
79+
Expanding.rank
7880

7981
.. _api.functions_ewm:
8082

doc/source/user_guide/10min.rst

+8
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,14 @@ The :meth:`~plt.close` method is used to `close <https://matplotlib.org/3.1.1/ap
733733
@savefig series_plot_basic.png
734734
ts.plot();
735735
736+
If running under Jupyter Notebook, the plot will appear as soon as ``ts.plot()`` is called. Otherwise use
737+
`matplotlib.pyplot.show <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.show.html>`__ to show it or
738+
`matplotlib.pyplot.savefig <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.savefig.html>`__ to write it to a file.
739+
740+
.. ipython:: python
741+
742+
plt.show();
743+
736744
On a DataFrame, the :meth:`~DataFrame.plot` method is a convenience to plot all
737745
of the columns with labels:
738746

doc/source/user_guide/style.ipynb

+48-7
Original file line numberDiff line numberDiff line change
@@ -150,15 +150,14 @@
150150
"\n",
151151
"### Formatting Values\n",
152152
"\n",
153-
"Before adding styles it is useful to show that the [Styler][styler] can distinguish the *display* value from the *actual* value. To control the display value, the text is printed in each cell, and we can use the [.format()][formatfunc] method to manipulate this according to a [format spec string][format] or a callable that takes a single value and returns a string. It is possible to define this for the whole table or for individual columns. \n",
153+
"Before adding styles it is useful to show that the [Styler][styler] can distinguish the *display* value from the *actual* value, in both data values and index or column headers. To control the display value, the text is printed in each cell as a string, and we can use the [.format()][formatfunc] and [.format_index()][formatfuncindex] methods to manipulate this according to a [format spec string][format] or a callable that takes a single value and returns a string. It is possible to define this for the whole table, or index, or for individual columns, or MultiIndex levels. \n",
154154
"\n",
155-
"Additionally, the format function has a **precision** argument to specifically help formatting floats, as well as **decimal** and **thousands** separators to support other locales, an **na_rep** argument to display missing data, and an **escape** argument to help displaying safe-HTML or safe-LaTeX. The default formatter is configured to adopt pandas' regular `display.precision` option, controllable using `with pd.option_context('display.precision', 2):`\n",
156-
"\n",
157-
"Here is an example of using the multiple options to control the formatting generally and with specific column formatters.\n",
155+
"Additionally, the format function has a **precision** argument to specifically help formatting floats, as well as **decimal** and **thousands** separators to support other locales, an **na_rep** argument to display missing data, and an **escape** argument to help displaying safe-HTML or safe-LaTeX. The default formatter is configured to adopt pandas' regular `display.precision` option, controllable using `with pd.option_context('display.precision', 2):` \n",
158156
"\n",
159157
"[styler]: ../reference/api/pandas.io.formats.style.Styler.rst\n",
160158
"[format]: https://docs.python.org/3/library/string.html#format-specification-mini-language\n",
161-
"[formatfunc]: ../reference/api/pandas.io.formats.style.Styler.format.rst"
159+
"[formatfunc]: ../reference/api/pandas.io.formats.style.Styler.format.rst\n",
160+
"[formatfuncindex]: ../reference/api/pandas.io.formats.style.Styler.format_index.rst"
162161
]
163162
},
164163
{
@@ -173,6 +172,49 @@
173172
" })"
174173
]
175174
},
175+
{
176+
"cell_type": "markdown",
177+
"metadata": {},
178+
"source": [
179+
"Using Styler to manipulate the display is a useful feature because maintaining the indexing and data values for other purposes gives greater control. You do not have to overwrite your DataFrame to display it how you like. Here is an example of using the formatting functions whilst still relying on the underlying data for indexing and calculations."
180+
]
181+
},
182+
{
183+
"cell_type": "code",
184+
"execution_count": null,
185+
"metadata": {},
186+
"outputs": [],
187+
"source": [
188+
"weather_df = pd.DataFrame(np.random.rand(10,2)*5, \n",
189+
" index=pd.date_range(start=\"2021-01-01\", periods=10),\n",
190+
" columns=[\"Tokyo\", \"Beijing\"])\n",
191+
"\n",
192+
"def rain_condition(v): \n",
193+
" if v < 1.75:\n",
194+
" return \"Dry\"\n",
195+
" elif v < 2.75:\n",
196+
" return \"Rain\"\n",
197+
" return \"Heavy Rain\"\n",
198+
"\n",
199+
"def make_pretty(styler):\n",
200+
" styler.set_caption(\"Weather Conditions\")\n",
201+
" styler.format(rain_condition)\n",
202+
" styler.format_index(lambda v: v.strftime(\"%A\"))\n",
203+
" styler.background_gradient(axis=None, vmin=1, vmax=5, cmap=\"YlGnBu\")\n",
204+
" return styler\n",
205+
"\n",
206+
"weather_df"
207+
]
208+
},
209+
{
210+
"cell_type": "code",
211+
"execution_count": null,
212+
"metadata": {},
213+
"outputs": [],
214+
"source": [
215+
"weather_df.loc[\"2021-01-04\":\"2021-01-08\"].style.pipe(make_pretty)"
216+
]
217+
},
176218
{
177219
"cell_type": "markdown",
178220
"metadata": {},
@@ -187,7 +229,7 @@
187229
"\n",
188230
"Hiding does not change the integer arrangement of CSS classes, e.g. hiding the first two columns of a DataFrame means the column class indexing will start at `col2`, since `col0` and `col1` are simply ignored.\n",
189231
"\n",
190-
"We can update our `Styler` object to hide some data and format the values.\n",
232+
"We can update our `Styler` object from before to hide some data and format the values.\n",
191233
"\n",
192234
"[hideidx]: ../reference/api/pandas.io.formats.style.Styler.hide_index.rst\n",
193235
"[hidecols]: ../reference/api/pandas.io.formats.style.Styler.hide_columns.rst"
@@ -1974,7 +2016,6 @@
19742016
}
19752017
],
19762018
"metadata": {
1977-
"celltoolbar": "Edit Metadata",
19782019
"kernelspec": {
19792020
"display_name": "Python 3",
19802021
"language": "python",

doc/source/whatsnew/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 1.3
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v1.3.4
2728
v1.3.3
2829
v1.3.2
2930
v1.3.1

0 commit comments

Comments
 (0)