Skip to content

Commit 5ac507b

Browse files
committed
Merge branch 'master' into issue-37643.
2 parents d4c13f6 + ab82fb0 commit 5ac507b

File tree

449 files changed

+19221
-15467
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

449 files changed

+19221
-15467
lines changed

.github/ISSUE_TEMPLATE/documentation_improvement.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ labels: "Docs, Needs Triage"
99

1010
#### Location of the documentation
1111

12-
[this should provide the location of the documentation, e.g. "pandas.read_csv" or the URL of the documentation, e.g. "https://dev.pandas.io/docs/reference/api/pandas.read_csv.html"]
12+
[this should provide the location of the documentation, e.g. "pandas.read_csv" or the URL of the documentation, e.g. "https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html"]
1313

1414
**Note**: You can check the latest versions of the docs on `master` [here](https://pandas.pydata.org/docs/dev/).
1515

.github/workflows/comment_bot.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ jobs:
2929
- name: Install-pre-commit
3030
run: python -m pip install --upgrade pre-commit
3131
- name: Run pre-commit
32-
run: pre-commit run --all-files || (exit 0)
32+
run: pre-commit run --from-ref=origin/master --to-ref=HEAD --all-files || (exit 0)
3333
- name: Commit results
3434
run: |
3535
git config user.name "$(git log -1 --pretty=format:%an)"

.pre-commit-config.yaml

+42-21
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,12 @@ repos:
2424
hooks:
2525
- id: isort
2626
- repo: https://github.com/asottile/pyupgrade
27-
rev: v2.7.4
27+
rev: v2.10.0
2828
hooks:
2929
- id: pyupgrade
30-
args: [--py37-plus]
30+
args: [--py37-plus, --keep-runtime-typing]
3131
- repo: https://github.com/pre-commit/pygrep-hooks
32-
rev: v1.7.0
32+
rev: v1.7.1
3333
hooks:
3434
- id: rst-backticks
3535
- id: rst-directive-colons
@@ -60,11 +60,11 @@ repos:
6060
entry: |
6161
(?x)
6262
# Check for imports from pandas.core.common instead of `import pandas.core.common as com`
63-
from\ pandas\.core\.common\ import|
64-
from\ pandas\.core\ import\ common|
63+
from\ pandas\.core\.common\ import
64+
|from\ pandas\.core\ import\ common
6565
6666
# Check for imports from collections.abc instead of `from collections import abc`
67-
from\ collections\.abc\ import
67+
|from\ collections\.abc\ import
6868
6969
- id: non-standard-numpy.random-related-imports
7070
name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
@@ -73,24 +73,24 @@ repos:
7373
entry: |
7474
(?x)
7575
# Check for `from numpy import random` or `from numpy.random import <method>` instead of `np.random.<method>`
76-
from\ numpy\ import\ random|
77-
from\ numpy.random\ import
76+
from\ numpy\ import\ random
77+
|from\ numpy.random\ import
7878
types: [python]
7979
- id: non-standard-imports-in-tests
8080
name: Check for non-standard imports in test suite
8181
language: pygrep
8282
entry: |
8383
(?x)
8484
# Check for imports from pandas._testing instead of `import pandas._testing as tm`
85-
from\ pandas\._testing\ import|
86-
from\ pandas\ import\ _testing\ as\ tm|
85+
from\ pandas\._testing\ import
86+
|from\ pandas\ import\ _testing\ as\ tm
8787
8888
# No direct imports from conftest
89-
conftest\ import|
90-
import\ conftest
89+
|conftest\ import
90+
|import\ conftest
9191
9292
# Check for use of pandas.testing instead of tm
93-
pd\.testing\.
93+
|pd\.testing\.
9494
types: [python]
9595
files: ^pandas/tests/
9696
- id: incorrect-code-directives
@@ -127,6 +127,12 @@ repos:
127127
types: [python]
128128
files: ^pandas/tests/
129129
exclude: ^pandas/tests/extension/
130+
- id: unwanted-patters-pytest-xfail
131+
name: Check for use of pytest.xfail
132+
entry: pytest\.xfail
133+
language: pygrep
134+
types: [python]
135+
files: ^pandas/tests/
130136
- id: inconsistent-namespace-usage
131137
name: 'Check for inconsistent use of pandas namespace in tests'
132138
entry: python scripts/check_for_inconsistent_pandas_namespace.py
@@ -135,7 +141,7 @@ repos:
135141
files: ^pandas/tests/
136142
- id: FrameOrSeriesUnion
137143
name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
138-
entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
144+
entry: Union\[.*(Series,.*DataFrame|DataFrame,.*Series).*\]
139145
language: pygrep
140146
types: [python]
141147
exclude: ^pandas/_typing\.py$
@@ -148,9 +154,8 @@ repos:
148154
name: Check for outdated annotation syntax and missing error codes
149155
entry: |
150156
(?x)
151-
\#\ type:\ (?!ignore)|
152-
\#\ type:\s?ignore(?!\[)|
153-
\)\ ->\ \"
157+
\#\ type:\ (?!ignore)
158+
|\#\ type:\s?ignore(?!\[)
154159
language: pygrep
155160
types: [python]
156161
- id: np-bool
@@ -166,9 +171,15 @@ repos:
166171
files: ^pandas/tests/
167172
exclude: |
168173
(?x)^
169-
pandas/tests/io/excel/test_writers\.py|
170-
pandas/tests/io/pytables/common\.py|
171-
pandas/tests/io/pytables/test_store\.py$
174+
pandas/tests/io/excel/test_writers\.py
175+
|pandas/tests/io/pytables/common\.py
176+
|pandas/tests/io/pytables/test_store\.py$
177+
- id: no-pandas-api-types
178+
name: Check code for instances of pd.api.types
179+
entry: (pd|pandas)\.api\.types\.
180+
language: pygrep
181+
types: [python]
182+
files: ^pandas/tests/
172183
- repo: https://github.com/asottile/yesqa
173184
rev: v1.2.2
174185
hooks:
@@ -184,4 +195,14 @@ repos:
184195
hooks:
185196
- id: codespell
186197
types_or: [python, rst, markdown]
187-
files: ^pandas/core/
198+
files: ^pandas/
199+
exclude: ^pandas/tests/
200+
- repo: https://github.com/MarcoGorelli/no-string-hints
201+
rev: v0.1.7
202+
hooks:
203+
- id: no-string-hints
204+
- repo: https://github.com/MarcoGorelli/abs-imports
205+
rev: v0.1.2
206+
hooks:
207+
- id: abs-imports
208+
files: ^pandas/

README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
<div align="center">
2-
<img src="https://dev.pandas.io/static/img/pandas.svg"><br>
2+
<img src="https://pandas.pydata.org/static/img/pandas.svg"><br>
33
</div>
44

55
-----------------

asv_bench/benchmarks/categoricals.py

+18-1
Original file line number | Diff line number | Diff line change
@@ -118,12 +118,29 @@ def setup(self):
118118
self.a = pd.Categorical(list("aabbcd") * N)
119119
self.b = pd.Categorical(list("bbcdjk") * N)
120120

121+
self.idx_a = pd.CategoricalIndex(range(N), range(N))
122+
self.idx_b = pd.CategoricalIndex(range(N + 1), range(N + 1))
123+
self.df_a = pd.DataFrame(range(N), columns=["a"], index=self.idx_a)
124+
self.df_b = pd.DataFrame(range(N + 1), columns=["a"], index=self.idx_b)
125+
121126
def time_concat(self):
122127
pd.concat([self.s, self.s])
123128

124129
def time_union(self):
125130
union_categoricals([self.a, self.b])
126131

132+
def time_append_overlapping_index(self):
133+
self.idx_a.append(self.idx_a)
134+
135+
def time_append_non_overlapping_index(self):
136+
self.idx_a.append(self.idx_b)
137+
138+
def time_concat_overlapping_index(self):
139+
pd.concat([self.df_a, self.df_a])
140+
141+
def time_concat_non_overlapping_index(self):
142+
pd.concat([self.df_a, self.df_b])
143+
127144

128145
class ValueCounts:
129146

@@ -306,7 +323,7 @@ def time_get_loc(self):
306323
self.index.get_loc(self.category)
307324

308325
def time_shallow_copy(self):
309-
self.index._shallow_copy()
326+
self.index._view()
310327

311328
def time_align(self):
312329
pd.DataFrame({"a": self.series, "b": self.series[:500]})

asv_bench/benchmarks/hash_functions.py

+9
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,15 @@ def time_isin_outside(self, dtype, exponent):
2525
self.s.isin(self.values_outside)
2626

2727

28+
class UniqueForLargePyObjectInts:
29+
def setup(self):
30+
lst = [x << 32 for x in range(5000)]
31+
self.arr = np.array(lst, dtype=np.object_)
32+
33+
def time_unique(self):
34+
pd.unique(self.arr)
35+
36+
2837
class IsinWithRandomFloat:
2938
params = [
3039
[np.float64, np.object],

asv_bench/benchmarks/indexing.py

+14
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,20 @@ def time_loc_list(self, monotonic):
243243
monotonic.loc[80000:]
244244

245245

246+
class DatetimeIndexIndexing:
247+
def setup(self):
248+
dti = date_range("2016-01-01", periods=10000, tz="US/Pacific")
249+
dti2 = dti.tz_convert("UTC")
250+
self.dti = dti
251+
self.dti2 = dti2
252+
253+
def time_get_indexer_mismatched_tz(self):
254+
# reached via e.g.
255+
# ser = Series(range(len(dti)), index=dti)
256+
# ser[dti2]
257+
self.dti.get_indexer(self.dti2)
258+
259+
246260
class CategoricalIndexIndexing:
247261

248262
params = ["monotonic_incr", "monotonic_decr", "non_monotonic"]

asv_bench/benchmarks/io/csv.py

+48
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,54 @@ def time_frame(self, obs):
7676
self.data.to_csv(self.fname)
7777

7878

79+
class ToCSVIndexes(BaseIO):
80+
81+
fname = "__test__.csv"
82+
83+
@staticmethod
84+
def _create_df(rows, cols):
85+
index_cols = {
86+
"index1": np.random.randint(0, rows, rows),
87+
"index2": np.full(rows, 1, dtype=int),
88+
"index3": np.full(rows, 1, dtype=int),
89+
}
90+
data_cols = {
91+
f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols)
92+
}
93+
df = DataFrame({**index_cols, **data_cols})
94+
return df
95+
96+
def setup(self):
97+
ROWS = 100000
98+
COLS = 5
99+
# For tests using .head(), create an initial dataframe with this many times
100+
# more rows
101+
HEAD_ROW_MULTIPLIER = 10
102+
103+
self.df_standard_index = self._create_df(ROWS, COLS)
104+
105+
self.df_custom_index_then_head = (
106+
self._create_df(ROWS * HEAD_ROW_MULTIPLIER, COLS)
107+
.set_index(["index1", "index2", "index3"])
108+
.head(ROWS)
109+
)
110+
111+
self.df_head_then_custom_index = (
112+
self._create_df(ROWS * HEAD_ROW_MULTIPLIER, COLS)
113+
.head(ROWS)
114+
.set_index(["index1", "index2", "index3"])
115+
)
116+
117+
def time_standard_index(self):
118+
self.df_standard_index.to_csv(self.fname)
119+
120+
def time_multiindex(self):
121+
self.df_head_then_custom_index.to_csv(self.fname)
122+
123+
def time_head_of_multiindex(self):
124+
self.df_custom_index_then_head.to_csv(self.fname)
125+
126+
79127
class StringIORewind:
80128
def data(self, stringio_object):
81129
stringio_object.seek(0)

asv_bench/benchmarks/period.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ def time_get_loc(self):
8686
self.index.get_loc(self.period)
8787

8888
def time_shallow_copy(self):
89-
self.index._shallow_copy()
89+
self.index._view()
9090

9191
def time_series_loc(self):
9292
self.series.loc[self.period]

asv_bench/benchmarks/reshape.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import pandas as pd
77
from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long
8+
from pandas.api.types import CategoricalDtype
89

910

1011
class Melt:
@@ -196,7 +197,7 @@ def setup(self):
196197
categories = list(string.ascii_letters[:12])
197198
s = pd.Series(
198199
np.random.choice(categories, size=1000000),
199-
dtype=pd.api.types.CategoricalDtype(categories),
200+
dtype=CategoricalDtype(categories),
200201
)
201202
self.s = s
202203

asv_bench/benchmarks/rolling.py

+23
Original file line number | Diff line number | Diff line change
@@ -140,8 +140,11 @@ class Pairwise:
140140

141141
def setup(self, window, method, pairwise):
142142
N = 10 ** 4
143+
n_groups = 20
144+
groups = [i for _ in range(N // n_groups) for i in range(n_groups)]
143145
arr = np.random.random(N)
144146
self.df = pd.DataFrame(arr)
147+
self.df_group = pd.DataFrame({"A": groups, "B": arr}).groupby("A")
145148

146149
def time_pairwise(self, window, method, pairwise):
147150
if window is None:
@@ -150,6 +153,13 @@ def time_pairwise(self, window, method, pairwise):
150153
r = self.df.rolling(window=window)
151154
getattr(r, method)(self.df, pairwise=pairwise)
152155

156+
def time_groupby(self, window, method, pairwise):
157+
if window is None:
158+
r = self.df_group.expanding()
159+
else:
160+
r = self.df_group.rolling(window=window)
161+
getattr(r, method)(self.df, pairwise=pairwise)
162+
153163

154164
class Quantile:
155165
params = (
@@ -245,6 +255,19 @@ def time_rolling_multiindex_creation(self):
245255

246256
class GroupbyEWM:
247257

258+
params = ["var", "std", "cov", "corr"]
259+
param_names = ["method"]
260+
261+
def setup(self, method):
262+
df = pd.DataFrame({"A": range(50), "B": range(50)})
263+
self.gb_ewm = df.groupby("A").ewm(com=1.0)
264+
265+
def time_groupby_method(self, method):
266+
getattr(self.gb_ewm, method)()
267+
268+
269+
class GroupbyEWMEngine:
270+
248271
params = ["cython", "numba"]
249272
param_names = ["engine"]
250273

0 commit comments

Comments
 (0)