Skip to content

Commit df57420

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 62e6c1a + 68db2d2 commit df57420

File tree

245 files changed

+9797
-8622
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

245 files changed

+9797
-8622
lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
- [ ] closes #xxxx
22
- [ ] tests added / passed
3-
- [ ] passes `black pandas`
4-
- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff`
3+
- [ ] Ensure all linting tests pass, see [here](https://pandas.pydata.org/pandas-docs/dev/development/contributing.html#code-standards) for how to run them
54
- [ ] whatsnew entry

.pre-commit-config.yaml

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
minimum_pre_commit_version: 2.9.2
2+
exclude: ^LICENSES/|\.(html|csv|svg)$
23
repos:
34
- repo: https://github.com/python/black
45
rev: 20.8b1
@@ -19,11 +20,9 @@ repos:
1920
types: [text]
2021
args: [--append-config=flake8/cython-template.cfg]
2122
- repo: https://github.com/PyCQA/isort
22-
rev: 5.6.4
23+
rev: 5.7.0
2324
hooks:
2425
- id: isort
25-
types: [text] # overwrite upstream `types: [python]`
26-
types_or: [python, cython]
2726
- repo: https://github.com/asottile/pyupgrade
2827
rev: v2.7.4
2928
hooks:
@@ -121,6 +120,13 @@ repos:
121120
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
122121
types: [python]
123122
exclude: ^(asv_bench|pandas/tests|doc)/
123+
- id: unwanted-patterns-bare-pytest-raises
124+
name: Check for use of bare pytest raises
125+
language: python
126+
entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises"
127+
types: [python]
128+
files: ^pandas/tests/
129+
exclude: ^pandas/tests/extension/
124130
- id: inconsistent-namespace-usage
125131
name: 'Check for inconsistent use of pandas namespace in tests'
126132
entry: python scripts/check_for_inconsistent_pandas_namespace.py
@@ -137,7 +143,7 @@ repos:
137143
name: Check for use of foo.__class__ instead of type(foo)
138144
entry: \.__class__
139145
language: pygrep
140-
files: \.(py|pyx)$
146+
types_or: [python, cython]
141147
- id: unwanted-typing
142148
name: Check for use of comment-based annotation syntax and missing error codes
143149
entry: |
@@ -165,9 +171,8 @@ repos:
165171
rev: v3.4.0
166172
hooks:
167173
- id: end-of-file-fixer
168-
exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
174+
exclude: \.txt$
169175
- id: trailing-whitespace
170-
exclude: \.(html|svg)$
171176
- repo: https://github.com/codespell-project/codespell
172177
rev: v2.0.0
173178
hooks:

asv_bench/benchmarks/arithmetic.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,18 +122,18 @@ def setup(self, op):
122122
n_rows = 500
123123

124124
# construct dataframe with 2 blocks
125-
arr1 = np.random.randn(n_rows, int(n_cols / 2)).astype("f8")
126-
arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("f4")
125+
arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
126+
arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4")
127127
df = pd.concat(
128128
[pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True
129129
)
130130
# should already be the case, but just to be sure
131131
df._consolidate_inplace()
132132

133133
# TODO: GH#33198 the setting here shouldn't need two steps
134-
arr1 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
135-
arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("i8")
136-
arr3 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
134+
arr1 = np.random.randn(n_rows, n_cols // 4).astype("f8")
135+
arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
136+
arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
137137
df2 = pd.concat(
138138
[pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
139139
axis=1,

asv_bench/benchmarks/frame_methods.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ class Repr:
263263
def setup(self):
264264
nrows = 10000
265265
data = np.random.randn(nrows, 10)
266-
arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100)
266+
arrays = np.tile(np.random.randn(3, nrows // 100), 100)
267267
idx = MultiIndex.from_arrays(arrays)
268268
self.df3 = DataFrame(data, index=idx)
269269
self.df4 = DataFrame(data, index=np.random.randn(nrows))
@@ -648,9 +648,9 @@ class Describe:
648648
def setup(self):
649649
self.df = DataFrame(
650650
{
651-
"a": np.random.randint(0, 100, int(1e6)),
652-
"b": np.random.randint(0, 100, int(1e6)),
653-
"c": np.random.randint(0, 100, int(1e6)),
651+
"a": np.random.randint(0, 100, 10 ** 6),
652+
"b": np.random.randint(0, 100, 10 ** 6),
653+
"c": np.random.randint(0, 100, 10 ** 6),
654654
}
655655
)
656656

asv_bench/benchmarks/groupby.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ def setup(self, data, key):
126126
def time_series_groups(self, data, key):
127127
self.ser.groupby(self.ser).groups
128128

129+
def time_series_indices(self, data, key):
130+
self.ser.groupby(self.ser).indices
131+
129132

130133
class GroupManyLabels:
131134

asv_bench/benchmarks/hash_functions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,9 @@ class Float64GroupIndex:
103103
# GH28303
104104
def setup(self):
105105
self.df = pd.date_range(
106-
start="1/1/2018", end="1/2/2018", periods=1e6
106+
start="1/1/2018", end="1/2/2018", periods=10 ** 6
107107
).to_frame()
108-
self.group_index = np.round(self.df.index.astype(int) / 1e9)
108+
self.group_index = np.round(self.df.index.astype(int) / 10 ** 9)
109109

110110
def time_groupby(self):
111111
self.df.groupby(self.group_index).last()

asv_bench/benchmarks/inference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class ToNumericDowncast:
4242
]
4343

4444
N = 500000
45-
N2 = int(N / 2)
45+
N2 = N // 2
4646

4747
data_dict = {
4848
"string-int": ["1"] * N2 + [2] * N2,

asv_bench/benchmarks/join_merge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def setup(self):
158158
daily_dates = date_index.to_period("D").to_timestamp("S", "S")
159159
self.fracofday = date_index.values - daily_dates.values
160160
self.fracofday = self.fracofday.astype("timedelta64[ns]")
161-
self.fracofday = self.fracofday.astype(np.float64) / 86400000000000.0
161+
self.fracofday = self.fracofday.astype(np.float64) / 86_400_000_000_000
162162
self.fracofday = Series(self.fracofday, daily_dates)
163163
index = date_range(date_index.min(), date_index.max(), freq="D")
164164
self.temp = Series(1.0, index)[self.fracofday.index]

asv_bench/benchmarks/rolling.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,20 +50,24 @@ class Engine:
5050
["int", "float"],
5151
[np.sum, lambda x: np.sum(x) + 5],
5252
["cython", "numba"],
53+
["sum", "max", "min", "median", "mean"],
5354
)
54-
param_names = ["constructor", "dtype", "function", "engine"]
55+
param_names = ["constructor", "dtype", "function", "engine", "method"]
5556

56-
def setup(self, constructor, dtype, function, engine):
57+
def setup(self, constructor, dtype, function, engine, method):
5758
N = 10 ** 3
5859
arr = (100 * np.random.random(N)).astype(dtype)
5960
self.data = getattr(pd, constructor)(arr)
6061

61-
def time_rolling_apply(self, constructor, dtype, function, engine):
62+
def time_rolling_apply(self, constructor, dtype, function, engine, method):
6263
self.data.rolling(10).apply(function, raw=True, engine=engine)
6364

64-
def time_expanding_apply(self, constructor, dtype, function, engine):
65+
def time_expanding_apply(self, constructor, dtype, function, engine, method):
6566
self.data.expanding().apply(function, raw=True, engine=engine)
6667

68+
def time_rolling_methods(self, constructor, dtype, function, engine, method):
69+
getattr(self.data.rolling(10), method)(engine=engine)
70+
6771

6872
class ExpandingMethods:
6973

@@ -171,7 +175,7 @@ class PeakMemFixedWindowMinMax:
171175
params = ["min", "max"]
172176

173177
def setup(self, operation):
174-
N = int(1e6)
178+
N = 10 ** 6
175179
arr = np.random.random(N)
176180
self.roll = pd.Series(arr).rolling(2)
177181

@@ -233,7 +237,7 @@ class GroupbyLargeGroups:
233237

234238
def setup(self):
235239
N = 100000
236-
self.df = pd.DataFrame({"A": [1, 2] * int(N / 2), "B": np.random.randn(N)})
240+
self.df = pd.DataFrame({"A": [1, 2] * (N // 2), "B": np.random.randn(N)})
237241

238242
def time_rolling_multiindex_creation(self):
239243
self.df.groupby("A").rolling(3).mean()

asv_bench/benchmarks/series_methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ def time_dir_strings(self):
284284
class SeriesGetattr:
285285
# https://github.com/pandas-dev/pandas/issues/19764
286286
def setup(self):
287-
self.s = Series(1, index=date_range("2012-01-01", freq="s", periods=int(1e6)))
287+
self.s = Series(1, index=date_range("2012-01-01", freq="s", periods=10 ** 6))
288288

289289
def time_series_datetimeindex_repr(self):
290290
getattr(self.s, "a", None)

asv_bench/benchmarks/timeseries.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ def time_iso8601_tz_spaceformat(self):
346346
class ToDatetimeNONISO8601:
347347
def setup(self):
348348
N = 10000
349-
half = int(N / 2)
349+
half = N // 2
350350
ts_string_1 = "March 1, 2018 12:00:00+0400"
351351
ts_string_2 = "March 1, 2018 12:00:00+0500"
352352
self.same_offset = [ts_string_1] * N
@@ -376,7 +376,7 @@ def setup(self):
376376
self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N
377377
self.diff_offset = [
378378
f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10)
379-
] * int(N / 10)
379+
] * (N // 10)
380380

381381
def time_exact(self):
382382
to_datetime(self.s2, format="%d%b%y")

ci/deps/actions-37-locale.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ dependencies:
3030
- openpyxl
3131
- pandas-gbq
3232
- google-cloud-bigquery>=1.27.2 # GH 36436
33-
- pyarrow>=0.17
33+
- pyarrow=0.17 # GH 38803
3434
- pytables>=3.5.1
3535
- scipy
3636
- xarray=0.12.3

doc/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@
432432

433433

434434
ipython_warning_is_error = False
435-
ipython_exec_lines = [
435+
ipython_execlines = [
436436
"import numpy as np",
437437
"import pandas as pd",
438438
# This ensures correct rendering on system with console encoding != utf8

doc/source/development/test_writing.rst

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,6 @@ be located.
149149
``frame_or_series`` fixture, by convention it goes in the
150150
``tests.frame`` file.
151151

152-
- tests.generic.methods.test_mymethod
153-
154-
.. note::
155-
156-
The generic/methods/ directory is only for methods with tests
157-
that are fully parametrized over Series/DataFrame
158-
159152
7. Is your test for an Index method, not depending on Series/DataFrame?
160153
This test likely belongs in one of:
161154

0 commit comments

Comments
 (0)