Skip to content

Commit 6d6d1fb

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into BUG29751
2 parents f0b1f98 + 3526a71 commit 6d6d1fb

File tree

255 files changed

+10198
-9340
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

255 files changed

+10198
-9340
lines changed

.github/PULL_REQUEST_TEMPLATE.md

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
- [ ] closes #xxxx
22
- [ ] tests added / passed
3-
- [ ] passes `black pandas`
4-
- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff`
3+
- [ ] Ensure all linting tests pass, see [here](https://pandas.pydata.org/pandas-docs/dev/development/contributing.html#code-standards) for how to run them
54
- [ ] whatsnew entry

.pre-commit-config.yaml

+10-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
minimum_pre_commit_version: 2.9.2
2+
exclude: ^LICENSES/|\.(html|csv|svg)$
23
repos:
34
- repo: https://github.com/python/black
45
rev: 20.8b1
@@ -121,6 +122,13 @@ repos:
121122
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
122123
types: [python]
123124
exclude: ^(asv_bench|pandas/tests|doc)/
125+
- id: unwanted-patterns-bare-pytest-raises
126+
name: Check for use of bare pytest raises
127+
language: python
128+
entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises"
129+
types: [python]
130+
files: ^pandas/tests/
131+
exclude: ^pandas/tests/(computation|extension|io)/
124132
- id: inconsistent-namespace-usage
125133
name: 'Check for inconsistent use of pandas namespace in tests'
126134
entry: python scripts/check_for_inconsistent_pandas_namespace.py
@@ -137,7 +145,7 @@ repos:
137145
name: Check for use of foo.__class__ instead of type(foo)
138146
entry: \.__class__
139147
language: pygrep
140-
files: \.(py|pyx)$
148+
types_or: [python, cython]
141149
- id: unwanted-typing
142150
name: Check for use of comment-based annotation syntax and missing error codes
143151
entry: |
@@ -165,9 +173,8 @@ repos:
165173
rev: v3.4.0
166174
hooks:
167175
- id: end-of-file-fixer
168-
exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
176+
exclude: \.txt$
169177
- id: trailing-whitespace
170-
exclude: \.(html|svg)$
171178
- repo: https://github.com/codespell-project/codespell
172179
rev: v2.0.0
173180
hooks:

asv_bench/benchmarks/arithmetic.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -122,18 +122,18 @@ def setup(self, op):
122122
n_rows = 500
123123

124124
# construct dataframe with 2 blocks
125-
arr1 = np.random.randn(n_rows, int(n_cols / 2)).astype("f8")
126-
arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("f4")
125+
arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
126+
arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4")
127127
df = pd.concat(
128128
[pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True
129129
)
130130
# should already be the case, but just to be sure
131131
df._consolidate_inplace()
132132

133133
# TODO: GH#33198 the setting here shouldn't need two steps
134-
arr1 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
135-
arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("i8")
136-
arr3 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
134+
arr1 = np.random.randn(n_rows, n_cols // 4).astype("f8")
135+
arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
136+
arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
137137
df2 = pd.concat(
138138
[pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
139139
axis=1,

asv_bench/benchmarks/frame_methods.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ class Repr:
263263
def setup(self):
264264
nrows = 10000
265265
data = np.random.randn(nrows, 10)
266-
arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100)
266+
arrays = np.tile(np.random.randn(3, nrows // 100), 100)
267267
idx = MultiIndex.from_arrays(arrays)
268268
self.df3 = DataFrame(data, index=idx)
269269
self.df4 = DataFrame(data, index=np.random.randn(nrows))
@@ -597,6 +597,19 @@ def time_frame_quantile(self, axis):
597597
self.df.quantile([0.1, 0.5], axis=axis)
598598

599599

600+
class Rank:
601+
param_names = ["dtype"]
602+
params = [
603+
["int", "uint", "float", "object"],
604+
]
605+
606+
def setup(self, dtype):
607+
self.df = DataFrame(np.random.randn(10000, 10), columns=range(10), dtype=dtype)
608+
609+
def time_rank(self, dtype):
610+
self.df.rank()
611+
612+
600613
class GetDtypeCounts:
601614
# 2807
602615
def setup(self):
@@ -635,9 +648,9 @@ class Describe:
635648
def setup(self):
636649
self.df = DataFrame(
637650
{
638-
"a": np.random.randint(0, 100, int(1e6)),
639-
"b": np.random.randint(0, 100, int(1e6)),
640-
"c": np.random.randint(0, 100, int(1e6)),
651+
"a": np.random.randint(0, 100, 10 ** 6),
652+
"b": np.random.randint(0, 100, 10 ** 6),
653+
"c": np.random.randint(0, 100, 10 ** 6),
641654
}
642655
)
643656

asv_bench/benchmarks/groupby.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
"skew",
3030
"cumprod",
3131
"cummax",
32-
"rank",
3332
"pct_change",
3433
"min",
3534
"var",
@@ -127,6 +126,9 @@ def setup(self, data, key):
127126
def time_series_groups(self, data, key):
128127
self.ser.groupby(self.ser).groups
129128

129+
def time_series_indices(self, data, key):
130+
self.ser.groupby(self.ser).indices
131+
130132

131133
class GroupManyLabels:
132134

asv_bench/benchmarks/hash_functions.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,9 @@ class Float64GroupIndex:
103103
# GH28303
104104
def setup(self):
105105
self.df = pd.date_range(
106-
start="1/1/2018", end="1/2/2018", periods=1e6
106+
start="1/1/2018", end="1/2/2018", periods=10 ** 6
107107
).to_frame()
108-
self.group_index = np.round(self.df.index.astype(int) / 1e9)
108+
self.group_index = np.round(self.df.index.astype(int) / 10 ** 9)
109109

110110
def time_groupby(self):
111111
self.df.groupby(self.group_index).last()

asv_bench/benchmarks/inference.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class ToNumericDowncast:
4242
]
4343

4444
N = 500000
45-
N2 = int(N / 2)
45+
N2 = N // 2
4646

4747
data_dict = {
4848
"string-int": ["1"] * N2 + [2] * N2,

asv_bench/benchmarks/join_merge.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def setup(self):
158158
daily_dates = date_index.to_period("D").to_timestamp("S", "S")
159159
self.fracofday = date_index.values - daily_dates.values
160160
self.fracofday = self.fracofday.astype("timedelta64[ns]")
161-
self.fracofday = self.fracofday.astype(np.float64) / 86400000000000.0
161+
self.fracofday = self.fracofday.astype(np.float64) / 86_400_000_000_000
162162
self.fracofday = Series(self.fracofday, daily_dates)
163163
index = date_range(date_index.min(), date_index.max(), freq="D")
164164
self.temp = Series(1.0, index)[self.fracofday.index]

asv_bench/benchmarks/rolling.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ class PeakMemFixedWindowMinMax:
171171
params = ["min", "max"]
172172

173173
def setup(self, operation):
174-
N = int(1e6)
174+
N = 10 ** 6
175175
arr = np.random.random(N)
176176
self.roll = pd.Series(arr).rolling(2)
177177

@@ -233,7 +233,7 @@ class GroupbyLargeGroups:
233233

234234
def setup(self):
235235
N = 100000
236-
self.df = pd.DataFrame({"A": [1, 2] * int(N / 2), "B": np.random.randn(N)})
236+
self.df = pd.DataFrame({"A": [1, 2] * (N // 2), "B": np.random.randn(N)})
237237

238238
def time_rolling_multiindex_creation(self):
239239
self.df.groupby("A").rolling(3).mean()

asv_bench/benchmarks/series_methods.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ def time_dir_strings(self):
284284
class SeriesGetattr:
285285
# https://github.com/pandas-dev/pandas/issues/19764
286286
def setup(self):
287-
self.s = Series(1, index=date_range("2012-01-01", freq="s", periods=int(1e6)))
287+
self.s = Series(1, index=date_range("2012-01-01", freq="s", periods=10 ** 6))
288288

289289
def time_series_datetimeindex_repr(self):
290290
getattr(self.s, "a", None)
@@ -349,4 +349,18 @@ def time_func(self, func, N, dtype):
349349
self.func()
350350

351351

352+
class Rank:
353+
354+
param_names = ["dtype"]
355+
params = [
356+
["int", "uint", "float", "object"],
357+
]
358+
359+
def setup(self, dtype):
360+
self.s = Series(np.random.randint(0, 1000, size=100000), dtype=dtype)
361+
362+
def time_rank(self, dtype):
363+
self.s.rank()
364+
365+
352366
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/timeseries.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ def time_iso8601_tz_spaceformat(self):
346346
class ToDatetimeNONISO8601:
347347
def setup(self):
348348
N = 10000
349-
half = int(N / 2)
349+
half = N // 2
350350
ts_string_1 = "March 1, 2018 12:00:00+0400"
351351
ts_string_2 = "March 1, 2018 12:00:00+0500"
352352
self.same_offset = [ts_string_1] * N
@@ -376,7 +376,7 @@ def setup(self):
376376
self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N
377377
self.diff_offset = [
378378
f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10)
379-
] * int(N / 10)
379+
] * (N // 10)
380380

381381
def time_exact(self):
382382
to_datetime(self.s2, format="%d%b%y")

doc/source/conf.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,12 @@
6868
"contributors", # custom pandas extension
6969
]
7070

71-
exclude_patterns = ["**.ipynb_checkpoints"]
71+
exclude_patterns = [
72+
"**.ipynb_checkpoints",
73+
# to ensure that include files (partial pages) aren't built, exclude them
74+
# https://github.com/sphinx-doc/sphinx/issues/1965#issuecomment-124732907
75+
"**/includes/**",
76+
]
7277
try:
7378
import nbconvert
7479
except ImportError:

doc/source/development/test_writing.rst

-7
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,6 @@ be located.
149149
``frame_or_series`` fixture, by convention it goes in the
150150
``tests.frame`` file.
151151

152-
- tests.generic.methods.test_mymethod
153-
154-
.. note::
155-
156-
The generic/methods/ directory is only for methods with tests
157-
that are fully parametrized over Series/DataFrame
158-
159152
7. Is your test for an Index method, not depending on Series/DataFrame?
160153
This test likely belongs in one of:
161154

0 commit comments

Comments
 (0)