
Commit 115667e

Merge branch 'master' into fix/combine-first

2 parents: 9f841c9 + 6929e26

540 files changed: +17321 -7897 lines


.github/workflows/ci.yml (+3 -1)

@@ -4,7 +4,9 @@ on:
   push:
     branches: master
   pull_request:
-    branches: master
+    branches:
+      - master
+      - 1.1.x

 env:
   ENV_FILE: environment.yml

.github/workflows/stale-pr.yml (+21)

@@ -0,0 +1,21 @@
+name: "Stale PRs"
+on:
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    - cron: "0 */6 * * *"
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          stale-pr-message: "This pull request is stale because it has been open for thirty days with no activity."
+          skip-stale-pr-message: false
+          stale-pr-label: "Stale"
+          exempt-pr-labels: "Needs Review,Blocked"
+          days-before-stale: 30
+          days-before-close: -1
+          remove-stale-when-updated: true
+          debug-only: true

.pre-commit-config.yaml (-12)

@@ -30,15 +30,3 @@ repos:
   - id: isort
     language: python_venv
     exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
-- repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.730
-  hooks:
-  - id: mypy
-    args:
-    # As long as a some files are excluded from check-untyped-defs
-    # we have to exclude it from the pre-commit hook as the configuration
-    # is based on modules but the hook runs on files.
-    - --no-check-untyped-defs
-    - --follow-imports
-    - skip
-    files: pandas/

.travis.yml (+2 -2)

@@ -45,7 +45,7 @@ matrix:
       - JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"

   - env:
-      - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
+      - JOB="3.7, locale" ENV_FILE="ci/deps/travis-37-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
     services:
       - mysql
       - postgresql
@@ -54,7 +54,7 @@ matrix:
   # Enabling Deprecations when running tests
   # PANDAS_TESTING_MODE="deprecate" causes DeprecationWarning messages to be displayed in the logs
   # See pandas/_testing.py for more details.
-      - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
+      - JOB="3.7, coverage" ENV_FILE="ci/deps/travis-37-cov.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
     services:
       - mysql
       - postgresql

Makefile (+13)

@@ -25,3 +25,16 @@ doc:
 	cd doc; \
 	python make.py clean; \
 	python make.py html
+
+check:
+	python3 scripts/validate_unwanted_patterns.py \
+		--validation-type="private_function_across_module" \
+		--included-file-extensions="py" \
+		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
+		pandas/
+
+	python3 scripts/validate_unwanted_patterns.py \
+		--validation-type="private_import_across_module" \
+		--included-file-extensions="py" \
+		--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ \
+		pandas/

README.md (+1 -1)

@@ -32,7 +32,7 @@ its way towards this goal.
 Here are just a few of the things that pandas does well:

   - Easy handling of [**missing data**][missing-data] (represented as
-    `NaN`) in floating point as well as non-floating point data
+    `NaN`, `NA`, or `NaT`) in floating point as well as non-floating point data
  - Size mutability: columns can be [**inserted and
    deleted**][insertion-deletion] from DataFrame and higher dimensional
    objects
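
For reference, the three markers in the updated wording each belong to a different dtype context. A tiny illustration of standard pandas behavior (values chosen for the example):

    import pandas as pd

    pd.Series([1.5, None]).iloc[1]                          # NaN in float64 data
    pd.Series([1, None], dtype="Int64").iloc[1]             # <NA> in nullable dtypes
    pd.Series([pd.Timestamp("2020-01-01"), None]).iloc[1]   # NaT in datetime data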

asv_bench/asv.conf.json (+1 -1)

@@ -26,7 +26,7 @@
     // The Pythons you'd like to test against. If not provided, defaults
     // to the current version of Python used to run `asv`.
     // "pythons": ["2.7", "3.4"],
-    "pythons": ["3.6"],
+    "pythons": ["3.8"],

     // The matrix of dependencies to test. Each key is the name of a
     // package (in PyPI) and the values are version numbers. An empty

asv_bench/benchmarks/frame_ctor.py (+1 -1)

@@ -6,7 +6,7 @@
 from .pandas_vb_common import tm

 try:
-    from pandas.tseries.offsets import Nano, Hour
+    from pandas.tseries.offsets import Hour, Nano
 except ImportError:
     # For compatibility with older versions
     from pandas.core.datetools import *  # noqa

asv_bench/benchmarks/gil.py (+4 -4)

@@ -7,14 +7,14 @@

 try:
     from pandas import (
-        rolling_median,
+        rolling_kurt,
+        rolling_max,
         rolling_mean,
+        rolling_median,
         rolling_min,
-        rolling_max,
-        rolling_var,
         rolling_skew,
-        rolling_kurt,
         rolling_std,
+        rolling_var,
     )

     have_rolling_methods = True

asv_bench/benchmarks/groupby.py (+32 -14)

@@ -627,49 +627,63 @@ def time_first(self):


 class TransformEngine:
-    def setup(self):
+
+    param_names = ["parallel"]
+    params = [[True, False]]
+
+    def setup(self, parallel):
         N = 10 ** 3
         data = DataFrame(
             {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
             columns=[0, 1],
         )
+        self.parallel = parallel
         self.grouper = data.groupby(0)

-    def time_series_numba(self):
+    def time_series_numba(self, parallel):
         def function(values, index):
             return values * 5

-        self.grouper[1].transform(function, engine="numba")
+        self.grouper[1].transform(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )

-    def time_series_cython(self):
+    def time_series_cython(self, parallel):
         def function(values):
             return values * 5

         self.grouper[1].transform(function, engine="cython")

-    def time_dataframe_numba(self):
+    def time_dataframe_numba(self, parallel):
         def function(values, index):
             return values * 5

-        self.grouper.transform(function, engine="numba")
+        self.grouper.transform(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )

-    def time_dataframe_cython(self):
+    def time_dataframe_cython(self, parallel):
         def function(values):
             return values * 5

         self.grouper.transform(function, engine="cython")


 class AggEngine:
-    def setup(self):
+
+    param_names = ["parallel"]
+    params = [[True, False]]
+
+    def setup(self, parallel):
         N = 10 ** 3
         data = DataFrame(
             {0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
             columns=[0, 1],
         )
+        self.parallel = parallel
         self.grouper = data.groupby(0)

-    def time_series_numba(self):
+    def time_series_numba(self, parallel):
         def function(values, index):
             total = 0
             for i, value in enumerate(values):
@@ -679,9 +693,11 @@ def function(values, index):
                 total += value * 2
             return total

-        self.grouper[1].agg(function, engine="numba")
+        self.grouper[1].agg(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )

-    def time_series_cython(self):
+    def time_series_cython(self, parallel):
         def function(values):
             total = 0
             for i, value in enumerate(values):
@@ -693,7 +709,7 @@ def function(values):

         self.grouper[1].agg(function, engine="cython")

-    def time_dataframe_numba(self):
+    def time_dataframe_numba(self, parallel):
         def function(values, index):
             total = 0
             for i, value in enumerate(values):
@@ -703,9 +719,11 @@ def function(values, index):
                 total += value * 2
             return total

-        self.grouper.agg(function, engine="numba")
+        self.grouper.agg(
+            function, engine="numba", engine_kwargs={"parallel": self.parallel}
+        )

-    def time_dataframe_cython(self):
+    def time_dataframe_cython(self, parallel):
        def function(values):
            total = 0
            for i, value in enumerate(values):
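
These benchmarks feed the new parallel flag through engine_kwargs. For context, a minimal sketch of the groupby numba-engine API the parameter targets (illustrative data; requires numba installed; the df/out names are this example's own):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"key": ["a", "b"] * 50, "val": np.arange(100.0)})

    # engine="numba" JIT-compiles the function; engine_kwargs maps onto
    # numba.jit options, so {"parallel": True} asks numba to parallelize.
    out = df.groupby("key")["val"].transform(
        lambda values, index: values * 5,
        engine="numba",
        engine_kwargs={"nopython": True, "parallel": True},
    )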

asv_bench/benchmarks/index_object.py (+16 -8)

@@ -57,8 +57,8 @@ def time_datetime_difference_disjoint(self):

 class Range:
     def setup(self):
-        self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3)
-        self.idx_dec = RangeIndex(start=10 ** 7, stop=-1, step=-3)
+        self.idx_inc = RangeIndex(start=0, stop=10 ** 6, step=3)
+        self.idx_dec = RangeIndex(start=10 ** 6, stop=-1, step=-3)

     def time_max(self):
         self.idx_inc.max()
@@ -73,15 +73,23 @@ def time_min_trivial(self):
         self.idx_inc.min()

     def time_get_loc_inc(self):
-        self.idx_inc.get_loc(900000)
+        self.idx_inc.get_loc(900_000)

     def time_get_loc_dec(self):
-        self.idx_dec.get_loc(100000)
+        self.idx_dec.get_loc(100_000)
+
+    def time_iter_inc(self):
+        for _ in self.idx_inc:
+            pass
+
+    def time_iter_dec(self):
+        for _ in self.idx_dec:
+            pass


 class IndexEquals:
     def setup(self):
-        idx_large_fast = RangeIndex(100000)
+        idx_large_fast = RangeIndex(100_000)
         idx_small_slow = date_range(start="1/1/2012", periods=1)
         self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])

@@ -94,7 +102,7 @@ def time_non_object_equals_multiindex(self):
 class IndexAppend:
     def setup(self):

-        N = 10000
+        N = 10_000
         self.range_idx = RangeIndex(0, 100)
         self.int_idx = self.range_idx.astype(int)
         self.obj_idx = self.int_idx.astype(str)
@@ -168,7 +176,7 @@ def time_get_loc_non_unique_sorted(self, dtype):
 class Float64IndexMethod:
     # GH 13166
     def setup(self):
-        N = 100000
+        N = 100_000
         a = np.arange(N)
         self.ind = Float64Index(a * 4.8000000418824129e-08)

@@ -212,7 +220,7 @@ class GC:
     params = [1, 2, 5]

     def create_use_drop(self):
-        idx = Index(list(range(1000 * 1000)))
+        idx = Index(list(range(1_000_000)))
         idx._engine

     def peakmem_gc_instances(self, N):
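
Beyond the cosmetic underscore digit separators (PEP 515), the substantive changes shrink the setup from 10 ** 7 to 10 ** 6 elements and add iteration benchmarks. A small sketch of what the Range cases exercise (values are illustrative):

    import pandas as pd

    idx = pd.RangeIndex(start=0, stop=1_000_000, step=3)
    idx.get_loc(900_000)  # -> 300000, computed from start/step without a scan
    idx.max()             # -> 999999, also derived arithmetically
    for _ in idx:         # iteration, by contrast, yields every element
        pass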

asv_bench/benchmarks/io/parsers.py (+1 -1)

@@ -2,8 +2,8 @@

 try:
     from pandas._libs.tslibs.parsing import (
-        concat_date_cols,
         _does_string_look_like_datetime,
+        concat_date_cols,
     )
 except ImportError:
     # Avoid whole benchmark suite import failure on asv (currently 0.4)

asv_bench/benchmarks/package.py (+9 -15)

@@ -4,22 +4,16 @@
 import subprocess
 import sys

-from pandas.compat import PY37
-

 class TimeImport:
     def time_import(self):
-        if PY37:
-            # on py37+ we the "-X importtime" usage gives us a more precise
-            # measurement of the import time we actually care about,
-            # without the subprocess or interpreter overhead
-            cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"]
-            p = subprocess.run(cmd, stderr=subprocess.PIPE)
-
-            line = p.stderr.splitlines()[-1]
-            field = line.split(b"|")[-2].strip()
-            total = int(field)  # microseconds
-            return total
+        # on py37+ the "-X importtime" usage gives us a more precise
+        # measurement of the import time we actually care about,
+        # without the subprocess or interpreter overhead
+        cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"]
+        p = subprocess.run(cmd, stderr=subprocess.PIPE)

-        cmd = [sys.executable, "-c", "import pandas as pd"]
-        subprocess.run(cmd, stderr=subprocess.PIPE)
+        line = p.stderr.splitlines()[-1]
+        field = line.split(b"|")[-2].strip()
+        total = int(field)  # microseconds
+        return total
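
The simplified benchmark relies on the format of CPython's -X importtime report: one stderr line per import, "import time: <self us> | <cumulative us> | <module>", with the top-level module last. A standalone sketch of the same measurement (assumes pandas is importable by the running interpreter):

    import subprocess
    import sys

    cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"]
    p = subprocess.run(cmd, stderr=subprocess.PIPE)

    # The last stderr line covers the top-level "pandas" import; the
    # second-to-last |-separated field is its cumulative microseconds.
    micros = int(p.stderr.splitlines()[-1].split(b"|")[-2].strip())
    print(f"import pandas as pd took ~{micros / 1e6:.2f}s")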

asv_bench/benchmarks/strings.py (+15)

@@ -7,6 +7,21 @@
 from .pandas_vb_common import tm


+class Construction:
+
+    params = ["str", "string"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        self.data = tm.rands_array(nchars=10 ** 5, size=10)
+
+    def time_construction(self, dtype):
+        Series(self.data, dtype=dtype)
+
+    def peakmem_construction(self, dtype):
+        Series(self.data, dtype=dtype)
+
+
 class Methods:
     def setup(self):
         self.s = Series(tm.makeStringIndex(10 ** 5))
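
The new Construction benchmark compares object-backed str data against the dedicated nullable string dtype introduced in pandas 1.0. A minimal sketch of the two constructions being timed (tiny illustrative data in place of tm.rands_array):

    import pandas as pd

    data = ["alpha", "beta", "gamma"]
    s_obj = pd.Series(data, dtype="str")     # NumPy object dtype holding str
    s_new = pd.Series(data, dtype="string")  # pandas StringDtype extension array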

asv_bench/benchmarks/tslibs/normalize.py (+1 -1)

@@ -1,5 +1,5 @@
 try:
-    from pandas._libs.tslibs import normalize_i8_timestamps, is_date_array_normalized
+    from pandas._libs.tslibs import is_date_array_normalized, normalize_i8_timestamps
 except ImportError:
     from pandas._libs.tslibs.conversion import (
         normalize_i8_timestamps,

azure-pipelines.yml (+2)

@@ -1,9 +1,11 @@
 # Adapted from https://github.com/numba/numba/blob/master/azure-pipelines.yml
 trigger:
 - master
+- 1.1.x

 pr:
 - master
+- 1.1.x

 variables:
   PYTEST_WORKERS: auto
