Skip to content

Commit babf3ff

Browse files
authored
Merge branch 'master' into get_loc-nan
2 parents 245d4bb + 7e791e4 commit babf3ff

File tree

851 files changed

+34025
-30265
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

851 files changed

+34025
-30265
lines changed

.github/FUNDING.yml

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
custom: https://pandas.pydata.org/donate.html
2+
github: [numfocus]
23
tidelift: pypi/pandas

.github/workflows/assign.yml

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name: Assign
2+
on:
3+
issue_comment:
4+
types: created
5+
6+
jobs:
7+
one:
8+
runs-on: ubuntu-latest
9+
steps:
10+
- name:
11+
run: |
12+
if [[ "${{ github.event.comment.body }}" == "take" ]]; then
13+
echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
14+
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
15+
fi

.github/workflows/ci.yml

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
branches: master
6+
pull_request:
7+
branches: master
8+
9+
env:
10+
ENV_FILE: environment.yml
11+
12+
jobs:
13+
checks:
14+
name: Checks
15+
runs-on: ubuntu-latest
16+
steps:
17+
18+
- name: Checkout
19+
uses: actions/checkout@v1
20+
21+
- name: Setting conda path
22+
run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}"
23+
24+
- name: Looking for unwanted patterns
25+
run: ci/code_checks.sh patterns
26+
if: true
27+
28+
- name: Setup environment and build pandas
29+
run: ci/setup_env.sh
30+
if: true
31+
32+
- name: Linting
33+
run: |
34+
source activate pandas-dev
35+
ci/code_checks.sh lint
36+
if: true
37+
38+
- name: Dependencies consistency
39+
run: |
40+
source activate pandas-dev
41+
ci/code_checks.sh dependencies
42+
if: true
43+
44+
- name: Checks on imported code
45+
run: |
46+
source activate pandas-dev
47+
ci/code_checks.sh code
48+
if: true
49+
50+
- name: Running doctests
51+
run: |
52+
source activate pandas-dev
53+
ci/code_checks.sh doctests
54+
if: true
55+
56+
- name: Docstring validation
57+
run: |
58+
source activate pandas-dev
59+
ci/code_checks.sh docstrings
60+
if: true
61+
62+
- name: Typing validation
63+
run: |
64+
source activate pandas-dev
65+
ci/code_checks.sh typing
66+
if: true
67+
68+
- name: Testing docstring validation script
69+
run: |
70+
source activate pandas-dev
71+
pytest --capture=no --strict scripts
72+
if: true
73+
74+
- name: Running benchmarks
75+
run: |
76+
source activate pandas-dev
77+
cd asv_bench
78+
asv check -E existing
79+
git remote add upstream https://github.com/pandas-dev/pandas.git
80+
git fetch upstream
81+
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
82+
asv machine --yes
83+
asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log
84+
if grep "failed" benchmarks.log > /dev/null ; then
85+
exit 1
86+
fi
87+
else
88+
echo "Benchmarks did not run, no changes detected"
89+
fi
90+
if: true
91+
92+
- name: Publish benchmarks artifact
93+
uses: actions/upload-artifact@master
94+
with:
95+
name: Benchmarks log
96+
path: asv_bench/benchmarks.log
97+
if: failure()

.pre-commit-config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
repos:
22
- repo: https://github.com/python/black
3-
rev: stable
3+
rev: 19.10b0
44
hooks:
55
- id: black
66
language_version: python3.7
@@ -9,7 +9,7 @@ repos:
99
hooks:
1010
- id: flake8
1111
language: python_venv
12-
additional_dependencies: [flake8-comprehensions]
12+
additional_dependencies: [flake8-comprehensions>=3.1.0]
1313
- repo: https://github.com/pre-commit/mirrors-isort
1414
rev: v4.3.20
1515
hooks:

.travis.yml

+6-8
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ matrix:
3030
- python: 3.5
3131

3232
include:
33+
- dist: trusty
34+
env:
35+
- JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network)"
36+
3337
- dist: trusty
3438
env:
3539
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"
@@ -71,23 +75,17 @@ before_install:
7175
# This overrides travis and tells it to look nowhere.
7276
- export BOTO_CONFIG=/dev/null
7377

78+
7479
install:
7580
- echo "install start"
7681
- ci/prep_cython_cache.sh
7782
- ci/setup_env.sh
7883
- ci/submit_cython_cache.sh
7984
- echo "install done"
8085

81-
before_script:
82-
# display server (for clipboard functionality) needs to be started here,
83-
# does not work if done in install:setup_env.sh (GH-26103)
84-
- export DISPLAY=":99.0"
85-
- echo "sh -e /etc/init.d/xvfb start"
86-
- sh -e /etc/init.d/xvfb start
87-
- sleep 3
88-
8986
script:
9087
- echo "script start"
88+
- echo "$JOB"
9189
- source activate pandas-dev
9290
- ci/run_tests.sh
9391

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ lint-diff:
1515
git diff upstream/master --name-only -- "*.py" | xargs flake8
1616

1717
black:
18-
black . --exclude '(asv_bench/env|\.egg|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist|setup.py)'
18+
black .
1919

2020
develop: build
2121
python -m pip install --no-build-isolation -e .

README.md

+6-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<div align="center">
2-
<img src="https://github.com/pandas-dev/pandas/blob/master/doc/logo/pandas_logo.png"><br>
2+
<img src="https://dev.pandas.io/static/img/pandas.svg"><br>
33
</div>
44

55
-----------------
@@ -164,12 +164,11 @@ pip install pandas
164164
```
165165

166166
## Dependencies
167-
- [NumPy](https://www.numpy.org): 1.13.3 or higher
168-
- [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher
169-
- [pytz](https://pythonhosted.org/pytz): 2015.4 or higher
167+
- [NumPy](https://www.numpy.org)
168+
- [python-dateutil](https://labix.org/python-dateutil)
169+
- [pytz](https://pythonhosted.org/pytz)
170170

171-
See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies)
172-
for recommended and optional dependencies.
171+
See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies.
173172

174173
## Installation from sources
175174
To install pandas from source you need Cython in addition to the normal
@@ -190,7 +189,7 @@ or for installing in [development mode](https://pip.pypa.io/en/latest/reference/
190189

191190

192191
```sh
193-
python -m pip install --no-build-isolation -e .
192+
python -m pip install -e . --no-build-isolation --no-use-pep517
194193
```
195194

196195
If you have `make`, you can also use `make develop` to run the same command.

asv_bench/benchmarks/categoricals.py

+47-21
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,6 @@
1414
pass
1515

1616

17-
class Concat:
18-
def setup(self):
19-
N = 10 ** 5
20-
self.s = pd.Series(list("aabbcd") * N).astype("category")
21-
22-
self.a = pd.Categorical(list("aabbcd") * N)
23-
self.b = pd.Categorical(list("bbcdjk") * N)
24-
25-
def time_concat(self):
26-
pd.concat([self.s, self.s])
27-
28-
def time_union(self):
29-
union_categoricals([self.a, self.b])
30-
31-
3217
class Constructor:
3318
def setup(self):
3419
N = 10 ** 5
@@ -77,14 +62,41 @@ def time_existing_series(self):
7762
pd.Categorical(self.series)
7863

7964

65+
class CategoricalOps:
66+
params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
67+
param_names = ["op"]
68+
69+
def setup(self, op):
70+
N = 10 ** 5
71+
self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
72+
73+
def time_categorical_op(self, op):
74+
getattr(self.cat, op)("b")
75+
76+
77+
class Concat:
78+
def setup(self):
79+
N = 10 ** 5
80+
self.s = pd.Series(list("aabbcd") * N).astype("category")
81+
82+
self.a = pd.Categorical(list("aabbcd") * N)
83+
self.b = pd.Categorical(list("bbcdjk") * N)
84+
85+
def time_concat(self):
86+
pd.concat([self.s, self.s])
87+
88+
def time_union(self):
89+
union_categoricals([self.a, self.b])
90+
91+
8092
class ValueCounts:
8193

8294
params = [True, False]
8395
param_names = ["dropna"]
8496

8597
def setup(self, dropna):
8698
n = 5 * 10 ** 5
87-
arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)]
99+
arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
88100
self.ts = pd.Series(arr).astype("category")
89101

90102
def time_value_counts(self, dropna):
@@ -102,7 +114,7 @@ def time_rendering(self):
102114
class SetCategories:
103115
def setup(self):
104116
n = 5 * 10 ** 5
105-
arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)]
117+
arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
106118
self.ts = pd.Series(arr).astype("category")
107119

108120
def time_set_categories(self):
@@ -112,7 +124,7 @@ def time_set_categories(self):
112124
class RemoveCategories:
113125
def setup(self):
114126
n = 5 * 10 ** 5
115-
arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)]
127+
arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
116128
self.ts = pd.Series(arr).astype("category")
117129

118130
def time_remove_categories(self):
@@ -164,9 +176,9 @@ def setup(self, dtype):
164176
np.random.seed(1234)
165177
n = 5 * 10 ** 5
166178
sample_size = 100
167-
arr = [i for i in np.random.randint(0, n // 10, size=n)]
179+
arr = list(np.random.randint(0, n // 10, size=n))
168180
if dtype == "object":
169-
arr = ["s{:04d}".format(i) for i in arr]
181+
arr = [f"s{i:04d}" for i in arr]
170182
self.sample = np.random.choice(arr, sample_size)
171183
self.series = pd.Series(arr).astype("category")
172184

@@ -225,7 +237,7 @@ def setup(self, index):
225237
elif index == "non_monotonic":
226238
self.data = pd.Categorical.from_codes([0, 1, 2] * N, categories=categories)
227239
else:
228-
raise ValueError("Invalid index param: {}".format(index))
240+
raise ValueError(f"Invalid index param: {index}")
229241

230242
self.scalar = 10000
231243
self.list = list(range(10000))
@@ -282,4 +294,18 @@ def time_sort_values(self):
282294
self.index.sort_values(ascending=False)
283295

284296

297+
class SearchSorted:
298+
def setup(self):
299+
N = 10 ** 5
300+
self.ci = tm.makeCategoricalIndex(N).sort_values()
301+
self.c = self.ci.values
302+
self.key = self.ci.categories[1]
303+
304+
def time_categorical_index_contains(self):
305+
self.ci.searchsorted(self.key)
306+
307+
def time_categorical_contains(self):
308+
self.c.searchsorted(self.key)
309+
310+
285311
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/ctors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class SeriesConstructors:
6767
def setup(self, data_fmt, with_index, dtype):
6868
if data_fmt in (gen_of_str, gen_of_tuples) and with_index:
6969
raise NotImplementedError(
70-
"Series constructors do not support " "using generators with indexes"
70+
"Series constructors do not support using generators with indexes"
7171
)
7272
N = 10 ** 4
7373
if dtype == "float":

asv_bench/benchmarks/eval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def time_add(self, engine, threads):
2727

2828
def time_and(self, engine, threads):
2929
pd.eval(
30-
"(self.df > 0) & (self.df2 > 0) & " "(self.df3 > 0) & (self.df4 > 0)",
30+
"(self.df > 0) & (self.df2 > 0) & (self.df3 > 0) & (self.df4 > 0)",
3131
engine=engine,
3232
)
3333

asv_bench/benchmarks/frame_ctor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ class FromLists:
9999
def setup(self):
100100
N = 1000
101101
M = 100
102-
self.data = [[j for j in range(M)] for i in range(N)]
102+
self.data = [list(range(M)) for i in range(N)]
103103

104104
def time_frame_from_lists(self):
105105
self.df = DataFrame(self.data)

asv_bench/benchmarks/frame_methods.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ def setup(self):
565565

566566
def time_frame_get_dtype_counts(self):
567567
with warnings.catch_warnings(record=True):
568-
self.df.get_dtype_counts()
568+
self.df._data.get_dtype_counts()
569569

570570
def time_info(self):
571571
self.df.info()

asv_bench/benchmarks/gil.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def wrapper(fname):
3737
return wrapper
3838

3939

40-
from .pandas_vb_common import BaseIO # noqa: E402 isort:skip
40+
from .pandas_vb_common import BaseIO # isort:skip
4141

4242

4343
class ParallelGroupbyMethods:
@@ -250,13 +250,11 @@ def setup(self, dtype):
250250
np.random.randn(rows, cols), index=date_range("1/1/2000", periods=rows)
251251
),
252252
"object": DataFrame(
253-
"foo",
254-
index=range(rows),
255-
columns=["object%03d".format(i) for i in range(5)],
253+
"foo", index=range(rows), columns=["object%03d" for _ in range(5)]
256254
),
257255
}
258256

259-
self.fname = "__test_{}__.csv".format(dtype)
257+
self.fname = f"__test_{dtype}__.csv"
260258
df = data[dtype]
261259
df.to_csv(self.fname)
262260

0 commit comments

Comments
 (0)