Skip to content

Commit 90a435a

Browse files
committed
Merge branch 'main' into diff-low-precision-ints
2 parents 8885700 + af8ad6d commit 90a435a

File tree

411 files changed

+10042
-5275
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

411 files changed

+10042
-5275
lines changed

.github/workflows/code-checks.yml

+24
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,27 @@ jobs:
156156
name: Benchmarks log
157157
path: asv_bench/benchmarks.log
158158
if: failure()
159+
160+
build_docker_dev_environment:
161+
name: Build Docker Dev Environment
162+
runs-on: ubuntu-latest
163+
defaults:
164+
run:
165+
shell: bash -l {0}
166+
167+
concurrency:
168+
# https://github.community/t/concurrecy-not-work-for-push/183068/7
169+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-build_docker_dev_environment
170+
cancel-in-progress: true
171+
172+
steps:
173+
- name: Clean up dangling images
174+
run: docker image prune -f
175+
176+
- name: Checkout
177+
uses: actions/checkout@v2
178+
with:
179+
fetch-depth: 0
180+
181+
- name: Build image
182+
run: docker build --pull --no-cache --tag pandas-dev-env .

.github/workflows/datamanger.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ jobs:
2020
data_manager:
2121
name: Test experimental data manager
2222
runs-on: ubuntu-latest
23+
timeout-minutes: 120
2324
services:
2425
moto:
2526
image: motoserver/moto
@@ -45,7 +46,7 @@ jobs:
4546
- name: Run tests
4647
env:
4748
PANDAS_DATA_MANAGER: array
48-
PATTERN: "not network and not clipboard"
49+
PATTERN: "not network and not clipboard and not single_cpu"
4950
PYTEST_WORKERS: "auto"
5051
PYTEST_TARGET: pandas
5152
run: |

.github/workflows/posix.yml

+48-26
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ on:
1313
- "doc/**"
1414

1515
env:
16-
PYTEST_WORKERS: "auto"
1716
PANDAS_CI: 1
1817

1918
jobs:
@@ -22,35 +21,54 @@ jobs:
2221
defaults:
2322
run:
2423
shell: bash -l {0}
24+
timeout-minutes: 120
2525
strategy:
2626
matrix:
27-
settings: [
28-
[actions-38-downstream_compat.yaml, "not slow and not network and not clipboard", "", "", "", "", ""],
29-
[actions-38-minimum_versions.yaml, "not clipboard", "", "", "", "", ""],
30-
[actions-38.yaml, "not slow and not network", "language-pack-it xsel", "it_IT.utf8", "it_IT.utf8", "", ""],
31-
[actions-38.yaml, "not slow and not network", "language-pack-zh-hans xsel", "zh_CN.utf8", "zh_CN.utf8", "", ""],
32-
[actions-38.yaml, "not clipboard", "", "", "", "", ""],
33-
[actions-pypy-38.yaml, "not slow and not clipboard", "", "", "", "", "--max-worker-restart 0"],
34-
[actions-39.yaml, "not clipboard", "", "", "", "", ""],
35-
[actions-310-numpydev.yaml, "not slow and not network", "xsel", "", "", "deprecate", "-W error"],
36-
[actions-310.yaml, "not clipboard", "", "", "", "", ""],
37-
]
27+
env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
28+
pattern: ["not single_cpu", "single_cpu"]
29+
# Don't test pyarrow v2/3: Causes timeouts in read_csv engine
30+
# even if tests are skipped/xfailed
31+
pyarrow_version: ["5", "6", "7"]
32+
include:
33+
- env_file: actions-38-downstream_compat.yaml
34+
pattern: "not slow and not network and not single_cpu"
35+
pytest_target: "pandas/tests/test_downstream.py"
36+
- env_file: actions-38-minimum_versions.yaml
37+
pattern: "not slow and not network and not single_cpu"
38+
- env_file: actions-38.yaml
39+
pattern: "not slow and not network and not single_cpu"
40+
extra_apt: "language-pack-it"
41+
lang: "it_IT.utf8"
42+
lc_all: "it_IT.utf8"
43+
- env_file: actions-38.yaml
44+
pattern: "not slow and not network and not single_cpu"
45+
extra_apt: "language-pack-zh-hans"
46+
lang: "zh_CN.utf8"
47+
lc_all: "zh_CN.utf8"
48+
- env_file: actions-pypy-38.yaml
49+
pattern: "not slow and not network and not single_cpu"
50+
test_args: "--max-worker-restart 0"
51+
- env_file: actions-310-numpydev.yaml
52+
pattern: "not slow and not network and not single_cpu"
53+
pandas_testing_mode: "deprecate"
54+
test_args: "-W error"
3855
fail-fast: false
3956
env:
40-
ENV_FILE: ci/deps/${{ matrix.settings[0] }}
41-
PATTERN: ${{ matrix.settings[1] }}
42-
EXTRA_APT: ${{ matrix.settings[2] }}
43-
LANG: ${{ matrix.settings[3] }}
44-
LC_ALL: ${{ matrix.settings[4] }}
45-
PANDAS_TESTING_MODE: ${{ matrix.settings[5] }}
46-
TEST_ARGS: ${{ matrix.settings[6] }}
47-
PYTEST_TARGET: pandas
48-
IS_PYPY: ${{ contains(matrix.settings[0], 'pypy') }}
57+
ENV_FILE: ci/deps/${{ matrix.env_file }}
58+
PATTERN: ${{ matrix.pattern }}
59+
EXTRA_APT: ${{ matrix.extra_apt || '' }}
60+
LANG: ${{ matrix.lang || '' }}
61+
LC_ALL: ${{ matrix.lc_all || '' }}
62+
PANDAS_TESTING_MODE: ${{ matrix.pandas_testing_mode || '' }}
63+
TEST_ARGS: ${{ matrix.test_args || '' }}
64+
PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }}
65+
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
66+
IS_PYPY: ${{ contains(matrix.env_file, 'pypy') }}
4967
# TODO: re-enable coverage on pypy, it's slow
50-
COVERAGE: ${{ !contains(matrix.settings[0], 'pypy') }}
68+
COVERAGE: ${{ !contains(matrix.env_file, 'pypy') }}
5169
concurrency:
5270
# https://github.community/t/concurrecy-not-work-for-push/183068/7
53-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.settings[0] }}-${{ matrix.settings[1] }}-${{ matrix.settings[2] }}
71+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.pyarrow_version || '' }}-${{ matrix.extra_apt || '' }}
5472
cancel-in-progress: true
5573

5674
services:
@@ -105,7 +123,8 @@ jobs:
105123
hashFiles('${{ env.ENV_FILE }}') }}
106124

107125
- name: Extra installs
108-
run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 ${{ env.EXTRA_APT }}
126+
# xsel for clipboard tests
127+
run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 xsel ${{ env.EXTRA_APT }}
109128

110129
- uses: conda-incubator/setup-miniconda@v2
111130
with:
@@ -117,6 +136,10 @@ jobs:
117136
use-only-tar-bz2: true
118137
if: ${{ env.IS_PYPY == 'false' }} # No pypy3.8 support
119138

139+
- name: Upgrade Arrow version
140+
run: conda install -n pandas-dev -c conda-forge --no-update-deps pyarrow=${{ matrix.pyarrow_version }}
141+
if: ${{ matrix.pyarrow_version }}
142+
120143
- name: Setup PyPy
121144
uses: actions/setup-python@v2
122145
with:
@@ -127,8 +150,7 @@ jobs:
127150
shell: bash
128151
run: |
129152
# TODO: re-enable cov, it's slowing the tests down though
130-
# TODO: Unpin Cython, the new Cython 0.29.26 is causing compilation errors
131-
pip install Cython==0.29.25 numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 hypothesis>=5.5.3
153+
pip install Cython numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 hypothesis>=5.5.3
132154
if: ${{ env.IS_PYPY == 'true' }}
133155

134156
- name: Build Pandas

.github/workflows/python-dev.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ on:
2323
env:
2424
PYTEST_WORKERS: "auto"
2525
PANDAS_CI: 1
26-
PATTERN: "not slow and not network and not clipboard"
26+
PATTERN: "not slow and not network and not clipboard and not single_cpu"
2727
COVERAGE: true
2828
PYTEST_TARGET: pandas
2929

.pre-commit-config.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ ci:
44
autofix_prs: false
55
repos:
66
- repo: https://github.com/MarcoGorelli/absolufy-imports
7-
rev: v0.3.0
7+
rev: v0.3.1
88
hooks:
99
- id: absolufy-imports
1010
files: ^pandas/
@@ -16,7 +16,7 @@ repos:
1616
pass_filenames: true
1717
require_serial: false
1818
- repo: https://github.com/python/black
19-
rev: 21.12b0
19+
rev: 22.1.0
2020
hooks:
2121
- id: black
2222
- repo: https://github.com/codespell-project/codespell
@@ -50,7 +50,7 @@ repos:
5050
- flake8==4.0.1
5151
- flake8-comprehensions==3.7.0
5252
- flake8-bugbear==21.3.2
53-
- pandas-dev-flaker==0.2.0
53+
- pandas-dev-flaker==0.4.0
5454
- repo: https://github.com/PyCQA/isort
5555
rev: 5.10.1
5656
hooks:

Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM quay.io/condaforge/miniforge3
1+
FROM quay.io/condaforge/miniforge3:4.11.0-0
22

33
# if you forked pandas, you can pass in your own GitHub username to use your fork
44
# i.e. gh_username=myname
@@ -45,4 +45,4 @@ RUN . /opt/conda/etc/profile.d/conda.sh \
4545
&& cd "$pandas_home" \
4646
&& export \
4747
&& python setup.py build_ext -j 4 \
48-
&& python -m pip install -e .
48+
&& python -m pip install --no-build-isolation -e .

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ or alternatively
136136
python setup.py develop
137137
```
138138

139-
See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source).
139+
See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/getting_started/install.html#installing-from-source).
140140

141141
## License
142142
[BSD 3](LICENSE)

asv_bench/benchmarks/algorithms.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class Factorize:
3434
param_names = ["unique", "sort", "dtype"]
3535

3636
def setup(self, unique, sort, dtype):
37-
N = 10 ** 5
37+
N = 10**5
3838
string_index = tm.makeStringIndex(N)
3939
string_arrow = None
4040
if dtype == "string[pyarrow]":
@@ -74,7 +74,7 @@ class Duplicated:
7474
param_names = ["unique", "keep", "dtype"]
7575

7676
def setup(self, unique, keep, dtype):
77-
N = 10 ** 5
77+
N = 10**5
7878
data = {
7979
"int": pd.Index(np.arange(N), dtype="int64"),
8080
"uint": pd.Index(np.arange(N), dtype="uint64"),
@@ -97,7 +97,7 @@ def time_duplicated(self, unique, keep, dtype):
9797

9898
class Hashing:
9999
def setup_cache(self):
100-
N = 10 ** 5
100+
N = 10**5
101101

102102
df = pd.DataFrame(
103103
{
@@ -145,7 +145,7 @@ class Quantile:
145145
param_names = ["quantile", "interpolation", "dtype"]
146146

147147
def setup(self, quantile, interpolation, dtype):
148-
N = 10 ** 5
148+
N = 10**5
149149
data = {
150150
"int": np.arange(N),
151151
"uint": np.arange(N).astype(np.uint64),
@@ -158,7 +158,7 @@ def time_quantile(self, quantile, interpolation, dtype):
158158

159159

160160
class SortIntegerArray:
161-
params = [10 ** 3, 10 ** 5]
161+
params = [10**3, 10**5]
162162

163163
def setup(self, N):
164164
data = np.arange(N, dtype=float)

asv_bench/benchmarks/algos/isin.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def setup(self, dtype):
4949

5050
elif dtype in ["category[object]", "category[int]"]:
5151
# Note: sizes are different in this case than others
52-
n = 5 * 10 ** 5
52+
n = 5 * 10**5
5353
sample_size = 100
5454

5555
arr = list(np.random.randint(0, n // 10, size=n))
@@ -174,7 +174,7 @@ class IsinWithArange:
174174

175175
def setup(self, dtype, M, offset_factor):
176176
offset = int(M * offset_factor)
177-
tmp = Series(np.random.randint(offset, M + offset, 10 ** 6))
177+
tmp = Series(np.random.randint(offset, M + offset, 10**6))
178178
self.series = tmp.astype(dtype)
179179
self.values = np.arange(M).astype(dtype)
180180

@@ -191,8 +191,8 @@ class IsInFloat64:
191191
param_names = ["dtype", "title"]
192192

193193
def setup(self, dtype, title):
194-
N_many = 10 ** 5
195-
N_few = 10 ** 6
194+
N_many = 10**5
195+
N_few = 10**6
196196
self.series = Series([1, 2], dtype=dtype)
197197

198198
if title == "many_different_values":
@@ -240,10 +240,10 @@ class IsInForObjects:
240240
param_names = ["series_type", "vals_type"]
241241

242242
def setup(self, series_type, vals_type):
243-
N_many = 10 ** 5
243+
N_many = 10**5
244244

245245
if series_type == "nans":
246-
ser_vals = np.full(10 ** 4, np.nan)
246+
ser_vals = np.full(10**4, np.nan)
247247
elif series_type == "short":
248248
ser_vals = np.arange(2)
249249
elif series_type == "long":
@@ -254,7 +254,7 @@ def setup(self, series_type, vals_type):
254254
self.series = Series(ser_vals).astype(object)
255255

256256
if vals_type == "nans":
257-
values = np.full(10 ** 4, np.nan)
257+
values = np.full(10**4, np.nan)
258258
elif vals_type == "short":
259259
values = np.arange(2)
260260
elif vals_type == "long":
@@ -277,7 +277,7 @@ class IsInLongSeriesLookUpDominates:
277277
param_names = ["dtype", "MaxNumber", "series_type"]
278278

279279
def setup(self, dtype, MaxNumber, series_type):
280-
N = 10 ** 7
280+
N = 10**7
281281

282282
if series_type == "random_hits":
283283
array = np.random.randint(0, MaxNumber, N)
@@ -304,15 +304,15 @@ class IsInLongSeriesValuesDominate:
304304
param_names = ["dtype", "series_type"]
305305

306306
def setup(self, dtype, series_type):
307-
N = 10 ** 7
307+
N = 10**7
308308

309309
if series_type == "random":
310310
vals = np.random.randint(0, 10 * N, N)
311311
if series_type == "monotone":
312312
vals = np.arange(N)
313313

314314
self.values = vals.astype(dtype.lower())
315-
M = 10 ** 6 + 1
315+
M = 10**6 + 1
316316
self.series = Series(np.arange(M)).astype(dtype)
317317

318318
def time_isin(self, dtypes, series_type):

0 commit comments

Comments
 (0)