Skip to content

Sync Fork from Upstream Repo #150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Mar 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 8 additions & 37 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ jobs:
data_manager:
name: Test experimental data manager
runs-on: ubuntu-latest
strategy:
matrix:
pattern: ["not slow and not network and not clipboard", "slow"]
steps:

- name: Checkout
Expand All @@ -152,43 +155,11 @@ jobs:
- name: Run tests
env:
PANDAS_DATA_MANAGER: array
PATTERN: ${{ matrix.pattern }}
PYTEST_WORKERS: "auto"
run: |
source activate pandas-dev
ci/run_tests.sh

pytest pandas/tests/frame/
pytest pandas/tests/reductions/
pytest pandas/tests/generic/test_generic.py
pytest pandas/tests/arithmetic/
pytest pandas/tests/groupby/
pytest pandas/tests/resample/
pytest pandas/tests/reshape/merge
pytest pandas/tests/series/
pytest pandas/tests/indexing/

pytest pandas/tests/test_*
pytest pandas/tests/api/
pytest pandas/tests/apply/
pytest pandas/tests/arrays/
pytest pandas/tests/base/
pytest pandas/tests/computation/
pytest pandas/tests/config/
pytest pandas/tests/dtypes/
pytest pandas/tests/extension/
pytest pandas/tests/generic/
pytest pandas/tests/indexes/
pytest pandas/tests/internals/
pytest pandas/tests/io/test_* -m "not slow and not clipboard"
pytest pandas/tests/io/excel/ -m "not slow and not clipboard"
pytest pandas/tests/io/formats/ -m "not slow and not clipboard"
pytest pandas/tests/io/parser/ -m "not slow and not clipboard"
pytest pandas/tests/io/sas/ -m "not slow and not clipboard"
pytest pandas/tests/io/xml/ -m "not slow and not clipboard"
pytest pandas/tests/libs/
pytest pandas/tests/plotting/
pytest pandas/tests/scalar/
pytest pandas/tests/strings/
pytest pandas/tests/tools/
pytest pandas/tests/tseries/
pytest pandas/tests/tslibs/
pytest pandas/tests/util/
pytest pandas/tests/window/
- name: Print skipped tests
run: python ci/print_skipped.py
102 changes: 9 additions & 93 deletions .github/workflows/database.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,19 @@ env:
PYTEST_WORKERS: "auto"
PANDAS_CI: 1
PATTERN: ((not slow and not network and not clipboard) or (single and db))
COVERAGE: true

jobs:
Linux_py37_locale:
Linux_py37_IO:
runs-on: ubuntu-latest
defaults:
run:
shell: bash -l {0}

env:
ENV_FILE: ci/deps/actions-37-locale.yaml
LOCALE_OVERRIDE: zh_CN.UTF-8
strategy:
matrix:
ENV_FILE: [ci/deps/actions-37-db-min.yaml, ci/deps/actions-37-db.yaml]
fail-fast: false

services:
mysql:
Expand Down Expand Up @@ -63,106 +65,20 @@ jobs:
with:
path: ~/conda_pkgs_dir
key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
hashFiles('${{ env.ENV_FILE }}') }}
hashFiles('${{ matrix.ENV_FILE }}') }}

- uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: pandas-dev
channel-priority: strict
environment-file: ${{ env.ENV_FILE }}
environment-file: ${{ matrix.ENV_FILE }}
use-only-tar-bz2: true

- name: Build Pandas
uses: ./.github/actions/build_pandas

- name: Test
run: ci/run_tests.sh
if: always()

- name: Build Version
run: pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd

- name: Publish test results
uses: actions/upload-artifact@master
with:
name: Test results
path: test-data.xml
if: failure()

- name: Print skipped tests
run: python ci/print_skipped.py

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
with:
flags: unittests
name: codecov-pandas
fail_ci_if_error: false

Linux_py37_cov:
runs-on: ubuntu-latest
defaults:
run:
shell: bash -l {0}

env:
ENV_FILE: ci/deps/actions-37-cov.yaml
PANDAS_TESTING_MODE: deprecate
COVERAGE: true

services:
mysql:
image: mysql
env:
MYSQL_ALLOW_EMPTY_PASSWORD: yes
MYSQL_DATABASE: pandas
options: >-
--health-cmd "mysqladmin ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 3306:3306

postgres:
image: postgres
env:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: pandas
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 5432:5432

steps:
- name: Checkout
uses: actions/checkout@v1

- name: Cache conda
uses: actions/cache@v1
env:
CACHE_NUMBER: 0
with:
path: ~/conda_pkgs_dir
key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
hashFiles('${{ env.ENV_FILE }}') }}

- uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: pandas-dev
channel-priority: strict
environment-file: ${{ env.ENV_FILE }}
use-only-tar-bz2: true

- name: Build Pandas
uses: ./.github/actions/build_pandas

- name: Test
run: ci/run_tests.sh
run: pytest -m "${{ env.PATTERN }}" -n 2 --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml -s --cov=pandas --cov-report=xml pandas/tests/io
if: always()

- name: Build Version
Expand Down
38 changes: 12 additions & 26 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ repos:
rev: 5.7.0
hooks:
- id: isort
- repo: https://github.com/MarcoGorelli/no-string-hints
rev: v0.1.7
hooks:
- id: no-string-hints
- repo: https://github.com/asottile/pyupgrade
rev: v2.10.0
hooks:
Expand Down Expand Up @@ -111,12 +107,6 @@ repos:
pandas/tests/io/excel/test_writers\.py
|pandas/tests/io/pytables/common\.py
|pandas/tests/io/pytables/test_store\.py$
- id: no-pandas-api-types
name: Check code for instances of pd.api.types
entry: (pd|pandas)\.api\.types\.
language: pygrep
types: [python]
files: ^pandas/tests/
- id: non-standard-imports
name: Check for non-standard imports
language: pygrep
Expand All @@ -128,6 +118,11 @@ repos:

# Check for imports from collections.abc instead of `from collections import abc`
|from\ collections\.abc\ import

# Numpy
|from\ numpy\ import\ random
|from\ numpy\.random\ import
types: [python]
- id: non-standard-imports-in-tests
name: Check for non-standard imports in test suite
language: pygrep
Expand All @@ -143,26 +138,17 @@ repos:

# Check for use of pandas.testing instead of tm
|pd\.testing\.

# Check for pd.api.types instead of from pandas.api.types import ...
|(pd|pandas)\.api\.types\.
types: [python]
files: ^pandas/tests/
- id: non-standard-numpy-random-related-imports
name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
language: pygrep
exclude: pandas/_testing.py
- id: np-bool-and-np-object
name: Check for use of np.bool/np.object instead of np.bool_/np.object_
entry: |
(?x)
# Check for imports from np.random.<method> instead of `from numpy import random` or `from numpy.random import <method>`
from\ numpy\ import\ random
|from\ numpy.random\ import
types: [python]
- id: np-bool
name: Check for use of np.bool instead of np.bool_
entry: np\.bool[^_8]
language: pygrep
types_or: [python, cython, rst]
- id: np-object
name: Check for use of np.object instead of np.object_
entry: np\.object[^_8]
np\.bool[^_8]
|np\.object[^_8]
language: pygrep
types_or: [python, cython, rst]
- id: pip-to-conda
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies:
# tools
- cython>=0.29.21
- pytest>=5.0.1
- pytest-cov
- pytest-xdist>=1.21
- hypothesis>=3.58.0

Expand Down
File renamed without changes.
15 changes: 9 additions & 6 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -199,20 +199,23 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):

Returns
-------
tuple
1-d indexer ordered by groups, group counts.
ndarray[intp_t, ndim=1]
Indexer
ndarray[int64_t, ndim=1]
Group Counts

Notes
-----
This is a reverse of the label factorization process.
"""
cdef:
Py_ssize_t i, loc, label, n
ndarray[int64_t] counts, where, result
ndarray[int64_t] counts, where
ndarray[intp_t] indexer

counts = np.zeros(ngroups + 1, dtype=np.int64)
n = len(index)
result = np.zeros(n, dtype=np.int64)
indexer = np.zeros(n, dtype=np.intp)
where = np.zeros(ngroups + 1, dtype=np.int64)

with nogil:
Expand All @@ -228,10 +231,10 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
# this is our indexer
for i in range(n):
label = index[i] + 1
result[where[label]] = i
indexer[where[label]] = i
where[label] += 1

return result, counts
return indexer, counts


@cython.boundscheck(False)
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/algos_take_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def take_1d_{{name}}_{{dest}}(const {{c_type_in}}[:] values,
{{else}}
def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values,
{{endif}}
const int64_t[:] indexer,
const intp_t[:] indexer,
{{c_type_out}}[:] out,
fill_value=np.nan):

Expand Down Expand Up @@ -102,7 +102,7 @@ def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
{{else}}
def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
{{endif}}
ndarray[int64_t] indexer,
ndarray[intp_t] indexer,
{{c_type_out}}[:, :] out,
fill_value=np.nan):
cdef:
Expand Down Expand Up @@ -156,7 +156,7 @@ def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
{{else}}
def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
{{endif}}
ndarray[int64_t] indexer,
ndarray[intp_t] indexer,
{{c_type_out}}[:, :] out,
fill_value=np.nan):

Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ from numpy cimport (
int16_t,
int32_t,
int64_t,
intp_t,
ndarray,
uint8_t,
uint16_t,
Expand Down Expand Up @@ -141,6 +142,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
Py_ssize_t i, j, N, K, ngroups, size
ndarray[int64_t] _counts
ndarray[float64_t, ndim=2] data
ndarray[intp_t] indexer
float64_t* ptr

assert min_count == -1, "'min_count' only used in add and prod"
Expand Down
Loading