Skip to content

Commit 5437a9a

Browse files
authored
Merge pull request #192 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 07c9344 + f33480d commit 5437a9a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+3414
-1668
lines changed

.github/workflows/python-dev.yml

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
name: Python Dev
2+
3+
on:
4+
push:
5+
branches:
6+
- master
7+
pull_request:
8+
branches:
9+
- master
10+
11+
jobs:
12+
build:
13+
runs-on: ubuntu-latest
14+
name: actions-310-dev
15+
timeout-minutes: 60
16+
17+
steps:
18+
- uses: actions/checkout@v2
19+
with:
20+
fetch-depth: 0
21+
22+
- name: Set up Python Dev Version
23+
uses: actions/setup-python@v2
24+
with:
25+
python-version: '3.10-dev'
26+
27+
- name: Install dependencies
28+
run: |
29+
python -m pip install --upgrade pip setuptools wheel
30+
pip install git+https://github.com/numpy/numpy.git
31+
pip install git+https://github.com/pytest-dev/pytest.git
32+
pip install git+https://github.com/nedbat/coveragepy.git
33+
pip install cython python-dateutil pytz hypothesis pytest-xdist
34+
pip list
35+
36+
- name: Build Pandas
37+
run: |
38+
python setup.py build_ext -q -j2
39+
python -m pip install -e . --no-build-isolation --no-use-pep517
40+
41+
- name: Build Version
42+
run: |
43+
python -c "import pandas; pandas.show_versions();"
44+
45+
- name: Test with pytest
46+
run: |
47+
coverage run -m pytest -m 'not slow and not network and not clipboard' pandas
48+
continue-on-error: true
49+
50+
- name: Publish test results
51+
uses: actions/upload-artifact@master
52+
with:
53+
name: Test results
54+
path: test-data.xml
55+
if: failure()
56+
57+
- name: Print skipped tests
58+
run: |
59+
python ci/print_skipped.py
60+
61+
- name: Report Coverage
62+
run: |
63+
coverage report -m
64+
65+
- name: Upload coverage to Codecov
66+
uses: codecov/codecov-action@v1
67+
with:
68+
flags: unittests
69+
name: codecov-pandas
70+
fail_ci_if_error: true

ci/code_checks.sh

+28-78
Original file line numberDiff line numberDiff line change
@@ -106,84 +106,34 @@ fi
106106
### DOCTESTS ###
107107
if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
108108

109-
# Individual files
110-
111-
MSG='Doctests accessor.py' ; echo $MSG
112-
pytest -q --doctest-modules pandas/core/accessor.py
113-
RET=$(($RET + $?)) ; echo $MSG "DONE"
114-
115-
MSG='Doctests aggregation.py' ; echo $MSG
116-
pytest -q --doctest-modules pandas/core/aggregation.py
117-
RET=$(($RET + $?)) ; echo $MSG "DONE"
118-
119-
MSG='Doctests base.py' ; echo $MSG
120-
pytest -q --doctest-modules pandas/core/base.py
121-
RET=$(($RET + $?)) ; echo $MSG "DONE"
122-
123-
MSG='Doctests construction.py' ; echo $MSG
124-
pytest -q --doctest-modules pandas/core/construction.py
125-
RET=$(($RET + $?)) ; echo $MSG "DONE"
126-
127-
MSG='Doctests frame.py' ; echo $MSG
128-
pytest -q --doctest-modules pandas/core/frame.py
129-
RET=$(($RET + $?)) ; echo $MSG "DONE"
130-
131-
MSG='Doctests generic.py' ; echo $MSG
132-
pytest -q --doctest-modules pandas/core/generic.py
133-
RET=$(($RET + $?)) ; echo $MSG "DONE"
134-
135-
MSG='Doctests series.py' ; echo $MSG
136-
pytest -q --doctest-modules pandas/core/series.py
137-
RET=$(($RET + $?)) ; echo $MSG "DONE"
138-
139-
MSG='Doctests strings.py' ; echo $MSG
140-
pytest -q --doctest-modules pandas/core/strings/
141-
RET=$(($RET + $?)) ; echo $MSG "DONE"
142-
143-
MSG='Doctests sql.py' ; echo $MSG
144-
pytest -q --doctest-modules pandas/io/sql.py
145-
RET=$(($RET + $?)) ; echo $MSG "DONE"
146-
147-
# Directories
148-
149-
MSG='Doctests arrays'; echo $MSG
150-
pytest -q --doctest-modules pandas/core/arrays/
151-
RET=$(($RET + $?)) ; echo $MSG "DONE"
152-
153-
MSG='Doctests computation' ; echo $MSG
154-
pytest -q --doctest-modules pandas/core/computation/
155-
RET=$(($RET + $?)) ; echo $MSG "DONE"
156-
157-
MSG='Doctests dtypes'; echo $MSG
158-
pytest -q --doctest-modules pandas/core/dtypes/
159-
RET=$(($RET + $?)) ; echo $MSG "DONE"
160-
161-
MSG='Doctests groupby' ; echo $MSG
162-
pytest -q --doctest-modules pandas/core/groupby/
163-
RET=$(($RET + $?)) ; echo $MSG "DONE"
164-
165-
MSG='Doctests indexes' ; echo $MSG
166-
pytest -q --doctest-modules pandas/core/indexes/
167-
RET=$(($RET + $?)) ; echo $MSG "DONE"
168-
169-
MSG='Doctests ops' ; echo $MSG
170-
pytest -q --doctest-modules pandas/core/ops/
171-
RET=$(($RET + $?)) ; echo $MSG "DONE"
172-
173-
MSG='Doctests reshape' ; echo $MSG
174-
pytest -q --doctest-modules pandas/core/reshape/
175-
RET=$(($RET + $?)) ; echo $MSG "DONE"
176-
177-
MSG='Doctests tools' ; echo $MSG
178-
pytest -q --doctest-modules pandas/core/tools/
179-
RET=$(($RET + $?)) ; echo $MSG "DONE"
180-
181-
MSG='Doctests window' ; echo $MSG
182-
pytest -q --doctest-modules pandas/core/window/
183-
RET=$(($RET + $?)) ; echo $MSG "DONE"
184-
185-
MSG='Doctests tseries' ; echo $MSG
186-
pytest -q --doctest-modules pandas/tseries/
109+
MSG='Doctests for individual files' ; echo $MSG
110+
pytest -q --doctest-modules \
111+
pandas/core/accessor.py \
112+
pandas/core/aggregation.py \
113+
pandas/core/algorithms.py \
114+
pandas/core/base.py \
115+
pandas/core/construction.py \
116+
pandas/core/frame.py \
117+
pandas/core/generic.py \
118+
pandas/core/indexers.py \
119+
pandas/core/nanops.py \
120+
pandas/core/series.py \
121+
pandas/io/sql.py
122+
RET=$(($RET + $?)) ; echo $MSG "DONE"
123+
124+
MSG='Doctests for directories' ; echo $MSG
125+
pytest -q --doctest-modules \
126+
pandas/core/arrays/ \
127+
pandas/core/computation/ \
128+
pandas/core/dtypes/ \
129+
pandas/core/groupby/ \
130+
pandas/core/indexes/ \
131+
pandas/core/ops/ \
132+
pandas/core/reshape/ \
133+
pandas/core/strings/ \
134+
pandas/core/tools/ \
135+
pandas/core/window/ \
136+
pandas/tseries/
187137
RET=$(($RET + $?)) ; echo $MSG "DONE"
188138

189139
fi

doc/source/development/contributing.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ version control to allow many people to work together on the project.
110110
Some great resources for learning Git:
111111

112112
* the `GitHub help pages <https://help.github.com/>`_.
113-
* the `NumPy's documentation <https://numpy.org/doc/stable/dev/index.html>`_.
114-
* Matthew Brett's `Pydagogue <https://matthew-brett.github.com/pydagogue/>`_.
113+
* the `NumPy documentation <https://numpy.org/doc/stable/dev/index.html>`_.
114+
* Matthew Brett's `Pydagogue <https://matthew-brett.github.io/pydagogue/>`_.
115115

116116
Getting started with Git
117117
------------------------

doc/source/whatsnew/v1.3.0.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,7 @@ Numeric
696696
- Bug in :meth:`DataFrame.transform` would raise ``SpecificationError`` when passed a dictionary and columns were missing; will now raise a ``KeyError`` instead (:issue:`40004`)
697697
- Bug in :meth:`DataFrameGroupBy.rank` giving incorrect results with ``pct=True`` and equal values between consecutive groups (:issue:`40518`)
698698
- Bug in :meth:`Series.count` would result in an ``int32`` result on 32-bit platforms when argument ``level=None`` (:issue:`40908`)
699+
- Bug in :meth:`Series.clip` would fail if series contains NA values and has nullable int or float as a data type (:issue:`40851`)
699700

700701
Conversion
701702
^^^^^^^^^^
@@ -798,6 +799,7 @@ I/O
798799
- Bug in :func:`read_excel` raising ``AttributeError`` with ``MultiIndex`` header followed by two empty rows and no index, and bug affecting :func:`read_excel`, :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_clipboard` where one blank row after a ``MultiIndex`` header with no index would be dropped (:issue:`40442`)
799800
- Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
800801
- Bug in :func:`read_orc` always raising ``AttributeError`` (:issue:`40918`)
802+
- Bug in the conversion from pyarrow to pandas (e.g. for reading Parquet) with nullable dtypes and a pyarrow array whose data buffer size is not a multiple of dtype size (:issue:`40896`)
801803

802804
Period
803805
^^^^^^
@@ -850,6 +852,8 @@ Groupby/resample/rolling
850852
- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`)
851853
- Bug in :meth:`DataFrame.rolling` returning mean zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerically stable (:issue:`41053`)
852854
- Bug in :meth:`DataFrame.rolling` returning sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerically stable (:issue:`41053`)
855+
- Bug in :meth:`SeriesGroupBy.agg` failing to retain ordered :class:`CategoricalDtype` on order-preserving aggregations (:issue:`41147`)
856+
- Bug in :meth:`DataFrameGroupBy.min` and :meth:`DataFrameGroupBy.max` with multiple object-dtype columns and ``numeric_only=False`` incorrectly raising ``ValueError`` (:issue:`41111`)
853857

854858
Reshaping
855859
^^^^^^^^^
@@ -877,7 +881,7 @@ Sparse
877881

878882
- Bug in :meth:`DataFrame.sparse.to_coo` raising ``KeyError`` with columns that are a numeric :class:`Index` without a 0 (:issue:`18414`)
879883
- Bug in :meth:`SparseArray.astype` with ``copy=False`` producing incorrect results when going from integer dtype to floating dtype (:issue:`34456`)
880-
-
884+
- Implemented :meth:`SparseArray.max` and :meth:`SparseArray.min` (:issue:`40921`)
881885

882886
ExtensionArray
883887
^^^^^^^^^^^^^^

pandas/_libs/lib.pyi

+51-7
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ from typing import (
55
Any,
66
Callable,
77
Generator,
8+
Literal,
9+
overload,
810
)
911

1012
import numpy as np
@@ -51,23 +53,65 @@ def is_float_array(values: np.ndarray, skipna: bool = False): ...
5153
def is_integer_array(values: np.ndarray, skipna: bool = False): ...
5254
def is_bool_array(values: np.ndarray, skipna: bool = False): ...
5355

54-
def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ...
56+
def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> np.ndarray: ...
5557

5658
def fast_unique_multiple_list_gen(gen: Generator, sort: bool = True) -> list: ...
5759
def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ...
5860
def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ...
5961

6062
def map_infer(
6163
arr: np.ndarray, f: Callable[[Any], Any], convert: bool = True, ignore_na: bool = False
64+
) -> np.ndarray: ...
65+
66+
67+
@overload # both convert_datetime and convert_to_nullable_integer False -> np.ndarray
68+
def maybe_convert_objects(
69+
objects: np.ndarray, # np.ndarray[object]
70+
try_float: bool = ...,
71+
safe: bool = ...,
72+
convert_datetime: Literal[False] = ...,
73+
convert_timedelta: bool = ...,
74+
convert_to_nullable_integer: Literal[False] = ...,
75+
) -> np.ndarray: ...
76+
77+
@overload
78+
def maybe_convert_objects(
79+
objects: np.ndarray, # np.ndarray[object]
80+
try_float: bool = ...,
81+
safe: bool = ...,
82+
convert_datetime: Literal[False] = False,
83+
convert_timedelta: bool = ...,
84+
convert_to_nullable_integer: Literal[True] = ...,
6285
) -> ArrayLike: ...
6386

87+
@overload
6488
def maybe_convert_objects(
6589
objects: np.ndarray, # np.ndarray[object]
66-
try_float: bool = False,
67-
safe: bool = False,
68-
convert_datetime: bool = False,
69-
convert_timedelta: bool = False,
70-
convert_to_nullable_integer: bool = False,
90+
try_float: bool = ...,
91+
safe: bool = ...,
92+
convert_datetime: Literal[True] = ...,
93+
convert_timedelta: bool = ...,
94+
convert_to_nullable_integer: Literal[False] = ...,
95+
) -> ArrayLike: ...
96+
97+
@overload
98+
def maybe_convert_objects(
99+
objects: np.ndarray, # np.ndarray[object]
100+
try_float: bool = ...,
101+
safe: bool = ...,
102+
convert_datetime: Literal[True] = ...,
103+
convert_timedelta: bool = ...,
104+
convert_to_nullable_integer: Literal[True] = ...,
105+
) -> ArrayLike: ...
106+
107+
@overload
108+
def maybe_convert_objects(
109+
objects: np.ndarray, # np.ndarray[object]
110+
try_float: bool = ...,
111+
safe: bool = ...,
112+
convert_datetime: bool = ...,
113+
convert_timedelta: bool = ...,
114+
convert_to_nullable_integer: bool = ...,
71115
) -> ArrayLike: ...
72116

73117
def maybe_convert_numeric(
@@ -140,7 +184,7 @@ def map_infer_mask(
140184
convert: bool = ...,
141185
na_value: Any = ...,
142186
dtype: np.dtype = ...,
143-
) -> ArrayLike: ...
187+
) -> np.ndarray: ...
144188

145189
def indices_fast(
146190
index: np.ndarray, # ndarray[intp_t]

pandas/_libs/lib.pyx

+5-5
Original file line numberDiff line numberDiff line change
@@ -2488,7 +2488,7 @@ no_default = NoDefault.no_default # Sentinel indicating the default value.
24882488
@cython.wraparound(False)
24892489
def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True,
24902490
object na_value=no_default, cnp.dtype dtype=np.dtype(object)
2491-
) -> "ArrayLike":
2491+
) -> np.ndarray:
24922492
"""
24932493
Substitute for np.vectorize with pandas-friendly dtype inference.
24942494

@@ -2508,7 +2508,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr
25082508

25092509
Returns
25102510
-------
2511-
np.ndarray or ExtensionArray
2511+
np.ndarray
25122512
"""
25132513
cdef:
25142514
Py_ssize_t i, n
@@ -2545,7 +2545,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr
25452545
@cython.wraparound(False)
25462546
def map_infer(
25472547
ndarray arr, object f, bint convert=True, bint ignore_na=False
2548-
) -> "ArrayLike":
2548+
) -> np.ndarray:
25492549
"""
25502550
Substitute for np.vectorize with pandas-friendly dtype inference.
25512551

@@ -2559,7 +2559,7 @@ def map_infer(
25592559

25602560
Returns
25612561
-------
2562-
np.ndarray or ExtensionArray
2562+
np.ndarray
25632563
"""
25642564
cdef:
25652565
Py_ssize_t i, n
@@ -2697,7 +2697,7 @@ def to_object_array_tuples(rows: object) -> np.ndarray:
26972697

26982698
@cython.wraparound(False)
26992699
@cython.boundscheck(False)
2700-
def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> "ArrayLike":
2700+
def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> np.ndarray:
27012701
cdef:
27022702
Py_ssize_t i, n = len(keys)
27032703
object val

pandas/_libs/parsers.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ class TextReader:
5858
true_values=...,
5959
false_values=...,
6060
allow_leading_cols: bool = ...,
61-
low_memory: bool = ...,
6261
skiprows=...,
6362
skipfooter: int = ..., # int64_t
6463
verbose: bool = ...,
@@ -75,3 +74,4 @@ class TextReader:
7574
def close(self) -> None: ...
7675

7776
def read(self, rows: int | None = ...) -> dict[int, ArrayLike]: ...
77+
def read_low_memory(self, rows: int | None) -> list[dict[int, ArrayLike]]: ...

0 commit comments

Comments
 (0)