Skip to content

Commit fbd76aa

Browse files
committed
Merge remote-tracking branch 'upstream/main' into arrow/docs
2 parents c63656e + f98696a commit fbd76aa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+480
-330
lines changed

.github/workflows/codeql.yml

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
name: CodeQL
2+
on:
3+
schedule:
4+
# every day at midnight (UTC)
5+
- cron: "0 0 * * *"
6+
7+
concurrency:
8+
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
9+
cancel-in-progress: true
10+
11+
jobs:
12+
analyze:
13+
runs-on: ubuntu-latest
14+
permissions:
15+
actions: read
16+
contents: read
17+
security-events: write
18+
19+
strategy:
20+
fail-fast: false
21+
matrix:
22+
language:
23+
- python
24+
25+
steps:
26+
- uses: actions/checkout@v3
27+
- uses: github/codeql-action/init@v2
28+
with:
29+
languages: ${{ matrix.language }}
30+
- uses: github/codeql-action/autobuild@v2
31+
- uses: github/codeql-action/analyze@v2

.pre-commit-config.yaml

+13-3
Original file line numberDiff line numberDiff line change
@@ -85,32 +85,42 @@ repos:
8585
- repo: local
8686
hooks:
8787
- id: pyright
88+
# note: assumes python env is setup and activated
8889
name: pyright
8990
entry: pyright
90-
# note: assumes python env is setup and activated
9191
language: node
9292
pass_filenames: false
9393
types: [python]
9494
stages: [manual]
9595
additional_dependencies: &pyright_dependencies
9696
9797
- id: pyright_reportGeneralTypeIssues
98+
# note: assumes python env is setup and activated
9899
name: pyright reportGeneralTypeIssues
99100
entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json
100-
# note: assumes python env is setup and activated
101101
language: node
102102
pass_filenames: false
103103
types: [python]
104104
stages: [manual]
105105
additional_dependencies: *pyright_dependencies
106106
- id: mypy
107+
# note: assumes python env is setup and activated
107108
name: mypy
108109
entry: mypy
109-
# note: assumes python env is setup and activated
110110
language: system
111111
pass_filenames: false
112112
types: [python]
113113
stages: [manual]
114+
- id: stubtest
115+
# note: assumes python env is setup and activated
116+
# note: requires pandas dev to be installed
117+
name: mypy (stubtest)
118+
entry: python
119+
language: system
120+
pass_filenames: false
121+
types: [pyi]
122+
args: [scripts/run_stubtest.py]
123+
stages: [manual]
114124
- id: flake8-rst
115125
name: flake8-rst
116126
description: Run flake8 on code snippets in docstrings or RST files

ci/code_checks.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ fi
7878
### DOCSTRINGS ###
7979
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8080

81-
MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05)' ; echo $MSG
82-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05
81+
MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
82+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
8383
RET=$(($RET + $?)) ; echo $MSG "DONE"
8484

8585
fi

doc/source/reference/general_functions.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,4 @@ Importing from other DataFrame libraries
8585
.. autosummary::
8686
:toctree: api/
8787

88-
api.exchange.from_dataframe
88+
api.interchange.from_dataframe

pandas/_config/config.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,9 @@ class RegisteredOption(NamedTuple):
102102

103103
class OptionError(AttributeError, KeyError):
104104
"""
105-
Exception for pandas.options, backwards compatible with KeyError
106-
checks.
105+
Exception raised for pandas.options.
106+
107+
Backwards compatible with KeyError checks.
107108
"""
108109

109110

pandas/_libs/groupby.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def group_any_all(
5050
val_test: Literal["any", "all"],
5151
skipna: bool,
5252
) -> None: ...
53-
def group_add(
53+
def group_sum(
5454
out: np.ndarray, # complexfloating_t[:, ::1]
5555
counts: np.ndarray, # int64_t[::1]
5656
values: np.ndarray, # ndarray[complexfloating_t, ndim=2]

pandas/_libs/groupby.pyx

+15-15
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def group_median_float64(
124124
ndarray[intp_t] indexer
125125
float64_t* ptr
126126

127-
assert min_count == -1, "'min_count' only used in add and prod"
127+
assert min_count == -1, "'min_count' only used in sum and prod"
128128

129129
ngroups = len(counts)
130130
N, K = (<object>values).shape
@@ -502,7 +502,7 @@ def group_any_all(
502502

503503

504504
# ----------------------------------------------------------------------
505-
# group_add, group_prod, group_var, group_mean, group_ohlc
505+
# group_sum, group_prod, group_var, group_mean, group_ohlc
506506
# ----------------------------------------------------------------------
507507

508508
ctypedef fused mean_t:
@@ -511,17 +511,17 @@ ctypedef fused mean_t:
511511
complex64_t
512512
complex128_t
513513

514-
ctypedef fused add_t:
514+
ctypedef fused sum_t:
515515
mean_t
516516
object
517517

518518

519519
@cython.wraparound(False)
520520
@cython.boundscheck(False)
521-
def group_add(
522-
add_t[:, ::1] out,
521+
def group_sum(
522+
sum_t[:, ::1] out,
523523
int64_t[::1] counts,
524-
ndarray[add_t, ndim=2] values,
524+
ndarray[sum_t, ndim=2] values,
525525
const intp_t[::1] labels,
526526
Py_ssize_t min_count=0,
527527
bint is_datetimelike=False,
@@ -531,8 +531,8 @@ def group_add(
531531
"""
532532
cdef:
533533
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
534-
add_t val, t, y
535-
add_t[:, ::1] sumx, compensation
534+
sum_t val, t, y
535+
sum_t[:, ::1] sumx, compensation
536536
int64_t[:, ::1] nobs
537537
Py_ssize_t len_values = len(values), len_labels = len(labels)
538538

@@ -546,7 +546,7 @@ def group_add(
546546

547547
N, K = (<object>values).shape
548548

549-
if add_t is object:
549+
if sum_t is object:
550550
# NB: this does not use 'compensation' like the non-object track does.
551551
for i in range(N):
552552
lab = labels[i]
@@ -588,10 +588,10 @@ def group_add(
588588

589589
# not nan
590590
# With dt64/td64 values, values have been cast to float64
591-
# instead if int64 for group_add, but the logic
591+
# instead if int64 for group_sum, but the logic
592592
# is otherwise the same as in _treat_as_na
593593
if val == val and not (
594-
add_t is float64_t
594+
sum_t is float64_t
595595
and is_datetimelike
596596
and val == <float64_t>NPY_NAT
597597
):
@@ -677,7 +677,7 @@ def group_var(
677677
int64_t[:, ::1] nobs
678678
Py_ssize_t len_values = len(values), len_labels = len(labels)
679679

680-
assert min_count == -1, "'min_count' only used in add and prod"
680+
assert min_count == -1, "'min_count' only used in sum and prod"
681681

682682
if len_values != len_labels:
683683
raise ValueError("len(index) != len(labels)")
@@ -745,7 +745,7 @@ def group_mean(
745745
Array containing unique label for each group, with its
746746
ordering matching up to the corresponding record in `values`.
747747
min_count : Py_ssize_t
748-
Only used in add and prod. Always -1.
748+
Only used in sum and prod. Always -1.
749749
is_datetimelike : bool
750750
True if `values` contains datetime-like entries.
751751
mask : ndarray[bool, ndim=2], optional
@@ -766,7 +766,7 @@ def group_mean(
766766
int64_t[:, ::1] nobs
767767
Py_ssize_t len_values = len(values), len_labels = len(labels)
768768

769-
assert min_count == -1, "'min_count' only used in add and prod"
769+
assert min_count == -1, "'min_count' only used in sum and prod"
770770

771771
if len_values != len_labels:
772772
raise ValueError("len(index) != len(labels)")
@@ -821,7 +821,7 @@ def group_ohlc(
821821
Py_ssize_t i, j, N, K, lab
822822
floating val
823823

824-
assert min_count == -1, "'min_count' only used in add and prod"
824+
assert min_count == -1, "'min_count' only used in sum and prod"
825825

826826
if len(labels) == 0:
827827
return

pandas/_libs/hashtable.pyi

+14-14
Original file line numberDiff line numberDiff line change
@@ -39,72 +39,72 @@ class Int64Factorizer(Factorizer):
3939
) -> npt.NDArray[np.intp]: ...
4040

4141
class Int64Vector:
42-
def __init__(self): ...
42+
def __init__(self, *args): ...
4343
def __len__(self) -> int: ...
4444
def to_array(self) -> npt.NDArray[np.int64]: ...
4545

4646
class Int32Vector:
47-
def __init__(self): ...
47+
def __init__(self, *args): ...
4848
def __len__(self) -> int: ...
4949
def to_array(self) -> npt.NDArray[np.int32]: ...
5050

5151
class Int16Vector:
52-
def __init__(self): ...
52+
def __init__(self, *args): ...
5353
def __len__(self) -> int: ...
5454
def to_array(self) -> npt.NDArray[np.int16]: ...
5555

5656
class Int8Vector:
57-
def __init__(self): ...
57+
def __init__(self, *args): ...
5858
def __len__(self) -> int: ...
5959
def to_array(self) -> npt.NDArray[np.int8]: ...
6060

6161
class UInt64Vector:
62-
def __init__(self): ...
62+
def __init__(self, *args): ...
6363
def __len__(self) -> int: ...
6464
def to_array(self) -> npt.NDArray[np.uint64]: ...
6565

6666
class UInt32Vector:
67-
def __init__(self): ...
67+
def __init__(self, *args): ...
6868
def __len__(self) -> int: ...
6969
def to_array(self) -> npt.NDArray[np.uint32]: ...
7070

7171
class UInt16Vector:
72-
def __init__(self): ...
72+
def __init__(self, *args): ...
7373
def __len__(self) -> int: ...
7474
def to_array(self) -> npt.NDArray[np.uint16]: ...
7575

7676
class UInt8Vector:
77-
def __init__(self): ...
77+
def __init__(self, *args): ...
7878
def __len__(self) -> int: ...
7979
def to_array(self) -> npt.NDArray[np.uint8]: ...
8080

8181
class Float64Vector:
82-
def __init__(self): ...
82+
def __init__(self, *args): ...
8383
def __len__(self) -> int: ...
8484
def to_array(self) -> npt.NDArray[np.float64]: ...
8585

8686
class Float32Vector:
87-
def __init__(self): ...
87+
def __init__(self, *args): ...
8888
def __len__(self) -> int: ...
8989
def to_array(self) -> npt.NDArray[np.float32]: ...
9090

9191
class Complex128Vector:
92-
def __init__(self): ...
92+
def __init__(self, *args): ...
9393
def __len__(self) -> int: ...
9494
def to_array(self) -> npt.NDArray[np.complex128]: ...
9595

9696
class Complex64Vector:
97-
def __init__(self): ...
97+
def __init__(self, *args): ...
9898
def __len__(self) -> int: ...
9999
def to_array(self) -> npt.NDArray[np.complex64]: ...
100100

101101
class StringVector:
102-
def __init__(self): ...
102+
def __init__(self, *args): ...
103103
def __len__(self) -> int: ...
104104
def to_array(self) -> npt.NDArray[np.object_]: ...
105105

106106
class ObjectVector:
107-
def __init__(self): ...
107+
def __init__(self, *args): ...
108108
def __len__(self) -> int: ...
109109
def to_array(self) -> npt.NDArray[np.object_]: ...
110110

pandas/_libs/interval.pyx

+6-4
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,9 @@ cdef class Interval(IntervalMixin):
352352

353353
cdef readonly str inclusive
354354
"""
355-
Whether the interval is inclusive on the left-side, right-side, both or
356-
neither.
355+
String describing the inclusive side of the intervals.
356+
357+
Either ``left``, ``right``, ``both`` or ``neither``.
357358
"""
358359

359360
def __init__(self, left, right, inclusive: str | None = None, closed: None | lib.NoDefault = lib.no_default):
@@ -384,10 +385,11 @@ cdef class Interval(IntervalMixin):
384385
@property
385386
def closed(self):
386387
"""
387-
Whether the interval is closed on the left-side, right-side, both or
388-
neither.
388+
String describing the inclusive side of the intervals.
389389
390390
.. deprecated:: 1.5.0
391+
392+
Either ``left``, ``right``, ``both`` or ``neither``.
391393
"""
392394
warnings.warn(
393395
"Attribute `closed` is deprecated in favor of `inclusive`.",

pandas/_libs/lib.pyx

+1-2
Original file line numberDiff line numberDiff line change
@@ -1347,8 +1347,7 @@ cdef object _try_infer_map(object dtype):
13471347

13481348
def infer_dtype(value: object, skipna: bool = True) -> str:
13491349
"""
1350-
Efficiently infer the type of a passed val, or list-like
1351-
array of values. Return a string describing the type.
1350+
Return a string label of the type of a scalar or list-like of values.
13521351

13531352
Parameters
13541353
----------

0 commit comments

Comments
 (0)