Skip to content

Commit fc40c52

Browse files
authored
Add static type checking via Mypy (#6381)
Adds static type checking to cuDF Python via Mypy.

* An additional `mypy` style check is enabled in CI
* `mypy` is run as part of the pre-commit hook
* Many parts of the cuDF internal code now have type annotations
* Any new internal code is expected to be written with type annotations (this does not apply to public-facing APIs)

Authors:
- Ashwin Srinath (@shwina)

Approvers:
- Dillon Cullinan (@dillon-cullinan)
- Keith Kraus (@kkraus14)
- Christopher Harris (@cwharris)

URL: #6381
1 parent d19cb40 commit fc40c52

37 files changed

+1650
-889
lines changed

.pre-commit-config.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,15 @@ repos:
3232
language: system
3333
files: \.(cu|cuh|h|hpp|cpp|inl)$
3434
args: ['-fallback-style=none']
35+
- repo: local
36+
hooks:
37+
- id: mypy
38+
name: mypy
39+
description: mypy
40+
pass_filenames: false
41+
entry: mypy --config-file=python/cudf/setup.cfg python/cudf/cudf
42+
language: system
43+
types: [python]
3544

3645
default_language_version:
3746
python: python3

ci/checks/style.sh

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ FLAKE_RETVAL=$?
2929
FLAKE_CYTHON=`flake8 --config=python/.flake8.cython`
3030
FLAKE_CYTHON_RETVAL=$?
3131

32+
# Run mypy and get results/return code
33+
MYPY_CUDF=`mypy --config=python/cudf/setup.cfg python/cudf/cudf`
34+
MYPY_CUDF_RETVAL=$?
35+
3236
# Run clang-format and check for a consistent code format
3337
CLANG_FORMAT=`python cpp/scripts/run-clang-format.py 2>&1`
3438
CLANG_FORMAT_RETVAL=$?
@@ -66,6 +70,14 @@ else
6670
echo -e "\n\n>>>> PASSED: flake8-cython style check\n\n"
6771
fi
6872

73+
if [ "$MYPY_CUDF_RETVAL" != "0" ]; then
74+
echo -e "\n\n>>>> FAILED: mypy style check; begin output\n\n"
75+
echo -e "$MYPY_CUDF"
76+
echo -e "\n\n>>>> FAILED: mypy style check; end output\n\n"
77+
else
78+
echo -e "\n\n>>>> PASSED: mypy style check\n\n"
79+
fi
80+
6981
if [ "$CLANG_FORMAT_RETVAL" != "0" ]; then
7082
echo -e "\n\n>>>> FAILED: clang format check; begin output\n\n"
7183
echo -e "$CLANG_FORMAT"
@@ -79,7 +91,7 @@ HEADER_META=`ci/checks/headers_test.sh`
7991
HEADER_META_RETVAL=$?
8092
echo -e "$HEADER_META"
8193

82-
RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL)
94+
RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL $MYPY_CUDF_RETVAL)
8395
IFS=$'\n'
8496
RETVAL=`echo "${RETVALS[*]}" | sort -nr | head -n1`
8597

conda/environments/cudf_dev_cuda10.1.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ dependencies:
4040
- flake8=3.8.3
4141
- black=19.10
4242
- isort=5.0.7
43+
- mypy=0.782
44+
- typing_extensions
4345
- pre_commit
4446
- dask>=2.22.0
4547
- distributed>=2.22.0

conda/environments/cudf_dev_cuda10.2.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ dependencies:
4040
- flake8=3.8.3
4141
- black=19.10
4242
- isort=5.0.7
43+
- mypy=0.782
44+
- typing_extensions
4345
- pre_commit
4446
- dask>=2.22.0
4547
- distributed>=2.22.0

conda/environments/cudf_dev_cuda11.0.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ dependencies:
4040
- flake8=3.8.3
4141
- black=19.10
4242
- isort=5.0.7
43+
- mypy=0.782
44+
- typing_extensions
4345
- pre_commit
4446
- dask>=2.22.0
4547
- distributed>=2.22.0

conda/recipes/cudf/meta.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ requirements:
3434
run:
3535
- protobuf
3636
- python
37+
- typing_extensions
3738
- pandas >=1.0,<1.2.0dev0
3839
- cupy >7.1.0,<9.0.0a0
3940
- numba >=0.49.0

python/cudf/cudf/_lib/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,16 @@
1010
datetime,
1111
filling,
1212
gpuarrow,
13+
groupby,
1314
hash,
1415
interop,
1516
join,
17+
json,
1618
merge,
1719
null_mask,
1820
nvtext,
1921
orc,
22+
parquet,
2023
partitioning,
2124
quantiles,
2225
reduce,
@@ -27,6 +30,7 @@
2730
search,
2831
sort,
2932
stream_compaction,
33+
string_casting,
3034
strings,
3135
table,
3236
transpose,

python/cudf/cudf/_lib/column.pyi

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# Copyright (c) 2021, NVIDIA CORPORATION.
2+
3+
from __future__ import annotations
4+
from typing import Tuple, Union, TypeVar, Optional
5+
6+
from cudf._typing import DtypeObj, Dtype, ScalarLike
7+
from cudf.core.buffer import Buffer
8+
from cudf.core.column import ColumnBase
9+
10+
11+
T = TypeVar("T")
12+
13+
class Column:
14+
_data: Optional[Buffer]
15+
_mask: Optional[Buffer]
16+
_base_data: Optional[Buffer]
17+
_base_mask: Optional[Buffer]
18+
_dtype: DtypeObj
19+
_offset: int
20+
_null_count: int
21+
_children: Tuple[ColumnBase, ...]
22+
_base_children: Tuple[ColumnBase, ...]
23+
24+
def __init__(
25+
self,
26+
data: Optional[Buffer],
27+
dtype: Dtype,
28+
size: int = None,
29+
mask: Optional[Buffer] = None,
30+
offset: int = None,
31+
null_count: int = None,
32+
children: Tuple[ColumnBase, ...] = (),
33+
) -> None:
34+
...
35+
36+
@property
37+
def base_size(self) -> int:
38+
...
39+
40+
@property
41+
def dtype(self) -> DtypeObj:
42+
...
43+
44+
@property
45+
def size(self) -> int:
46+
...
47+
48+
@property
49+
def base_data(self) -> Optional[Buffer]:
50+
...
51+
52+
@property
53+
def base_data_ptr(self) -> int:
54+
...
55+
56+
@property
57+
def data(self) -> Optional[Buffer]:
58+
...
59+
60+
@property
61+
def data_ptr(self) -> int:
62+
...
63+
64+
def set_base_data(self, value: Buffer) -> None:
65+
...
66+
67+
@property
68+
def nullable(self) -> bool:
69+
...
70+
71+
@property
72+
def has_nulls(self) -> bool:
73+
...
74+
75+
@property
76+
def base_mask(self) -> Optional[Buffer]:
77+
...
78+
79+
@property
80+
def base_mask_ptr(self) -> int:
81+
...
82+
83+
@property
84+
def mask(self) -> Optional[Buffer]:
85+
...
86+
87+
@property
88+
def mask_ptr(self) -> int:
89+
...
90+
91+
def set_base_mask(self, value: Optional[Buffer]) -> None:
92+
...
93+
94+
def set_mask(self: T, value: Optional[Buffer]) -> T:
95+
...
96+
97+
@property
98+
def null_count(self) -> int:
99+
...
100+
101+
@property
102+
def offset(self) -> int:
103+
...
104+
105+
@property
106+
def base_children(self) -> Tuple[ColumnBase, ...]:
107+
...
108+
109+
@property
110+
def children(self) -> Tuple[ColumnBase, ...]:
111+
...
112+
113+
def set_base_children(self, value: Tuple[ColumnBase, ...]) -> None:
114+
...
115+
116+
def _mimic_inplace(self, other_col: ColumnBase, inplace=False) -> Optional[ColumnBase]:
117+
...
118+
119+
@staticmethod
120+
def from_scalar(
121+
val: ScalarLike,
122+
size: int
123+
) -> ColumnBase: # TODO: the `val` parameter should be typed as Scalar, not ScalarLike
124+
...

python/cudf/cudf/_lib/column.pyx

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,14 @@ cdef class Column:
6060
The *dtype* indicates the Column's element type.
6161
"""
6262
def __init__(
63-
self,
64-
object data,
65-
int size,
66-
object dtype,
67-
object mask=None,
68-
int offset=0,
69-
object null_count=None,
70-
object children=()
63+
self,
64+
object data,
65+
int size,
66+
object dtype,
67+
object mask=None,
68+
int offset=0,
69+
object null_count=None,
70+
object children=()
7171
):
7272

7373
self._size = size
@@ -247,10 +247,10 @@ cdef class Column:
247247
)
248248

249249
return cudf.core.column.build_column(
250-
self.data,
251-
self.dtype,
252-
mask,
253-
self.size,
250+
data=self.data,
251+
dtype=self.dtype,
252+
mask=mask,
253+
size=self.size,
254254
offset=0,
255255
children=self.children
256256
)
@@ -561,25 +561,22 @@ cdef class Column:
561561
children = tuple(children)
562562

563563
result = cudf.core.column.build_column(
564-
data,
565-
dtype,
566-
mask,
567-
size,
568-
offset,
569-
null_count,
570-
tuple(children)
564+
data=data,
565+
dtype=dtype,
566+
mask=mask,
567+
size=size,
568+
offset=offset,
569+
null_count=null_count,
570+
children=tuple(children)
571571
)
572572

573573
return result
574574

575-
576-
def make_column_from_scalar(object py_val, size_type size):
577-
578-
cdef DeviceScalar val = py_val.device_value
579-
580-
cdef const scalar* c_val = val.get_raw_ptr()
581-
cdef unique_ptr[column] c_result
582-
with nogil:
583-
c_result = move(cpp_make_column_from_scalar(c_val[0], size))
584-
585-
return Column.from_unique_ptr(move(c_result))
575+
@staticmethod
576+
def from_scalar(py_val, size_type size):
577+
cdef DeviceScalar val = py_val.device_value
578+
cdef const scalar* c_val = val.get_raw_ptr()
579+
cdef unique_ptr[column] c_result
580+
with nogil:
581+
c_result = move(cpp_make_column_from_scalar(c_val[0], size))
582+
return Column.from_unique_ptr(move(c_result))

python/cudf/cudf/_lib/table.pyi

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) 2021, NVIDIA CORPORATION.
2+
3+
from typing import List, Any, Optional, TYPE_CHECKING
4+
5+
import cudf
6+
7+
class Table(object):
8+
_data: cudf.core.column_accessor.ColumnAccessor
9+
_index: Optional[cudf.core.index.Index]
10+
11+
def __init__(self, data: object = None, index: object = None) -> None: ...
12+
13+
@property
14+
def _num_columns(self) -> int: ...
15+
16+
@property
17+
def _num_indices(self) -> int: ...
18+
19+
@property
20+
def _num_rows(self) -> int: ...
21+
22+
@property
23+
def _column_names(self) -> List[Any]: ...
24+
25+
@property
26+
def _index_names(self) -> List[Any]: ...
27+
28+
@property
29+
def _columns(self) -> List[Any]: ... # TODO: this is actually a list of columns; narrow List[Any] accordingly

python/cudf/cudf/_typing.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright (c) 2021, NVIDIA CORPORATION.
2+
3+
from typing import TYPE_CHECKING, Any, TypeVar, Union
4+
5+
import numpy as np
6+
from pandas import Period, Timedelta, Timestamp
7+
from pandas.api.extensions import ExtensionDtype
8+
9+
if TYPE_CHECKING:
10+
import cudf
11+
12+
# Many of these are from
13+
# https://github.com/pandas-dev/pandas/blob/master/pandas/_typing.py
14+
15+
Dtype = Union["ExtensionDtype", str, np.dtype]
16+
DtypeObj = Union["ExtensionDtype", np.dtype]
17+
18+
# scalars
19+
DatetimeLikeScalar = TypeVar(
20+
"DatetimeLikeScalar", Period, Timestamp, Timedelta
21+
)
22+
ScalarLike = Any
23+
24+
# columns
25+
ColumnLike = Any
26+
27+
# binary operation
28+
BinaryOperand = Union["cudf.Scalar", "cudf.core.column.ColumnBase"]

python/cudf/cudf/core/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright (c) 2018-2020, NVIDIA CORPORATION.
22

3-
from cudf.core import buffer, column, common
3+
from cudf.core import buffer, column, column_accessor, common
44
from cudf.core.buffer import Buffer
55
from cudf.core.dataframe import DataFrame, from_pandas, merge
66
from cudf.core.index import (

python/cudf/cudf/core/abc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
try:
1313
import pickle5 as pickle
1414
except ImportError:
15-
import pickle
15+
import pickle # type: ignore
1616
else:
17-
import pickle
17+
import pickle # type: ignore
1818

1919

2020
class Serializable(abc.ABC):

0 commit comments

Comments
 (0)