Skip to content

Commit ef295e7

Browse files
jbrockmendel authored and proost committed
CLN: Assorted cleanups (pandas-dev#29175)
1 parent 3b17f23 commit ef295e7

File tree

7 files changed

+37
-49
lines changed

7 files changed

+37
-49
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

+22-18
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ from pandas._libs.tslibs.util cimport get_c_string
1313

1414
{{py:
1515

16-
# name, dtype, arg
16+
# name, dtype, c_type
1717
# the generated StringVector is not actually used
1818
# but is included for completeness (rather ObjectVector is used
1919
# for uniques in hashtables)
@@ -24,13 +24,13 @@ dtypes = [('Float64', 'float64', 'float64_t'),
2424
('UInt64', 'uint64', 'uint64_t')]
2525
}}
2626

27-
{{for name, dtype, arg in dtypes}}
27+
{{for name, dtype, c_type in dtypes}}
2828

2929

3030
{{if dtype != 'int64'}}
3131

3232
ctypedef struct {{name}}VectorData:
33-
{{arg}} *data
33+
{{c_type}} *data
3434
Py_ssize_t n, m
3535

3636
{{endif}}
@@ -39,7 +39,7 @@ ctypedef struct {{name}}VectorData:
3939
@cython.wraparound(False)
4040
@cython.boundscheck(False)
4141
cdef inline void append_data_{{dtype}}({{name}}VectorData *data,
42-
{{arg}} x) nogil:
42+
{{c_type}} x) nogil:
4343

4444
data.data[data.n] = x
4545
data.n += 1
@@ -61,14 +61,14 @@ cdef inline bint needs_resize(vector_data *data) nogil:
6161

6262
{{py:
6363

64-
# name, dtype, arg, idtype
65-
dtypes = [('Float64', 'float64', 'float64_t', 'np.float64'),
66-
('UInt64', 'uint64', 'uint64_t', 'np.uint64'),
67-
('Int64', 'int64', 'int64_t', 'np.int64')]
64+
# name, dtype, c_type
65+
dtypes = [('Float64', 'float64', 'float64_t'),
66+
('UInt64', 'uint64', 'uint64_t'),
67+
('Int64', 'int64', 'int64_t')]
6868

6969
}}
7070

71-
{{for name, dtype, arg, idtype in dtypes}}
71+
{{for name, dtype, c_type in dtypes}}
7272

7373
cdef class {{name}}Vector:
7474

@@ -87,13 +87,13 @@ cdef class {{name}}Vector:
8787
self.external_view_exists = False
8888
self.data.n = 0
8989
self.data.m = _INIT_VEC_CAP
90-
self.ao = np.empty(self.data.m, dtype={{idtype}})
91-
self.data.data = <{{arg}}*>self.ao.data
90+
self.ao = np.empty(self.data.m, dtype=np.{{dtype}})
91+
self.data.data = <{{c_type}}*>self.ao.data
9292

9393
cdef resize(self):
9494
self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
9595
self.ao.resize(self.data.m, refcheck=False)
96-
self.data.data = <{{arg}}*>self.ao.data
96+
self.data.data = <{{c_type}}*>self.ao.data
9797

9898
def __dealloc__(self):
9999
if self.data is not NULL:
@@ -113,7 +113,7 @@ cdef class {{name}}Vector:
113113
self.external_view_exists = True
114114
return self.ao
115115

116-
cdef inline void append(self, {{arg}} x):
116+
cdef inline void append(self, {{c_type}} x):
117117

118118
if needs_resize(self.data):
119119
if self.external_view_exists:
@@ -123,7 +123,7 @@ cdef class {{name}}Vector:
123123

124124
append_data_{{dtype}}(self.data, x)
125125

126-
cdef extend(self, const {{arg}}[:] x):
126+
cdef extend(self, const {{c_type}}[:] x):
127127
for i in range(len(x)):
128128
self.append(x[i])
129129

@@ -279,7 +279,8 @@ cdef class {{name}}HashTable(HashTable):
279279
self.table = NULL
280280

281281
def __contains__(self, object key):
282-
cdef khiter_t k
282+
cdef:
283+
khiter_t k
283284
k = kh_get_{{dtype}}(self.table, key)
284285
return k != self.table.n_buckets
285286

@@ -290,7 +291,8 @@ cdef class {{name}}HashTable(HashTable):
290291
sizeof(uint32_t)) # flags
291292

292293
cpdef get_item(self, {{dtype}}_t val):
293-
cdef khiter_t k
294+
cdef:
295+
khiter_t k
294296
k = kh_get_{{dtype}}(self.table, val)
295297
if k != self.table.n_buckets:
296298
return self.table.vals[k]
@@ -899,7 +901,8 @@ cdef class PyObjectHashTable(HashTable):
899901
return self.table.size
900902

901903
def __contains__(self, object key):
902-
cdef khiter_t k
904+
cdef:
905+
khiter_t k
903906
hash(key)
904907

905908
k = kh_get_pymap(self.table, <PyObject*>key)
@@ -912,7 +915,8 @@ cdef class PyObjectHashTable(HashTable):
912915
sizeof(uint32_t)) # flags
913916

914917
cpdef get_item(self, object val):
915-
cdef khiter_t k
918+
cdef:
919+
khiter_t k
916920

917921
k = kh_get_pymap(self.table, <PyObject*>val)
918922
if k != self.table.n_buckets:

pandas/_libs/hashtable_func_helper.pxi.in

+8-21
Original file line number | Diff line number | Diff line change
@@ -4,21 +4,17 @@ Template for each `dtype` helper function for hashtable
44
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
55
"""
66

7-
# ----------------------------------------------------------------------
8-
# VectorData
9-
# ----------------------------------------------------------------------
10-
117
{{py:
128

13-
# dtype, ttype
9+
# dtype, ttype, c_type
1410
dtypes = [('float64', 'float64', 'float64_t'),
1511
('uint64', 'uint64', 'uint64_t'),
1612
('object', 'pymap', 'object'),
1713
('int64', 'int64', 'int64_t')]
1814

1915
}}
2016

21-
{{for dtype, ttype, scalar in dtypes}}
17+
{{for dtype, ttype, c_type in dtypes}}
2218

2319

2420
@cython.wraparound(False)
@@ -34,7 +30,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
3430
khiter_t k
3531
Py_ssize_t i, n = len(values)
3632

37-
{{scalar}} val
33+
{{c_type}} val
3834

3935
int ret = 0
4036

@@ -77,7 +73,7 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
7773
{{if dtype == 'object'}}
7874
cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
7975
{{else}}
80-
cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
76+
cpdef value_count_{{dtype}}({{c_type}}[:] values, bint dropna):
8177
{{endif}}
8278
cdef:
8379
Py_ssize_t i = 0
@@ -127,13 +123,9 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
127123
@cython.wraparound(False)
128124
@cython.boundscheck(False)
129125
{{if dtype == 'object'}}
130-
131-
132126
def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'):
133127
{{else}}
134-
135-
136-
def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
128+
def duplicated_{{dtype}}({{c_type}}[:] values, object keep='first'):
137129
{{endif}}
138130
cdef:
139131
int ret = 0
@@ -212,15 +204,10 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
212204
@cython.wraparound(False)
213205
@cython.boundscheck(False)
214206
{{if dtype == 'object'}}
215-
216-
217-
def ismember_{{dtype}}(ndarray[{{scalar}}] arr, ndarray[{{scalar}}] values):
207+
def ismember_{{dtype}}(ndarray[{{c_type}}] arr, ndarray[{{c_type}}] values):
218208
{{else}}
219-
220-
221-
def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
209+
def ismember_{{dtype}}({{c_type}}[:] arr, {{c_type}}[:] values):
222210
{{endif}}
223-
224211
"""
225212
Return boolean of values in arr on an
226213
element by-element basis
@@ -238,7 +225,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
238225
Py_ssize_t i, n, k
239226
int ret = 0
240227
ndarray[uint8_t] result
241-
{{scalar}} val
228+
{{c_type}} val
242229
kh_{{ttype}}_t *table = kh_init_{{ttype}}()
243230

244231
# construct the table

pandas/_libs/internals.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -284,7 +284,7 @@ cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
284284
return start, stop, step, length
285285

286286

287-
def slice_getitem(slice slc not None, ind):
287+
cdef slice_getitem(slice slc, ind):
288288
cdef:
289289
Py_ssize_t s_start, s_stop, s_step, s_len
290290
Py_ssize_t ind_start, ind_stop, ind_step, ind_len

pandas/_libs/interval.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ cnp.import_array()
1818

1919
cimport pandas._libs.util as util
2020

21-
from pandas._libs.hashtable cimport Int64Vector, Int64VectorData
21+
from pandas._libs.hashtable cimport Int64Vector
2222
from pandas._libs.tslibs.util cimport is_integer_object, is_float_object
2323

2424
from pandas._libs.tslibs import Timestamp

pandas/_libs/lib.pyx

+1-4
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,9 @@ import warnings
99
import cython
1010
from cython import Py_ssize_t
1111

12-
from cpython.list cimport PyList_New
13-
from cpython.object cimport (PyObject_Str, PyObject_RichCompareBool, Py_EQ,
14-
Py_SIZE)
12+
from cpython.object cimport PyObject_RichCompareBool, Py_EQ
1513
from cpython.ref cimport Py_INCREF
1614
from cpython.tuple cimport PyTuple_SET_ITEM, PyTuple_New
17-
from cpython.unicode cimport PyUnicode_Join
1815

1916
from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
2017
PyTime_Check, PyDelta_Check,

pandas/core/generic.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -10852,7 +10852,7 @@ def transform(self, func, *args, **kwargs):
1085210852
Also returns None for empty %(klass)s.
1085310853
"""
1085410854

10855-
def _find_valid_index(self, how):
10855+
def _find_valid_index(self, how: str):
1085610856
"""
1085710857
Retrieves the index of the first valid value.
1085810858

pandas/core/util/hashing.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,8 @@
55

66
import numpy as np
77

8+
from pandas._libs import Timestamp
89
import pandas._libs.hashing as hashing
9-
import pandas._libs.tslibs as tslibs
1010

1111
from pandas.core.dtypes.cast import infer_dtype_from_scalar
1212
from pandas.core.dtypes.common import (
@@ -337,8 +337,8 @@ def _hash_scalar(val, encoding: str = "utf8", hash_key=None):
337337
# for tz-aware datetimes, we need the underlying naive UTC value and
338338
# not the tz aware object or pd extension type (as
339339
# infer_dtype_from_scalar would do)
340-
if not isinstance(val, tslibs.Timestamp):
341-
val = tslibs.Timestamp(val)
340+
if not isinstance(val, Timestamp):
341+
val = Timestamp(val)
342342
val = val.tz_convert(None)
343343

344344
dtype, val = infer_dtype_from_scalar(val)

0 commit comments

Comments
 (0)