Skip to content

Commit 4c63f3e

Browse files
jbrockmendel authored and jreback committed
CLN: use float64_t consistently instead of double, double_t (#23583)
1 parent 00ca0f9 commit 4c63f3e

29 files changed

+251
-316
lines changed

pandas/_libs/algos.pxd

-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
from util cimport numeric
22

33

4-
cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil
5-
6-
74
cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
85
cdef:
96
numeric t

pandas/_libs/algos.pyx

+8-10
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ from numpy cimport (ndarray,
1515
NPY_FLOAT32, NPY_FLOAT64,
1616
NPY_OBJECT,
1717
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
18-
uint32_t, uint64_t, float32_t, float64_t,
19-
double_t)
18+
uint32_t, uint64_t, float32_t, float64_t)
2019
cnp.import_array()
2120

2221

@@ -32,10 +31,9 @@ import missing
3231

3332
cdef float64_t FP_ERR = 1e-13
3433

35-
cdef double NaN = <double>np.NaN
36-
cdef double nan = NaN
34+
cdef float64_t NaN = <float64_t>np.NaN
3735

38-
cdef int64_t iNaT = get_nat()
36+
cdef int64_t NPY_NAT = get_nat()
3937

4038
tiebreakers = {
4139
'average': TIEBREAK_AVERAGE,
@@ -199,7 +197,7 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
199197

200198
@cython.boundscheck(False)
201199
@cython.wraparound(False)
202-
cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil:
200+
def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:
203201
cdef:
204202
Py_ssize_t i, j, l, m, n = a.shape[0]
205203
numeric x
@@ -812,23 +810,23 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
812810
n = len(arr)
813811

814812
if n == 1:
815-
if arr[0] != arr[0] or (timelike and <int64_t>arr[0] == iNaT):
813+
if arr[0] != arr[0] or (timelike and <int64_t>arr[0] == NPY_NAT):
816814
# single value is NaN
817815
return False, False, True
818816
else:
819817
return True, True, True
820818
elif n < 2:
821819
return True, True, True
822820

823-
if timelike and <int64_t>arr[0] == iNaT:
821+
if timelike and <int64_t>arr[0] == NPY_NAT:
824822
return False, False, True
825823

826824
if algos_t is not object:
827825
with nogil:
828826
prev = arr[0]
829827
for i in range(1, n):
830828
cur = arr[i]
831-
if timelike and <int64_t>cur == iNaT:
829+
if timelike and <int64_t>cur == NPY_NAT:
832830
is_monotonic_inc = 0
833831
is_monotonic_dec = 0
834832
break
@@ -853,7 +851,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
853851
prev = arr[0]
854852
for i in range(1, n):
855853
cur = arr[i]
856-
if timelike and <int64_t>cur == iNaT:
854+
if timelike and <int64_t>cur == NPY_NAT:
857855
is_monotonic_inc = 0
858856
is_monotonic_dec = 0
859857
break

pandas/_libs/algos_common_helper.pxi.in

+2-2
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,9 @@ def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
8484

8585
{{endfor}}
8686

87-
#----------------------------------------------------------------------
87+
# ----------------------------------------------------------------------
8888
# ensure_dtype
89-
#----------------------------------------------------------------------
89+
# ----------------------------------------------------------------------
9090

9191
cdef int PLATFORM_INT = (<ndarray>np.arange(0, dtype=np.intp)).descr.type_num
9292

pandas/_libs/algos_rank_helper.pxi.in

+5-5
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,9 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
7474
{{elif dtype == 'float64'}}
7575
mask = np.isnan(values)
7676
{{elif dtype == 'int64'}}
77-
mask = values == iNaT
77+
mask = values == NPY_NAT
7878

79-
# create copy in case of iNaT
79+
# create copy in case of NPY_NAT
8080
# values are mutated inplace
8181
if mask.any():
8282
values = values.copy()
@@ -149,7 +149,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
149149
{{if dtype != 'uint64'}}
150150
isnan = sorted_mask[i]
151151
if isnan and keep_na:
152-
ranks[argsorted[i]] = nan
152+
ranks[argsorted[i]] = NaN
153153
continue
154154
{{endif}}
155155

@@ -257,7 +257,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
257257
{{elif dtype == 'float64'}}
258258
mask = np.isnan(values)
259259
{{elif dtype == 'int64'}}
260-
mask = values == iNaT
260+
mask = values == NPY_NAT
261261
{{endif}}
262262

263263
np.putmask(values, mask, nan_value)
@@ -317,7 +317,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
317317
{{else}}
318318
if (val == nan_value) and keep_na:
319319
{{endif}}
320-
ranks[i, argsorted[i, j]] = nan
320+
ranks[i, argsorted[i, j]] = NaN
321321

322322
{{if dtype == 'object'}}
323323
infs += 1

pandas/_libs/algos_take_helper.pxi.in

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ Template for each `dtype` helper function for take
44
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
55
"""
66

7-
#----------------------------------------------------------------------
7+
# ----------------------------------------------------------------------
88
# take_1d, take_2d
9-
#----------------------------------------------------------------------
9+
# ----------------------------------------------------------------------
1010

1111
{{py:
1212

pandas/_libs/groupby.pyx

+16-18
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
# -*- coding: utf-8 -*-
22

3-
cimport cython
4-
from cython cimport Py_ssize_t
3+
import cython
4+
from cython import Py_ssize_t
55

66
from libc.stdlib cimport malloc, free
77

88
import numpy as np
99
cimport numpy as cnp
1010
from numpy cimport (ndarray,
11-
double_t,
1211
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
1312
uint32_t, uint64_t, float32_t, float64_t)
1413
cnp.import_array()
@@ -20,10 +19,9 @@ from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN,
2019
TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE)
2120
from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers
2221

23-
cdef int64_t iNaT = get_nat()
22+
cdef int64_t NPY_NAT = get_nat()
2423

25-
cdef double NaN = <double>np.NaN
26-
cdef double nan = NaN
24+
cdef float64_t NaN = <float64_t>np.NaN
2725

2826

2927
cdef inline float64_t median_linear(float64_t* a, int n) nogil:
@@ -67,13 +65,13 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil:
6765
return result
6866

6967

70-
# TODO: Is this redundant with algos.kth_smallest?
68+
# TODO: Is this redundant with algos.kth_smallest
7169
cdef inline float64_t kth_smallest_c(float64_t* a,
7270
Py_ssize_t k,
7371
Py_ssize_t n) nogil:
7472
cdef:
7573
Py_ssize_t i, j, l, m
76-
double_t x, t
74+
float64_t x, t
7775

7876
l = 0
7977
m = n - 1
@@ -109,7 +107,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
109107
cdef:
110108
Py_ssize_t i, j, N, K, ngroups, size
111109
ndarray[int64_t] _counts
112-
ndarray data
110+
ndarray[float64_t, ndim=2] data
113111
float64_t* ptr
114112

115113
assert min_count == -1, "'min_count' only used in add and prod"
@@ -139,8 +137,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
139137
@cython.boundscheck(False)
140138
@cython.wraparound(False)
141139
def group_cumprod_float64(float64_t[:, :] out,
142-
float64_t[:, :] values,
143-
int64_t[:] labels,
140+
const float64_t[:, :] values,
141+
const int64_t[:] labels,
144142
bint is_datetimelike,
145143
bint skipna=True):
146144
"""
@@ -177,7 +175,7 @@ def group_cumprod_float64(float64_t[:, :] out,
177175
@cython.wraparound(False)
178176
def group_cumsum(numeric[:, :] out,
179177
numeric[:, :] values,
180-
int64_t[:] labels,
178+
const int64_t[:] labels,
181179
is_datetimelike,
182180
bint skipna=True):
183181
"""
@@ -217,7 +215,7 @@ def group_cumsum(numeric[:, :] out,
217215

218216
@cython.boundscheck(False)
219217
@cython.wraparound(False)
220-
def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
218+
def group_shift_indexer(int64_t[:] out, const int64_t[:] labels,
221219
int ngroups, int periods):
222220
cdef:
223221
Py_ssize_t N, i, j, ii
@@ -291,7 +289,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
291289
"""
292290
cdef:
293291
Py_ssize_t i, N
294-
ndarray[int64_t] sorted_labels
292+
int64_t[:] sorted_labels
295293
int64_t idx, curr_fill_idx=-1, filled_vals=0
296294

297295
N = len(out)
@@ -327,10 +325,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
327325

328326
@cython.boundscheck(False)
329327
@cython.wraparound(False)
330-
def group_any_all(ndarray[uint8_t] out,
331-
ndarray[int64_t] labels,
332-
ndarray[uint8_t] values,
333-
ndarray[uint8_t] mask,
328+
def group_any_all(uint8_t[:] out,
329+
const int64_t[:] labels,
330+
const uint8_t[:] values,
331+
const uint8_t[:] mask,
334332
object val_test,
335333
bint skipna):
336334
"""Aggregated boolean values to show truthfulness of group elements

pandas/_libs/groupby_helper.pxi.in

+11-11
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
55
"""
66

77
cdef extern from "numpy/npy_math.h":
8-
double NAN "NPY_NAN"
8+
float64_t NAN "NPY_NAN"
99
_int64_max = np.iinfo(np.int64).max
1010

1111
# ----------------------------------------------------------------------
@@ -268,16 +268,16 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,
268268

269269
{{endfor}}
270270

271-
#----------------------------------------------------------------------
271+
# ----------------------------------------------------------------------
272272
# group_nth, group_last, group_rank
273-
#----------------------------------------------------------------------
273+
# ----------------------------------------------------------------------
274274

275275
{{py:
276276

277277
# name, c_type, nan_val
278278
dtypes = [('float64', 'float64_t', 'NAN'),
279279
('float32', 'float32_t', 'NAN'),
280-
('int64', 'int64_t', 'iNaT'),
280+
('int64', 'int64_t', 'NPY_NAT'),
281281
('object', 'object', 'NAN')]
282282

283283
def get_dispatch(dtypes):
@@ -527,7 +527,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
527527
# to the result where appropriate
528528
if keep_na and mask[_as[i]]:
529529
for j in range(i - dups + 1, i + 1):
530-
out[_as[j], 0] = nan
530+
out[_as[j], 0] = NaN
531531
grp_na_count = dups
532532
elif tiebreak == TIEBREAK_AVERAGE:
533533
for j in range(i - dups + 1, i + 1):
@@ -630,7 +630,7 @@ def group_max(ndarray[groupby_t, ndim=2] out,
630630
if groupby_t is int64_t:
631631
# Note: evaluated at compile-time
632632
maxx[:] = -_int64_max
633-
nan_val = iNaT
633+
nan_val = NPY_NAT
634634
else:
635635
maxx[:] = -np.inf
636636
nan_val = NAN
@@ -692,7 +692,7 @@ def group_min(ndarray[groupby_t, ndim=2] out,
692692
minx = np.empty_like(out)
693693
if groupby_t is int64_t:
694694
minx[:] = _int64_max
695-
nan_val = iNaT
695+
nan_val = NPY_NAT
696696
else:
697697
minx[:] = np.inf
698698
nan_val = NAN
@@ -762,8 +762,8 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,
762762

763763
# val = nan
764764
if groupby_t is int64_t:
765-
if is_datetimelike and val == iNaT:
766-
out[i, j] = iNaT
765+
if is_datetimelike and val == NPY_NAT:
766+
out[i, j] = NPY_NAT
767767
else:
768768
mval = accum[lab, j]
769769
if val < mval:
@@ -809,8 +809,8 @@ def group_cummax(ndarray[groupby_t, ndim=2] out,
809809
val = values[i, j]
810810

811811
if groupby_t is int64_t:
812-
if is_datetimelike and val == iNaT:
813-
out[i, j] = iNaT
812+
if is_datetimelike and val == NPY_NAT:
813+
out[i, j] = NPY_NAT
814814
else:
815815
mval = accum[lab, j]
816816
if val > mval:

pandas/_libs/hashtable.pyx

+3-5
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ from libc.stdlib cimport malloc, free
99

1010
import numpy as np
1111
cimport numpy as cnp
12-
from numpy cimport ndarray, uint8_t, uint32_t
12+
from numpy cimport ndarray, uint8_t, uint32_t, float64_t
1313
cnp.import_array()
1414

1515
cdef extern from "numpy/npy_math.h":
16-
double NAN "NPY_NAN"
16+
float64_t NAN "NPY_NAN"
1717

1818

1919
from khash cimport (
@@ -42,9 +42,7 @@ cimport util
4242
from missing cimport checknull
4343

4444

45-
nan = np.nan
46-
47-
cdef int64_t iNaT = util.get_nat()
45+
cdef int64_t NPY_NAT = util.get_nat()
4846
_SIZE_HINT_LIMIT = (1 << 20) + 7
4947

5048

pandas/_libs/hashtable_class_helper.pxi.in

+2-2
Original file line numberDiff line numberDiff line change
@@ -251,9 +251,9 @@ cdef class HashTable:
251251
{{py:
252252

253253
# name, dtype, float_group, default_na_value
254-
dtypes = [('Float64', 'float64', True, 'nan'),
254+
dtypes = [('Float64', 'float64', True, 'np.nan'),
255255
('UInt64', 'uint64', False, 0),
256-
('Int64', 'int64', False, 'iNaT')]
256+
('Int64', 'int64', False, 'NPY_NAT')]
257257

258258
}}
259259

pandas/_libs/index.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ from pandas._libs import algos, hashtable as _hash
2525
from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib
2626
from pandas._libs.missing import checknull
2727

28-
cdef int64_t iNaT = util.get_nat()
28+
cdef int64_t NPY_NAT = util.get_nat()
2929

3030

3131
cdef inline bint is_definitely_invalid_key(object val):
@@ -520,7 +520,7 @@ cpdef convert_scalar(ndarray arr, object value):
520520
elif isinstance(value, (datetime, np.datetime64, date)):
521521
return Timestamp(value).value
522522
elif value is None or value != value:
523-
return iNaT
523+
return NPY_NAT
524524
elif util.is_string_object(value):
525525
return Timestamp(value).value
526526
raise ValueError("cannot set a Timestamp with a non-timestamp")
@@ -531,7 +531,7 @@ cpdef convert_scalar(ndarray arr, object value):
531531
elif isinstance(value, timedelta):
532532
return Timedelta(value).value
533533
elif value is None or value != value:
534-
return iNaT
534+
return NPY_NAT
535535
elif util.is_string_object(value):
536536
return Timedelta(value).value
537537
raise ValueError("cannot set a Timedelta with a non-timedelta")

0 commit comments

Comments
 (0)