Skip to content

Commit 12cc061

Browse files
committed
CLN: remove some warnings from algos_rank_helper.pxi.in and algos_groupby_helper.pxi.in
CLN: hashtable warnings; CLN: parser warnings. Closes pandas-dev#15190.
1 parent 7bb4980 commit 12cc061

7 files changed

+86
-59
lines changed

pandas/parser.pyx

+18 -29
Original file line number | Diff line number | Diff line change
@@ -716,11 +716,10 @@ cdef class TextReader:
716716
# header is now a list of lists, so field_count should use header[0]
717717

718718
cdef:
719-
size_t i, start, data_line, field_count, passed_count, hr, unnamed_count # noqa
719+
Py_ssize_t i, start, field_count, passed_count, unnamed_count # noqa
720720
char *word
721721
object name
722-
int status
723-
Py_ssize_t size
722+
int status, hr, data_line
724723
char *errors = "strict"
725724
cdef StringPath path = _string_path(self.c_encoding)
726725

@@ -1416,8 +1415,7 @@ cdef _string_box_factorize(parser_t *parser, int col,
14161415
bint na_filter, kh_str_t *na_hashset):
14171416
cdef:
14181417
int error, na_count = 0
1419-
Py_ssize_t i
1420-
size_t lines
1418+
Py_ssize_t i, lines
14211419
coliter_t it
14221420
const char *word = NULL
14231421
ndarray[object] result
@@ -1470,8 +1468,7 @@ cdef _string_box_utf8(parser_t *parser, int col,
14701468
bint na_filter, kh_str_t *na_hashset):
14711469
cdef:
14721470
int error, na_count = 0
1473-
Py_ssize_t i
1474-
size_t lines
1471+
Py_ssize_t i, lines
14751472
coliter_t it
14761473
const char *word = NULL
14771474
ndarray[object] result
@@ -1525,8 +1522,7 @@ cdef _string_box_decode(parser_t *parser, int col,
15251522
char *encoding):
15261523
cdef:
15271524
int error, na_count = 0
1528-
Py_ssize_t i, size
1529-
size_t lines
1525+
Py_ssize_t i, size, lines
15301526
coliter_t it
15311527
const char *word = NULL
15321528
ndarray[object] result
@@ -1586,8 +1582,7 @@ cdef _categorical_convert(parser_t *parser, int col,
15861582
"Convert column data into codes, categories"
15871583
cdef:
15881584
int error, na_count = 0
1589-
Py_ssize_t i, size
1590-
size_t lines
1585+
Py_ssize_t i, size, lines
15911586
coliter_t it
15921587
const char *word = NULL
15931588

@@ -1691,7 +1686,7 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end,
16911686
bint na_filter, kh_str_t *na_hashset, object na_flist):
16921687
cdef:
16931688
int error, na_count = 0
1694-
size_t i, lines
1689+
Py_ssize_t i, lines
16951690
coliter_t it
16961691
const char *word = NULL
16971692
char *p_end
@@ -1738,8 +1733,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
17381733
int *na_count) nogil:
17391734
cdef:
17401735
int error,
1741-
size_t i
1742-
size_t lines = line_end - line_start
1736+
Py_ssize_t i, lines = line_end - line_start
17431737
coliter_t it
17441738
const char *word = NULL
17451739
char *p_end
@@ -1801,7 +1795,7 @@ cdef _try_uint64(parser_t *parser, int col, int line_start, int line_end,
18011795
bint na_filter, kh_str_t *na_hashset):
18021796
cdef:
18031797
int error
1804-
size_t i, lines
1798+
Py_ssize_t i, lines
18051799
coliter_t it
18061800
uint64_t *data
18071801
ndarray result
@@ -1837,8 +1831,7 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int col, int line_start,
18371831
uint64_t *data, uint_state *state) nogil:
18381832
cdef:
18391833
int error
1840-
size_t i
1841-
size_t lines = line_end - line_start
1834+
Py_ssize_t i, lines = line_end - line_start
18421835
coliter_t it
18431836
const char *word = NULL
18441837
khiter_t k
@@ -1873,7 +1866,7 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
18731866
bint na_filter, kh_str_t *na_hashset):
18741867
cdef:
18751868
int error, na_count = 0
1876-
size_t i, lines
1869+
Py_ssize_t i, lines
18771870
coliter_t it
18781871
int64_t *data
18791872
ndarray result
@@ -1902,8 +1895,7 @@ cdef inline int _try_int64_nogil(parser_t *parser, int col, int line_start,
19021895
int64_t *data, int *na_count) nogil:
19031896
cdef:
19041897
int error
1905-
size_t i
1906-
size_t lines = line_end - line_start
1898+
Py_ssize_t i, lines = line_end - line_start
19071899
coliter_t it
19081900
const char *word = NULL
19091901
khiter_t k
@@ -1939,7 +1931,7 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end,
19391931
bint na_filter, kh_str_t *na_hashset):
19401932
cdef:
19411933
int na_count
1942-
size_t lines = line_end - line_start
1934+
Py_ssize_t lines = line_end - line_start
19431935
uint8_t *data
19441936
cnp.ndarray[cnp.uint8_t, ndim=1] result
19451937

@@ -1963,8 +1955,7 @@ cdef inline int _try_bool_nogil(parser_t *parser, int col, int line_start,
19631955
uint8_t *data, int *na_count) nogil:
19641956
cdef:
19651957
int error
1966-
size_t lines = line_end - line_start
1967-
size_t i
1958+
Py_ssize_t i, lines = line_end - line_start
19681959
coliter_t it
19691960
const char *word = NULL
19701961
khiter_t k
@@ -2004,7 +1995,7 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end,
20041995
const kh_str_t *false_hashset):
20051996
cdef:
20061997
int error, na_count = 0
2007-
size_t i, lines
1998+
Py_ssize_t i, lines
20081999
coliter_t it
20092000
const char *word = NULL
20102001
uint8_t *data
@@ -2033,8 +2024,7 @@ cdef inline int _try_bool_flex_nogil(parser_t *parser, int col, int line_start,
20332024
int *na_count) nogil:
20342025
cdef:
20352026
int error = 0
2036-
size_t i
2037-
size_t lines = line_end - line_start
2027+
Py_ssize_t i, lines = line_end - line_start
20382028
coliter_t it
20392029
const char *word = NULL
20402030
khiter_t k
@@ -2249,8 +2239,7 @@ cdef _apply_converter(object f, parser_t *parser, int col,
22492239
char* c_encoding):
22502240
cdef:
22512241
int error
2252-
Py_ssize_t i
2253-
size_t lines
2242+
Py_ssize_t i, lines
22542243
coliter_t it
22552244
const char *word = NULL
22562245
char *errors = "strict"
@@ -2341,7 +2330,7 @@ def _to_structured_array(dict columns, object names, object usecols):
23412330
cdef _fill_structured_column(char *dst, char* src, int elsize,
23422331
int stride, int length, bint incref):
23432332
cdef:
2344-
size_t i
2333+
Py_ssize_t i
23452334

23462335
if incref:
23472336
util.transfer_object_column(dst, src, stride, length)

pandas/src/algos_groupby_helper.pxi.in

+45 -8
Original file line number | Diff line number | Diff line change
@@ -361,7 +361,11 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
361361
val = values[i, j]
362362

363363
# not nan
364+
{{if name == 'int64'}}
365+
if val != {{nan_val}}:
366+
{{else}}
364367
if val == val and val != {{nan_val}}:
368+
{{endif}}
365369
nobs[lab, j] += 1
366370
resx[lab, j] = val
367371

@@ -407,7 +411,11 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
407411
val = values[i, j]
408412

409413
# not nan
414+
{{if name == 'int64'}}
415+
if val != {{nan_val}}:
416+
{{else}}
410417
if val == val and val != {{nan_val}}:
418+
{{endif}}
411419
nobs[lab, j] += 1
412420
if nobs[lab, j] == rank:
413421
resx[lab, j] = val
@@ -478,7 +486,11 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
478486
val = values[i, j]
479487

480488
# not nan
489+
{{if name == 'int64'}}
490+
if val != {{nan_val}}:
491+
{{else}}
481492
if val == val and val != {{nan_val}}:
493+
{{endif}}
482494
nobs[lab, j] += 1
483495
if val > maxx[lab, j]:
484496
maxx[lab, j] = val
@@ -492,7 +504,11 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
492504
val = values[i, 0]
493505

494506
# not nan
507+
{{if name == 'int64'}}
508+
if val != {{nan_val}}:
509+
{{else}}
495510
if val == val and val != {{nan_val}}:
511+
{{endif}}
496512
nobs[lab, 0] += 1
497513
if val > maxx[lab, 0]:
498514
maxx[lab, 0] = val
@@ -541,8 +557,11 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
541557
val = values[i, j]
542558

543559
# not nan
560+
{{if name == 'int64'}}
561+
if val != {{nan_val}}:
562+
{{else}}
544563
if val == val and val != {{nan_val}}:
545-
564+
{{endif}}
546565
nobs[lab, j] += 1
547566
if val < minx[lab, j]:
548567
minx[lab, j] = val
@@ -556,7 +575,11 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
556575
val = values[i, 0]
557576

558577
# not nan
578+
{{if name == 'int64'}}
579+
if val != {{nan_val}}:
580+
{{else}}
559581
if val == val and val != {{nan_val}}:
582+
{{endif}}
560583
nobs[lab, 0] += 1
561584
if val < minx[lab, 0]:
562585
minx[lab, 0] = val
@@ -596,14 +619,19 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
596619
continue
597620
for j in range(K):
598621
val = values[i, j]
622+
623+
# val = nan
624+
{{if name == 'int64'}}
625+
if is_datetimelike and val == {{nan_val}}:
626+
out[i, j] = {{nan_val}}
627+
else:
628+
{{else}}
599629
if val == val:
630+
{{endif}}
600631
if val < accum[lab, j]:
601632
min_val = val
602633
accum[lab, j] = min_val
603634
out[i, j] = accum[lab, j]
604-
# val = nan
605-
elif is_datetimelike:
606-
out[i, j] = {{nan_val}}
607635

608636

609637
@cython.boundscheck(False)
@@ -633,14 +661,18 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
633661
continue
634662
for j in range(K):
635663
val = values[i, j]
664+
665+
{{if name == 'int64'}}
666+
if is_datetimelike and val == {{nan_val}}:
667+
out[i, j] = {{nan_val}}
668+
else:
669+
{{else}}
636670
if val == val:
671+
{{endif}}
637672
if val > accum[lab, j]:
638673
max_val = val
639674
accum[lab, j] = max_val
640675
out[i, j] = accum[lab, j]
641-
# val = nan
642-
elif is_datetimelike:
643-
out[i, j] = {{nan_val}}
644676

645677
{{endfor}}
646678

@@ -738,7 +770,12 @@ def group_cumsum(numeric[:, :] out,
738770
continue
739771
for j in range(K):
740772
val = values[i, j]
741-
if val == val:
773+
774+
if numeric == float32_t or numeric == float64_t:
775+
if val == val:
776+
accum[lab, j] += val
777+
out[i, j] = accum[lab, j]
778+
else:
742779
accum[lab, j] += val
743780
out[i, j] = accum[lab, j]
744781

pandas/src/algos_rank_helper.pxi.in

+1 -7
Original file line number | Diff line number | Diff line change
@@ -175,11 +175,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
175175

176176
count += 1.0
177177

178-
{{if dtype == 'float64'}}
179178
if i == n - 1 or sorted_data[i + 1] != val:
180-
{{else}}
181-
if i == n - 1 or fabs(sorted_data[i + 1] - val) > 0:
182-
{{endif}}
183179
if tiebreak == TIEBREAK_AVERAGE:
184180
for j in range(i - dups + 1, i + 1):
185181
ranks[argsorted[j]] = sum_ranks / dups
@@ -345,10 +341,8 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
345341

346342
{{if dtype == 'object'}}
347343
if j == k - 1 or are_diff(values[i, j + 1], val):
348-
{{elif dtype == 'float64'}}
349-
if j == k - 1 or values[i, j + 1] != val:
350344
{{else}}
351-
if j == k - 1 or fabs(values[i, j + 1] - val) > FP_ERR:
345+
if j == k - 1 or values[i, j + 1] != val:
352346
{{endif}}
353347
if tiebreak == TIEBREAK_AVERAGE:
354348
for z in range(j - dups + 1, j + 1):

pandas/src/hashtable_class_helper.pxi.in

+2
Original file line number | Diff line number | Diff line change
@@ -386,9 +386,11 @@ cdef class {{name}}HashTable(HashTable):
386386
val = values[i]
387387

388388
# specific for groupby
389+
{{if dtype != 'uint64'}}
389390
if val < 0:
390391
labels[i] = -1
391392
continue
393+
{{endif}}
392394

393395
k = kh_get_{{dtype}}(self.table, val)
394396
if k != self.table.n_buckets:

pandas/src/hashtable_func_helper.pxi.in

+9 -4
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,12 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
5959

6060
for i in range(n):
6161
val = values[i]
62+
63+
{{if dtype == 'float64'}}
6264
if val == val or not dropna:
65+
{{else}}
66+
if True:
67+
{{endif}}
6368
k = kh_get_{{ttype}}(table, val)
6469
if k != table.n_buckets:
6570
table.vals[k] += 1
@@ -85,7 +90,7 @@ cpdef value_count_{{dtype}}({{dtype}}_t[:] values, bint dropna):
8590
int64_t[:] result_counts
8691
{{endif}}
8792

88-
int k
93+
Py_ssize_t k
8994

9095
table = kh_init_{{ttype}}()
9196
{{if dtype == 'object'}}
@@ -133,11 +138,11 @@ def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'):
133138
def duplicated_{{dtype}}({{dtype}}_t[:] values, object keep='first'):
134139
{{endif}}
135140
cdef:
136-
int ret = 0, k
141+
int ret = 0
137142
{{if dtype != 'object'}}
138143
{{dtype}}_t value
139144
{{endif}}
140-
Py_ssize_t i, n = len(values)
145+
Py_ssize_t k, i, n = len(values)
141146
kh_{{ttype}}_t * table = kh_init_{{ttype}}()
142147
ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool')
143148

@@ -230,7 +235,7 @@ def mode_{{dtype}}({{ctype}}[:] values):
230235
cdef:
231236
int count, max_count = 2
232237
int j = -1 # so you can do +=
233-
int k
238+
Py_ssize_t k
234239
kh_{{table_type}}_t *table
235240
ndarray[{{ctype}}] modes
236241

pandas/src/parser/io.c

+1 -1
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,7 @@ void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,
215215

216216
retval = src->memmap + src->position;
217217

218-
if (src->position + nbytes > src->last_pos) {
218+
if (src->position + (off_t)nbytes > src->last_pos) {
219219
// fewer than nbytes remaining
220220
*bytes_read = src->last_pos - src->position;
221221
} else {

0 commit comments

Comments
 (0)