@@ -69,17 +69,22 @@ from pandas._libs.tslibs.nattype cimport (
69
69
)
70
70
from pandas._libs.tslibs.timestamps cimport _Timestamp
71
71
72
+ import cython
73
+
72
74
from pandas._libs.tslibs import (
73
75
Resolution,
74
76
get_resolution,
75
77
)
76
78
from pandas._libs.tslibs.timestamps import Timestamp
77
79
78
- # Note: this is the only non-tslibs intra-pandas dependency here
80
+ from libc.stdlib cimport srand
81
+ from libc.time cimport time
79
82
80
83
from pandas._libs.missing cimport checknull_with_nat_and_na
81
84
from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single
82
85
86
+ # Note: pandas._libs.missing (cimported above) is the only non-tslibs intra-pandas dependency here
87
+
83
88
84
89
def _test_parse_iso8601 (ts: str ):
85
90
"""
@@ -398,6 +403,33 @@ def first_non_null(values: ndarray) -> int:
398
403
return - 1
399
404
400
405
406
@cython.wraparound(False)
@cython.boundscheck(False)
def random_non_null(values: ndarray, int n) -> ndarray:
    """
    Pick up to ``n`` positions of non-null entries of ``values`` at random.

    An entry is skipped when ``checknull_with_nat_and_na`` reports it as
    null, or when it is a string that is empty, is contained in
    ``nat_strings``, or equals ``"now"`` or ``"today"``.

    Parameters
    ----------
    values : ndarray
        Array to scan for non-null entries.
    n : int
        Maximum number of positions to return.

    Returns
    -------
    ndarray[int64]
        ``min(n, number of qualifying entries)`` distinct positions sampled
        without replacement. Empty when ``n <= 0`` or no entry qualifies.
    """
    cdef:
        Py_ssize_t total = len(values)
        Py_ssize_t i, non_null_count
        list non_null_indices = []

    if n <= 0:
        # Nothing was requested, so there is no need to scan the array.
        return np.empty(0, dtype=np.int64)

    for i in range(total):
        val = values[i]
        if checknull_with_nat_and_na(val):
            continue
        if (
            isinstance(val, str)
            and
            (len(val) == 0 or val in nat_strings or val in ("now", "today"))
        ):
            continue
        non_null_indices.append(i)

    non_null_count = len(non_null_indices)
    if non_null_count == 0:
        return np.empty(0, dtype=np.int64)

    # Sampling uses NumPy's global RNG only. The C library RNG is deliberately
    # left untouched: reseeding it here would not influence np.random.choice
    # and would clobber process-wide state for any other user of rand().
    # Build the candidates as int64 explicitly so the result dtype matches the
    # empty-result path on every platform.
    candidates = np.array(non_null_indices, dtype=np.int64)
    return np.random.choice(candidates, min(n, non_null_count), replace=False)
431
+
432
+
401
433
@ cython.wraparound (False )
402
434
@ cython.boundscheck (False )
403
435
cpdef array_to_datetime(
0 commit comments