1
+ import sys
1
2
cimport util
2
3
from tslib import NaT
3
4
from datetime import datetime, timedelta
4
5
# Module-level value obtained from util.get_nat().
# NOTE(review): presumably the integer sentinel used for NaT -- confirm in util.
iNaT = util.get_nat()
5
6
7
# True when the running interpreter's major version is 2. Used further down
# to pick between the Python 2 and Python 3 names for string-like types.
+ cdef bint PY2 = sys.version_info[0 ] == 2
8
+
6
9
# core.common import for fast inference checks
7
10
def is_float(object obj):
    """Return the result of ``util.is_float_object`` applied to *obj*."""
    verdict = util.is_float_object(obj)
    return verdict
@@ -38,10 +41,10 @@ _TYPE_MAP = {
38
41
' f' : ' floating' ,
39
42
' complex128' : ' complex' ,
40
43
' c' : ' complex' ,
41
- ' string' : ' string' ,
42
- ' S' : ' string' ,
43
- ' unicode' : ' unicode' ,
44
- ' U' : ' unicode' ,
44
+ ' string' : ' string' if PY2 else ' bytes ' ,
45
+ ' S' : ' string' if PY2 else ' bytes ' ,
46
+ ' unicode' : ' unicode' if PY2 else ' string ' ,
47
+ ' U' : ' unicode' if PY2 else ' string ' ,
45
48
' bool' : ' boolean' ,
46
49
' b' : ' boolean' ,
47
50
' datetime64[ns]' : ' datetime64' ,
@@ -181,6 +184,10 @@ def infer_dtype(object _values):
181
184
if is_unicode_array(values):
182
185
return ' unicode'
183
186
187
+ elif PyBytes_Check(val):
188
+ if is_bytes_array(values):
189
+ return ' bytes'
190
+
184
191
elif is_timedelta(val):
185
192
if is_timedelta_or_timedelta64_array(values):
186
193
return ' timedelta'
@@ -196,11 +203,6 @@ def infer_dtype(object _values):
196
203
197
204
return ' mixed'
198
205
199
- def infer_dtype_list (list values ):
200
- cdef:
201
- Py_ssize_t i, n = len (values)
202
- pass
203
-
204
206
205
207
def is_possible_datetimelike_array (object arr ):
206
208
# determine if we have a possible datetimelike (or null-like) array
@@ -253,7 +255,6 @@ def is_bool_array(ndarray values):
253
255
cdef:
254
256
Py_ssize_t i, n = len (values)
255
257
ndarray[object ] objbuf
256
- object obj
257
258
258
259
if issubclass (values.dtype.type, np.bool_):
259
260
return True
@@ -277,7 +278,6 @@ def is_integer_array(ndarray values):
277
278
cdef:
278
279
Py_ssize_t i, n = len (values)
279
280
ndarray[object ] objbuf
280
- object obj
281
281
282
282
if issubclass (values.dtype.type, np.integer):
283
283
return True
@@ -298,7 +298,6 @@ def is_integer_float_array(ndarray values):
298
298
cdef:
299
299
Py_ssize_t i, n = len (values)
300
300
ndarray[object ] objbuf
301
- object obj
302
301
303
302
if issubclass (values.dtype.type, np.integer):
304
303
return True
@@ -321,7 +320,6 @@ def is_float_array(ndarray values):
321
320
cdef:
322
321
Py_ssize_t i, n = len (values)
323
322
ndarray[object ] objbuf
324
- object obj
325
323
326
324
if issubclass (values.dtype.type, np.floating):
327
325
return True
@@ -342,9 +340,9 @@ def is_string_array(ndarray values):
342
340
cdef:
343
341
Py_ssize_t i, n = len (values)
344
342
ndarray[object ] objbuf
345
- object obj
346
343
347
- if issubclass (values.dtype.type, (np.string_, np.unicode_)):
344
+ if ((PY2 and issubclass (values.dtype.type, np.string_)) or
345
+ not PY2 and issubclass (values.dtype.type, np.unicode_)):
348
346
return True
349
347
elif values.dtype == np.object_:
350
348
objbuf = values
@@ -363,7 +361,6 @@ def is_unicode_array(ndarray values):
363
361
cdef:
364
362
Py_ssize_t i, n = len (values)
365
363
ndarray[object ] objbuf
366
- object obj
367
364
368
365
if issubclass (values.dtype.type, np.unicode_):
369
366
return True
@@ -381,8 +378,29 @@ def is_unicode_array(ndarray values):
381
378
return False
382
379
383
380
381
def is_bytes_array(ndarray values):
    """
    Report whether *values* holds only bytes objects.

    Returns True when the array's dtype type is a subclass of ``np.bytes_``.
    For an object-dtype array, returns True only if the array is non-empty
    and every element passes ``PyBytes_Check``. Returns False for any other
    dtype.
    """
    cdef:
        Py_ssize_t idx, length = len(values)
        ndarray[object] items

    # Native bytes dtypes need no per-element inspection.
    if issubclass(values.dtype.type, np.bytes_):
        return True

    # Only object arrays can still contain bytes instances.
    if values.dtype != np.object_:
        return False

    # Bind the typed buffer before the emptiness check, mirroring the
    # original statement order.
    items = values

    if length == 0:
        return False

    for idx in range(length):
        if not PyBytes_Check(items[idx]):
            return False
    return True
400
+
401
+
384
402
def is_datetime_array (ndarray[object] values ):
385
- cdef int i, null_count = 0 , n = len (values)
403
+ cdef Py_ssize_t i, null_count = 0 , n = len (values)
386
404
cdef object v
387
405
if n == 0 :
388
406
return False
@@ -399,7 +417,7 @@ def is_datetime_array(ndarray[object] values):
399
417
return null_count != n
400
418
401
419
def is_datetime64_array (ndarray values ):
402
- cdef int i, null_count = 0 , n = len (values)
420
+ cdef Py_ssize_t i, null_count = 0 , n = len (values)
403
421
cdef object v
404
422
if n == 0 :
405
423
return False
@@ -416,7 +434,7 @@ def is_datetime64_array(ndarray values):
416
434
return null_count != n
417
435
418
436
def is_timedelta_array (ndarray values ):
419
- cdef int i, null_count = 0 , n = len (values)
437
+ cdef Py_ssize_t i, null_count = 0 , n = len (values)
420
438
cdef object v
421
439
if n == 0 :
422
440
return False
@@ -431,7 +449,7 @@ def is_timedelta_array(ndarray values):
431
449
return null_count != n
432
450
433
451
def is_timedelta64_array (ndarray values ):
434
- cdef int i, null_count = 0 , n = len (values)
452
+ cdef Py_ssize_t i, null_count = 0 , n = len (values)
435
453
cdef object v
436
454
if n == 0 :
437
455
return False
@@ -447,7 +465,7 @@ def is_timedelta64_array(ndarray values):
447
465
448
466
def is_timedelta_or_timedelta64_array (ndarray values ):
449
467
""" infer with timedeltas and/or nat/none """
450
- cdef int i, null_count = 0 , n = len (values)
468
+ cdef Py_ssize_t i, null_count = 0 , n = len (values)
451
469
cdef object v
452
470
if n == 0 :
453
471
return False
@@ -462,7 +480,7 @@ def is_timedelta_or_timedelta64_array(ndarray values):
462
480
return null_count != n
463
481
464
482
def is_date_array (ndarray[object] values ):
465
- cdef int i, n = len (values)
483
+ cdef Py_ssize_t i, n = len (values)
466
484
if n == 0 :
467
485
return False
468
486
for i in range (n):
@@ -471,7 +489,7 @@ def is_date_array(ndarray[object] values):
471
489
return True
472
490
473
491
def is_time_array (ndarray[object] values ):
474
- cdef int i, n = len (values)
492
+ cdef Py_ssize_t i, n = len (values)
475
493
if n == 0 :
476
494
return False
477
495
for i in range (n):
@@ -484,7 +502,7 @@ def is_period(object o):
484
502
return isinstance (o,Period)
485
503
486
504
def is_period_array (ndarray[object] values ):
487
- cdef int i, n = len (values)
505
+ cdef Py_ssize_t i, n = len (values)
488
506
from pandas.tseries.period import Period
489
507
490
508
if n == 0 :
0 commit comments