@@ -13,9 +13,6 @@ from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
13
13
14
14
# core.common import for fast inference checks
15
15
16
- npy_int64_max = np.iinfo(np.int64).max
17
-
18
-
19
16
cpdef bint is_float(object obj):
20
17
return util.is_float_object(obj)
21
18
@@ -629,48 +626,157 @@ cdef extern from "parse_helper.h":
629
626
630
627
cdef int64_t iINT64_MAX = < int64_t> INT64_MAX
631
628
cdef int64_t iINT64_MIN = < int64_t> INT64_MIN
629
+ cdef uint64_t iUINT64_MAX = < uint64_t> UINT64_MAX
632
630
633
631
634
- def maybe_convert_numeric (object[: ] values , set na_values ,
632
+ def maybe_convert_numeric (ndarray[object ] values , set na_values ,
635
633
bint convert_empty = True , bint coerce_numeric = False ):
636
634
"""
637
- Type inference function-- convert strings to numeric (potentially) and
638
- convert to proper dtype array
635
+ Convert object array to a numeric array if possible.
636
+
637
+ Parameters
638
+ ----------
639
+ values : ndarray
640
+ Array of object elements to convert.
641
+ na_values : set
642
+ Set of values that should be interpreted as NaN.
643
+ convert_empty : bool, default True
644
+ If an empty array-like object is encountered, whether to interpret
645
+ that element as NaN or not. If set to False, a ValueError will be
646
+ raised if such an element is encountered and 'coerce_numeric' is False.
647
+ coerce_numeric : bool, default False
648
+ If initial attempts to convert to numeric have failed, whether to
649
+ force conversion to numeric via alternative methods or by setting the
650
+ element to NaN. Otherwise, an Exception will be raised when such an
651
+ element is encountered.
652
+
653
+ This boolean also has an impact on how conversion behaves when a
654
+ numeric array has no suitable numerical dtype to return (i.e. uint64,
655
+ int32, uint8). If set to False, the original object array will be
656
+ returned. Otherwise, a ValueError will be raised.
657
+
658
+ Returns
659
+ -------
660
+ numeric_array : array of converted object values to numerical ones
639
661
"""
640
662
cdef:
641
663
int status, maybe_int
642
664
Py_ssize_t i, n = values.size
643
665
ndarray[float64_t] floats = np.empty(n, dtype = ' f8' )
644
666
ndarray[complex128_t] complexes = np.empty(n, dtype = ' c16' )
645
667
ndarray[int64_t] ints = np.empty(n, dtype = ' i8' )
668
+ ndarray[uint64_t] uints = np.empty(n, dtype = ' u8' )
646
669
ndarray[uint8_t] bools = np.empty(n, dtype = ' u1' )
670
+ bint seen_null = False
671
+ bint seen_uint = False
672
+ bint seen_sint = False
647
673
bint seen_float = False
648
674
bint seen_complex = False
649
675
bint seen_int = False
650
676
bint seen_bool = False
651
677
object val
652
678
float64_t fval
653
679
680
+
681
+ def check_uint64_nan ():
682
+ """
683
+ Check whether we have encountered uint64 when handling a NaN element.
684
+
685
+ If uint64 has been encountered, we cannot safely cast to float64 due
686
+ to truncation problems (this would occur if we return a numeric array
687
+ containing a NaN element).
688
+
689
+ Returns
690
+ -------
691
+ return_values : bool
692
+ Whether or not we should return the original input array to avoid
693
+ data truncation.
694
+ """
695
+ if seen_null and seen_uint:
696
+ if not coerce_numeric:
697
+ return True
698
+ else :
699
+ raise ValueError (" uint64 array detected, and such an "
700
+ " array cannot contain NaN." )
701
+
702
+ return False
703
+
704
+
705
+ def check_uint64_int64_conflict ():
706
+ """
707
+ Check whether we have encountered both int64 and uint64 elements.
708
+
709
+ If both have been encountered, we cannot safely cast to an integer
710
+ dtype since none is large enough to hold both types of elements.
711
+
712
+ Returns
713
+ -------
714
+ return_values : bool
715
+ Whether or not we should return the original input array to avoid
716
+ data truncation.
717
+ """
718
+ if seen_sint and seen_uint:
719
+ if not coerce_numeric:
720
+ return True
721
+ else :
722
+ raise ValueError (" uint64 and negative values detected. "
723
+ " Cannot safely return a numeric array "
724
+ " without truncating data." )
725
+
726
+ return False
727
+
654
728
for i in range (n):
655
729
val = values[i]
656
730
657
731
if val.__hash__ is not None and val in na_values:
732
+ seen_null = True
733
+ if check_uint64_nan():
734
+ return values
735
+
658
736
floats[i] = complexes[i] = nan
659
737
seen_float = True
660
738
elif util.is_float_object(val):
739
+ if val != val:
740
+ seen_null = True
741
+ if check_uint64_nan():
742
+ return values
743
+
661
744
floats[i] = complexes[i] = val
662
745
seen_float = True
663
746
elif util.is_integer_object(val):
664
- floats[i] = ints[i] = val
747
+ floats[i] = complexes[i] = val
748
+ as_int = int (val)
665
749
seen_int = True
750
+
751
+ seen_uint = seen_uint or (as_int > iINT64_MAX)
752
+ seen_sint = seen_sint or (as_int < 0 )
753
+
754
+ if check_uint64_nan() or check_uint64_int64_conflict():
755
+ return values
756
+
757
+ if seen_uint:
758
+ uints[i] = as_int
759
+ elif seen_sint:
760
+ ints[i] = as_int
761
+ else :
762
+ uints[i] = as_int
763
+ ints[i] = as_int
666
764
elif util.is_bool_object(val):
667
- floats[i] = ints[i] = bools[i] = val
765
+ floats[i] = uints[i] = ints[i] = bools[i] = val
668
766
seen_bool = True
669
767
elif val is None :
768
+ seen_null = True
769
+ if check_uint64_nan():
770
+ return values
771
+
670
772
floats[i] = complexes[i] = nan
671
773
seen_float = True
672
774
elif hasattr (val, ' __len__' ) and len (val) == 0 :
673
775
if convert_empty or coerce_numeric:
776
+ seen_null = True
777
+ if check_uint64_nan():
778
+ return values
779
+
674
780
floats[i] = complexes[i] = nan
675
781
seen_float = True
676
782
else :
@@ -686,24 +792,55 @@ def maybe_convert_numeric(object[:] values, set na_values,
686
792
status = floatify(val, & fval, & maybe_int)
687
793
688
794
if fval in na_values:
795
+ seen_null = True
796
+ if check_uint64_nan():
797
+ return values
798
+
689
799
floats[i] = complexes[i] = nan
690
800
seen_float = True
691
801
else :
802
+ if fval != fval:
803
+ seen_null = True
804
+ if check_uint64_nan():
805
+ return values
806
+
692
807
floats[i] = fval
693
808
694
- if not seen_float:
695
- if maybe_int:
696
- as_int = int (val)
809
+ if maybe_int:
810
+ as_int = int (val)
697
811
698
- if as_int <= iINT64_MAX and as_int >= iINT64_MIN:
812
+ if as_int in na_values:
813
+ seen_float = True
814
+ seen_null = True
815
+ else :
816
+ seen_uint = seen_uint or (as_int > iINT64_MAX)
817
+ seen_sint = seen_sint or (as_int < 0 )
818
+ seen_int = True
819
+
820
+ if check_uint64_nan() or check_uint64_int64_conflict():
821
+ return values
822
+
823
+ if not (seen_float or as_int in na_values):
824
+ if as_int < iINT64_MIN or as_int > iUINT64_MAX:
825
+ raise ValueError (' Integer out of range.' )
826
+
827
+ if seen_uint:
828
+ uints[i] = as_int
829
+ elif seen_sint:
699
830
ints[i] = as_int
700
831
else :
701
- raise ValueError (' integer out of range' )
702
- else :
703
- seen_float = True
832
+ uints[i] = as_int
833
+ ints[i] = as_int
834
+ else :
835
+ seen_float = True
704
836
except (TypeError , ValueError ) as e:
705
837
if not coerce_numeric:
706
838
raise type (e)(str (e) + ' at position {}' .format(i))
839
+ elif " uint64" in str (e): # Exception from check functions.
840
+ raise
841
+ seen_null = True
842
+ if check_uint64_nan():
843
+ return values
707
844
708
845
floats[i] = nan
709
846
seen_float = True
@@ -713,9 +850,14 @@ def maybe_convert_numeric(object[:] values, set na_values,
713
850
elif seen_float:
714
851
return floats
715
852
elif seen_int:
716
- return ints
853
+ if seen_uint:
854
+ return uints
855
+ else :
856
+ return ints
717
857
elif seen_bool:
718
858
return bools.view(np.bool_)
859
+ elif seen_uint:
860
+ return uints
719
861
return ints
720
862
721
863
@@ -810,7 +952,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
810
952
floats[i] = < float64_t> val
811
953
complexes[i] = < double complex > val
812
954
if not seen_null:
813
- seen_uint = seen_uint or (int (val) > npy_int64_max )
955
+ seen_uint = seen_uint or (int (val) > iINT64_MAX )
814
956
seen_sint = seen_sint or (val < 0 )
815
957
816
958
if seen_uint and seen_sint:
0 commit comments