@@ -13,9 +13,6 @@ from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
13
13
14
14
# core.common import for fast inference checks
15
15
16
- npy_int64_max = np.iinfo(np.int64).max
17
-
18
-
19
16
cpdef bint is_float(object obj):
20
17
return util.is_float_object(obj)
21
18
@@ -629,48 +626,155 @@ cdef extern from "parse_helper.h":
629
626
630
627
cdef int64_t iINT64_MAX = < int64_t> INT64_MAX
631
628
cdef int64_t iINT64_MIN = < int64_t> INT64_MIN
629
+ cdef uint64_t iUINT64_MAX = < uint64_t> UINT64_MAX
632
630
633
631
634
- def maybe_convert_numeric (object[: ] values , set na_values ,
632
+ def maybe_convert_numeric (ndarray[object ] values , set na_values ,
635
633
bint convert_empty = True , bint coerce_numeric = False ):
636
634
"""
637
- Type inference function-- convert strings to numeric (potentially) and
638
- convert to proper dtype array
635
+ Convert object array to a numeric array if possible.
636
+
637
+ Parameters
638
+ ----------
639
+ values : ndarray
640
+ Array of object elements to convert.
641
+ na_values : set
642
+ Set of values that should be interpreted as NaN.
643
+ convert_empty : bool, default True
644
+ If an empty array-like object is encountered, whether to interpret
645
+ that element as NaN or not. If set to False, a ValueError will be
646
+ raised if such an element is encountered and 'coerce_numeric' is False.
647
+ coerce_numeric : bool, default False
648
+ If initial attempts to convert to numeric have failed, whether to
649
+ force conversion to numeric via alternative methods or by setting the
650
+ element to NaN. Otherwise, an Exception will be raised when such an
651
+ element is encountered.
652
+
653
+ This boolean also has an impact on how conversion behaves when a
654
+ numeric array has no suitable numerical dtype to return (i.e. uint64,
655
+ int32, uint8). If set to False, the original object array will be
656
+ returned. Otherwise, a ValueError will be raised.
657
+
658
+ Returns
659
+ -------
660
+ numeric_array : array of converted object values to numerical ones
639
661
"""
640
662
cdef:
641
663
int status, maybe_int
642
664
Py_ssize_t i, n = values.size
643
665
ndarray[float64_t] floats = np.empty(n, dtype = ' f8' )
644
666
ndarray[complex128_t] complexes = np.empty(n, dtype = ' c16' )
645
667
ndarray[int64_t] ints = np.empty(n, dtype = ' i8' )
668
+ ndarray[uint64_t] uints = np.empty(n, dtype = ' u8' )
646
669
ndarray[uint8_t] bools = np.empty(n, dtype = ' u1' )
670
+ bint seen_null = False
671
+ bint seen_uint = False
672
+ bint seen_sint = False
647
673
bint seen_float = False
648
674
bint seen_complex = False
649
675
bint seen_int = False
650
676
bint seen_bool = False
651
677
object val
652
678
float64_t fval
653
679
680
+ def check_uint64_nan ():
681
+ """
682
+ Check whether we have encountered uint64 when handling a NaN element.
683
+
684
+ If uint64 has been encountered, we cannot safely cast to float64 due
685
+ to truncation problems (this would occur if we return a numeric array
686
+ containing a NaN element).
687
+
688
+ Returns
689
+ -------
690
+ return_values : bool
691
+ Whether or not we should return the original input array to avoid
692
+ data truncation.
693
+ """
694
+ if seen_null and seen_uint:
695
+ if not coerce_numeric:
696
+ return True
697
+ else :
698
+ raise ValueError (" uint64 array detected, and such an "
699
+ " array cannot contain NaN." )
700
+
701
+ return False
702
+
703
+ def check_uint64_int64_conflict ():
704
+ """
705
+ Check whether we have encountered both int64 and uint64 elements.
706
+
707
+ If both have been encountered, we cannot safely cast to an integer
708
+ dtype since none is large enough to hold both types of elements.
709
+
710
+ Returns
711
+ -------
712
+ return_values : bool
713
+ Whether or not we should return the original input array to avoid
714
+ data truncation.
715
+ """
716
+ if seen_sint and seen_uint:
717
+ if not coerce_numeric:
718
+ return True
719
+ else :
720
+ raise ValueError (" uint64 and negative values detected. "
721
+ " Cannot safely return a numeric array "
722
+ " without truncating data." )
723
+
724
+ return False
725
+
654
726
for i in range (n):
655
727
val = values[i]
656
728
657
729
if val.__hash__ is not None and val in na_values:
730
+ seen_null = True
731
+ if check_uint64_nan():
732
+ return values
733
+
658
734
floats[i] = complexes[i] = nan
659
735
seen_float = True
660
736
elif util.is_float_object(val):
737
+ if val != val:
738
+ seen_null = True
739
+ if check_uint64_nan():
740
+ return values
741
+
661
742
floats[i] = complexes[i] = val
662
743
seen_float = True
663
744
elif util.is_integer_object(val):
664
- floats[i] = ints[i] = val
745
+ floats[i] = complexes[i] = val
746
+ as_int = int (val)
665
747
seen_int = True
748
+
749
+ seen_uint = seen_uint or (as_int > iINT64_MAX)
750
+ seen_sint = seen_sint or (as_int < 0 )
751
+
752
+ if check_uint64_nan() or check_uint64_int64_conflict():
753
+ return values
754
+
755
+ if seen_uint:
756
+ uints[i] = as_int
757
+ elif seen_sint:
758
+ ints[i] = as_int
759
+ else :
760
+ uints[i] = as_int
761
+ ints[i] = as_int
666
762
elif util.is_bool_object(val):
667
- floats[i] = ints[i] = bools[i] = val
763
+ floats[i] = uints[i] = ints[i] = bools[i] = val
668
764
seen_bool = True
669
765
elif val is None :
766
+ seen_null = True
767
+ if check_uint64_nan():
768
+ return values
769
+
670
770
floats[i] = complexes[i] = nan
671
771
seen_float = True
672
772
elif hasattr (val, ' __len__' ) and len (val) == 0 :
673
773
if convert_empty or coerce_numeric:
774
+ seen_null = True
775
+ if check_uint64_nan():
776
+ return values
777
+
674
778
floats[i] = complexes[i] = nan
675
779
seen_float = True
676
780
else :
@@ -686,24 +790,55 @@ def maybe_convert_numeric(object[:] values, set na_values,
686
790
status = floatify(val, & fval, & maybe_int)
687
791
688
792
if fval in na_values:
793
+ seen_null = True
794
+ if check_uint64_nan():
795
+ return values
796
+
689
797
floats[i] = complexes[i] = nan
690
798
seen_float = True
691
799
else :
800
+ if fval != fval:
801
+ seen_null = True
802
+ if check_uint64_nan():
803
+ return values
804
+
692
805
floats[i] = fval
693
806
694
- if not seen_float:
695
- if maybe_int:
696
- as_int = int (val)
807
+ if maybe_int:
808
+ as_int = int (val)
697
809
698
- if as_int <= iINT64_MAX and as_int >= iINT64_MIN:
810
+ if as_int in na_values:
811
+ seen_float = True
812
+ seen_null = True
813
+ else :
814
+ seen_uint = seen_uint or (as_int > iINT64_MAX)
815
+ seen_sint = seen_sint or (as_int < 0 )
816
+ seen_int = True
817
+
818
+ if check_uint64_nan() or check_uint64_int64_conflict():
819
+ return values
820
+
821
+ if not (seen_float or as_int in na_values):
822
+ if as_int < iINT64_MIN or as_int > iUINT64_MAX:
823
+ raise ValueError (' Integer out of range.' )
824
+
825
+ if seen_uint:
826
+ uints[i] = as_int
827
+ elif seen_sint:
699
828
ints[i] = as_int
700
829
else :
701
- raise ValueError (' integer out of range' )
702
- else :
703
- seen_float = True
830
+ uints[i] = as_int
831
+ ints[i] = as_int
832
+ else :
833
+ seen_float = True
704
834
except (TypeError , ValueError ) as e:
705
835
if not coerce_numeric:
706
836
raise type (e)(str (e) + ' at position {}' .format(i))
837
+ elif " uint64" in str (e): # Exception from check functions.
838
+ raise
839
+ seen_null = True
840
+ if check_uint64_nan():
841
+ return values
707
842
708
843
floats[i] = nan
709
844
seen_float = True
@@ -713,9 +848,14 @@ def maybe_convert_numeric(object[:] values, set na_values,
713
848
elif seen_float:
714
849
return floats
715
850
elif seen_int:
716
- return ints
851
+ if seen_uint:
852
+ return uints
853
+ else :
854
+ return ints
717
855
elif seen_bool:
718
856
return bools.view(np.bool_)
857
+ elif seen_uint:
858
+ return uints
719
859
return ints
720
860
721
861
@@ -810,7 +950,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
810
950
floats[i] = < float64_t> val
811
951
complexes[i] = < double complex > val
812
952
if not seen_null:
813
- seen_uint = seen_uint or (int (val) > npy_int64_max )
953
+ seen_uint = seen_uint or (int (val) > iINT64_MAX )
814
954
seen_sint = seen_sint or (val < 0 )
815
955
816
956
if seen_uint and seen_sint:
0 commit comments