@@ -1778,20 +1778,161 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
1778
1778
return number ;
1779
1779
}
1780
1780
1781
/* Copy the decimal number at the start of `s` into a newly allocated
   C-locale string.

   The input uses `decimal` as its decimal point, `tsep` as its thousands
   separator ('\0' means "no separator") and `sci` as its exponent marker
   (compared case-insensitively). In the copy every `tsep` is stripped and
   `decimal` is replaced with '.'; the sign, digits and exponent are copied
   unchanged.

   Accepted shape: [+-] digits [decimal digits] [sci [+-] digits], with at
   least one digit in the integer or fractional part. An exponent marker that
   is not followed by at least one digit is not treated as part of the number.
   A single `tsep` directly after any integer digit is skipped, including one
   that follows the last digit.

   s       NUL-terminated input; parsing starts at s[0] (no whitespace skip).
   endpos  if not NULL, receives a pointer to the first character of `s`
           that was not consumed; it is set to `s` when NULL is returned.

   Returns a malloc-ed NUL-terminated string that the caller must release
   with free(), or NULL if no digits were found or the allocation failed.
*/
char *str_copy_decimal_str_c(const char *s, char **endpos, char decimal,
                             char tsep, char sci) {
    size_t num_digits = 0;
    int has_exponent = 0;
    const char *p = s;

    // Pass 1: find out how many characters of `s` belong to the number.
    // Leading sign
    if (*p == '+' || *p == '-') p++;
    // Integer part; each digit may be followed by one thousands separator.
    while (isdigit_ascii(*p)) {
        p++;
        p += (tsep != '\0' && *p == tsep);
        num_digits++;
    }
    // Fractional part. The '\0' guard keeps a NUL `decimal` from matching
    // the string terminator and walking past the end of the input.
    if (decimal != '\0' && *p == decimal) {
        p++;
        while (isdigit_ascii(*p)) {
            p++;
            num_digits++;
        }
    }
    if (num_digits == 0) {
        if (endpos != NULL) {
            *endpos = (char *)s;
        }
        return NULL;
    }
    // Exponent part (same terminator guard as for `decimal`).
    if (sci != '\0' && toupper_ascii(*p) == toupper_ascii(sci)) {
        const char *p_at_e = p;
        size_t exp_digits = 0;
        p++;
        // Exponent sign
        if (*p == '+' || *p == '-') p++;
        // Exponent digits
        while (isdigit_ascii(*p)) {
            p++;
            exp_digits++;
        }
        if (exp_digits == 0) {
            // No digits after the marker: un-consume the marker and sign.
            p = p_at_e;
        } else {
            has_exponent = 1;
        }
    }

    // The consumed span is an upper bound for the output length: the copy
    // only ever drops characters (the thousands separators).
    const char *end = p;
    size_t size = (size_t)(end - s);
    char *pc = malloc(size + 1);
    if (pc == NULL) {
        if (endpos != NULL) {
            *endpos = (char *)s;
        }
        return NULL;
    }

    // Pass 2: copy, normalizing as we go. The buffer is filled exclusively
    // by the writes below and terminated afterwards.
    char *dst = pc;
    p = s;
    num_digits = 0;
    // Copy leading sign
    if (*p == '+' || *p == '-') {
        *dst++ = *p++;
    }
    // Copy integer part, dropping thousands separators
    while (isdigit_ascii(*p)) {
        *dst++ = *p++;
        p += (tsep != '\0' && *p == tsep);
        num_digits++;
    }
    // Copy fractional part, replacing `decimal` with '.'
    if (decimal != '\0' && *p == decimal) {
        *dst++ = '.';
        p++;
        while (isdigit_ascii(*p)) {
            *dst++ = *p++;
            num_digits++;
        }
    }
    assert(num_digits > 0);
    // Copy exponent, but only if pass 1 found digits after the marker.
    if (has_exponent) {
        size_t exp_digits = 0;
        // Exponent marker
        *dst++ = *p++;
        // Copy exponent sign
        if (*p == '+' || *p == '-') {
            *dst++ = *p++;
        }
        // Exponent digits
        while (isdigit_ascii(*p)) {
            *dst++ = *p++;
            exp_digits++;
        }
        assert(exp_digits > 0);
    }
    // Both passes must stop at the same input position.
    assert(p == end);
    *dst = '\0';
    if (endpos != NULL) {
        *endpos = (char *)p;
    }
    return pc;
}
1887
+
1781
1888
double round_trip (const char * p , char * * q , char decimal , char sci , char tsep ,
1782
1889
int skip_trailing , int * error , int * maybe_int ) {
1890
+ char * pc = NULL ;
1891
+ // 'normalize' representation to C-locale; replace decimal with '.' and
1892
+ // remove t(housand)sep.
1893
+ char * endptr = NULL ;
1894
+ if (decimal != '.' || tsep != '\0' ) {
1895
+ pc = str_copy_decimal_str_c (p , & endptr , decimal , tsep , sci );
1896
+ if (pc == NULL ) {
1897
+ if (q != NULL ) {
1898
+ * q = (char * )p ;
1899
+ }
1900
+ * error = -1 ;
1901
+ return 0.0 ;
1902
+ }
1903
+ }
1783
1904
// This is called from a nogil block in parsers.pyx
1784
1905
// so need to explicitly get GIL before Python calls
1785
1906
PyGILState_STATE gstate ;
1786
1907
gstate = PyGILState_Ensure ();
1787
-
1788
- double r = PyOS_string_to_double (p , q , 0 );
1908
+ double r ;
1909
+ if (pc != NULL ) {
1910
+ char * endpc = NULL ;
1911
+ r = PyOS_string_to_double (pc , & endpc , 0 );
1912
+ // PyOS_string_to_double needs to consume the whole string
1913
+ if (endpc == pc + strlen (pc )) {
1914
+ if (q != NULL ) {
1915
+ // report endptr from source string (p)
1916
+ * q = (char * ) endptr ;
1917
+ }
1918
+ } else {
1919
+ * error = -1 ;
1920
+ if (q != NULL ) {
1921
+ // p and pc are different len due to tsep removal. Can't report
1922
+ // how much it has consumed of p. Just rewind to beginning.
1923
+ * q = (char * )p ;
1924
+ }
1925
+ }
1926
+ } else {
1927
+ r = PyOS_string_to_double (p , q , 0 );
1928
+ }
1789
1929
if (maybe_int != NULL ) * maybe_int = 0 ;
1790
1930
if (PyErr_Occurred () != NULL ) * error = -1 ;
1791
1931
else if (r == Py_HUGE_VAL ) * error = (int )Py_HUGE_VAL ;
1792
1932
PyErr_Clear ();
1793
1933
1794
1934
PyGILState_Release (gstate );
1935
+ free (pc );
1795
1936
return r ;
1796
1937
}
1797
1938
0 commit comments