@@ -1778,20 +1778,157 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
1778
1778
return number ;
1779
1779
}
1780
1780
1781
/* Copy the decimal number at the start of `s` into a newly allocated,
   NUL-terminated string in C-locale form.

   `decimal` is the decimal point, `tsep` the thousands separator ('\0' means
   "none") and `sci` the exponent character (compared case-insensitively) used
   in `s`. In the copy, `tsep` is stripped, `decimal` is replaced with '.' and
   an exponent marker other than 'e'/'E' is replaced with 'E'.

   Accepted shape: [+-] digits [decimal digits] [sci [+-] digits], with at
   least one digit in the integer or fractional part. A `tsep` directly
   following a digit is skipped. An exponent marker that is not followed by
   any digit is left unconsumed.

   If `endpos` is not NULL, `*endpos` is set to the first character of `s` that
   was not consumed, or to `s` itself when NULL is returned.

   Returns NULL if no number could be copied or if the allocation failed;
   otherwise the caller owns the returned buffer and must free() it.
*/
char *str_copy_decimal_str_c(const char *s, char **endpos, char decimal,
                             char tsep, char sci) {
#define IS_TSEP(c) (tsep != '\0' && (c) == tsep)
    const char *p = s;
    size_t num_digits = 0;
    int has_exponent = 0;

    // Pass 1: find out how many characters can be consumed.
    // Leading sign
    if (*p == '+' || *p == '-') {
        p++;
    }
    // Integer part
    while (isdigit_ascii(*p)) {
        p++;
        p += IS_TSEP(*p);
        num_digits++;
    }
    // Fractional part. The '\0' guard keeps a NUL `decimal` from matching
    // the string terminator and walking past it.
    if (*p != '\0' && *p == decimal) {
        p++;
        while (isdigit_ascii(*p)) {
            p++;
            p += IS_TSEP(*p);
            // Fraction digits count too, so ".5" is accepted like "0.5".
            num_digits++;
        }
    }
    if (num_digits == 0) {
        if (endpos != NULL) {
            *endpos = (char *)s;
        }
        return NULL;
    }
    // Exponent part (same '\0' guard, for a NUL `sci`)
    if (*p != '\0' && toupper_ascii(*p) == toupper_ascii(sci)) {
        const char *p_at_e = p;
        size_t exp_digits = 0;
        p++;
        // Exponent sign
        if (*p == '+' || *p == '-') {
            p++;
        }
        // Exponent digits
        while (isdigit_ascii(*p)) {
            p++;
            p += IS_TSEP(*p);
            exp_digits++;
        }
        if (exp_digits == 0) {
            // No digits after the marker: un-consume it and its sign.
            p = p_at_e;
        } else {
            has_exponent = 1;
        }
    }

    // The copy can only shrink (separators are dropped), so the consumed
    // length is a safe upper bound for the output length.
    const size_t size = (size_t)(p - s);
    char *pc = malloc(size + 1);
    if (pc == NULL) {
        if (endpos != NULL) {
            *endpos = (char *)s;
        }
        return NULL;
    }

    // Pass 2: copy, normalizing as we go.
    char *dst = pc;
    p = s;
    // Copy leading sign
    if (*p == '+' || *p == '-') {
        *dst++ = *p++;
    }
    // Copy integer part
    while (isdigit_ascii(*p)) {
        *dst++ = *p++;
        p += IS_TSEP(*p);
    }
    // Copy fractional part, replacing `decimal` with '.'
    if (*p != '\0' && *p == decimal) {
        *dst++ = '.';
        p++;
        while (isdigit_ascii(*p)) {
            *dst++ = *p++;
            p += IS_TSEP(*p);
        }
    }
    // Copy exponent; pass 1 already verified that it has digits.
    if (has_exponent) {
        // Keep 'e'/'E' as written, map any other marker to 'E'.
        *dst++ = (toupper_ascii(*p) == 'E') ? *p : 'E';
        p++;
        // Copy exponent sign
        if (*p == '+' || *p == '-') {
            *dst++ = *p++;
        }
        // Copy exponent digits
        while (isdigit_ascii(*p)) {
            *dst++ = *p++;
            p += IS_TSEP(*p);
        }
    }
    *dst = '\0';
    // Both passes must agree on how much input was consumed.
    assert((size_t)(p - s) == size);
    if (endpos != NULL) {
        *endpos = (char *)p;
    }
    return pc;
#undef IS_TSEP
}
1890
+
1781
1891
double round_trip (const char * p , char * * q , char decimal , char sci , char tsep ,
1782
1892
int skip_trailing , int * error , int * maybe_int ) {
1893
+ char * pc = NULL ;
1894
+ // 'normalize' representation to C-locale; replace decimal with '.' and
1895
+ // remove t(housand)sep.
1896
+ char * endptr = NULL ;
1897
+ if (decimal != '.' || tsep != '\0' ) {
1898
+ pc = str_copy_decimal_str_c (p , & endptr , decimal , tsep , sci );
1899
+ }
1783
1900
// This is called from a nogil block in parsers.pyx
1784
1901
// so need to explicitly get GIL before Python calls
1785
1902
PyGILState_STATE gstate ;
1786
1903
gstate = PyGILState_Ensure ();
1787
-
1788
- double r = PyOS_string_to_double (p , q , 0 );
1904
+ double r ;
1905
+ if (pc != NULL ) {
1906
+ char * endpc = NULL ;
1907
+ r = PyOS_string_to_double (pc , & endpc , 0 );
1908
+ // PyOS_string_to_double needs to consume the whole string
1909
+ if (endpc == pc + strlen (pc )) {
1910
+ if (q != NULL ) {
1911
+ // report endptr from source string (p)
1912
+ * q = (char * ) endptr ;
1913
+ }
1914
+ } else {
1915
+ * error = -1 ;
1916
+ if (q != NULL ) {
1917
+ // p and pc are different len due to tsep removal. Can't report
1918
+ // how much it has consumed of p. Just rewind to beginning.
1919
+ * q = (char * )p ;
1920
+ }
1921
+ }
1922
+ } else {
1923
+ r = PyOS_string_to_double (p , q , 0 );
1924
+ }
1789
1925
if (maybe_int != NULL ) * maybe_int = 0 ;
1790
1926
if (PyErr_Occurred () != NULL ) * error = -1 ;
1791
1927
else if (r == Py_HUGE_VAL ) * error = (int )Py_HUGE_VAL ;
1792
1928
PyErr_Clear ();
1793
1929
1794
1930
PyGILState_Release (gstate );
1931
+ free (pc );
1795
1932
return r ;
1796
1933
}
1797
1934
0 commit comments