Skip to content

Commit 15eb9ca

Browse files
githeap and WillAyd
authored and committed
ENH: Enable read_csv interpret 'Infinity' as floating point value #10065 (#28181)
1 parent 498f300 commit 15eb9ca

File tree

4 files changed

+50
-6
lines changed

4 files changed

+50
-6
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ I/O
162162

163163
- :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`)
164164
- Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`)
165-
-
165+
- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`)
166166

167167
Plotting
168168
^^^^^^^^

pandas/_libs/parsers.pyx

+14-4
Original file line numberDiff line numberDiff line change
@@ -1693,6 +1693,10 @@ cdef:
16931693
char* cposinf = b'+inf'
16941694
char* cneginf = b'-inf'
16951695

1696+
char* cinfty = b'Infinity'
1697+
char* cposinfty = b'+Infinity'
1698+
char* cneginfty = b'-Infinity'
1699+
16961700

16971701
cdef _try_double(parser_t *parser, int64_t col,
16981702
int64_t line_start, int64_t line_end,
@@ -1772,9 +1776,12 @@ cdef inline int _try_double_nogil(parser_t *parser,
17721776
if error != 0 or p_end == word or p_end[0]:
17731777
error = 0
17741778
if (strcasecmp(word, cinf) == 0 or
1775-
strcasecmp(word, cposinf) == 0):
1779+
strcasecmp(word, cposinf) == 0 or
1780+
strcasecmp(word, cinfty) == 0 or
1781+
strcasecmp(word, cposinfty) == 0):
17761782
data[0] = INF
1777-
elif strcasecmp(word, cneginf) == 0:
1783+
elif (strcasecmp(word, cneginf) == 0 or
1784+
strcasecmp(word, cneginfty) == 0 ):
17781785
data[0] = NEGINF
17791786
else:
17801787
return 1
@@ -1793,9 +1800,12 @@ cdef inline int _try_double_nogil(parser_t *parser,
17931800
if error != 0 or p_end == word or p_end[0]:
17941801
error = 0
17951802
if (strcasecmp(word, cinf) == 0 or
1796-
strcasecmp(word, cposinf) == 0):
1803+
strcasecmp(word, cposinf) == 0 or
1804+
strcasecmp(word, cinfty) == 0 or
1805+
strcasecmp(word, cposinfty) == 0):
17971806
data[0] = INF
1798-
elif strcasecmp(word, cneginf) == 0:
1807+
elif (strcasecmp(word, cneginf) == 0 or
1808+
strcasecmp(word, cneginfty) == 0):
17991809
data[0] = NEGINF
18001810
else:
18011811
return 1

pandas/_libs/src/parse_helper.h

+18-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
5050
status = to_double(data, result, sci, dec, maybe_int);
5151

5252
if (!status) {
53-
/* handle inf/-inf */
53+
/* handle inf/-inf infinity/-infinity */
5454
if (strlen(data) == 3) {
5555
if (0 == strcasecmp(data, "inf")) {
5656
*result = HUGE_VAL;
@@ -68,6 +68,23 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
6868
} else {
6969
goto parsingerror;
7070
}
71+
} else if (strlen(data) == 8) {
72+
if (0 == strcasecmp(data, "infinity")) {
73+
*result = HUGE_VAL;
74+
*maybe_int = 0;
75+
} else {
76+
goto parsingerror;
77+
}
78+
} else if (strlen(data) == 9) {
79+
if (0 == strcasecmp(data, "-infinity")) {
80+
*result = -HUGE_VAL;
81+
*maybe_int = 0;
82+
} else if (0 == strcasecmp(data, "+infinity")) {
83+
*result = HUGE_VAL;
84+
*maybe_int = 0;
85+
} else {
86+
goto parsingerror;
87+
}
7188
} else {
7289
goto parsingerror;
7390
}

pandas/tests/io/parser/test_common.py

+17
Original file line numberDiff line numberDiff line change
@@ -1865,6 +1865,23 @@ def test_inf_parsing(all_parsers, na_filter):
18651865
tm.assert_frame_equal(result, expected)
18661866

18671867

1868+
@pytest.mark.parametrize("na_filter", [True, False])
1869+
def test_infinity_parsing(all_parsers, na_filter):
1870+
parser = all_parsers
1871+
data = """\
1872+
,A
1873+
a,Infinity
1874+
b,-Infinity
1875+
c,+Infinity
1876+
"""
1877+
expected = DataFrame(
1878+
{"A": [float("infinity"), float("-infinity"), float("+infinity")]},
1879+
index=["a", "b", "c"],
1880+
)
1881+
result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter)
1882+
tm.assert_frame_equal(result, expected)
1883+
1884+
18681885
@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
18691886
def test_raise_on_no_columns(all_parsers, nrows):
18701887
parser = all_parsers

0 commit comments

Comments
 (0)