Skip to content

Commit 8b6a462

Browse files
author
Marco Gorelli
committed
Pull latest changes
2 parents (96cfb08 + 4566850), commit 8b6a462

File tree

13 files changed

+78
-16
lines changed

13 files changed

+78
-16
lines changed

asv_bench/benchmarks/rolling.py

+3
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,9 @@ def setup(self, constructor, window, dtype, method):
2121
def time_rolling(self, constructor, window, dtype, method):
2222
getattr(self.roll, method)()
2323

24+
def peakmem_rolling(self, constructor, window, dtype, method):
25+
getattr(self.roll, method)()
26+
2427

2528
class ExpandingMethods:
2629

doc/source/reference/plotting.rst

+4
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,14 @@ The following functions are contained in the `pandas.plotting` module.
1313
:toctree: api/
1414

1515
andrews_curves
16+
autocorrelation_plot
1617
bootstrap_plot
18+
boxplot
1719
deregister_matplotlib_converters
1820
lag_plot
1921
parallel_coordinates
22+
plot_params
2023
radviz
2124
register_matplotlib_converters
2225
scatter_matrix
26+
table

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -162,7 +162,7 @@ I/O
162162

163163
- :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`)
164164
- Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`)
165-
-
165+
- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`)
166166

167167
Plotting
168168
^^^^^^^^

pandas/_libs/index.pyx

+1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from datetime import datetime, timedelta, date
2+
import warnings
23

34
import cython
45

pandas/_libs/index_class_helper.pxi.in

+10-1
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,16 @@ cdef class {{name}}Engine(IndexEngine):
6060

6161
# A view is needed for some subclasses, such as PeriodEngine:
6262
values = self._get_index_values().view('{{dtype}}')
63-
indexer = values == val
63+
try:
64+
with warnings.catch_warnings():
65+
# e.g. if values is float64 and `val` is a str, suppress warning
66+
warnings.filterwarnings("ignore", category=FutureWarning)
67+
indexer = values == val
68+
except TypeError:
69+
# if the equality above returns a bool, cython will raise TypeError
70+
# when trying to cast it to ndarray
71+
raise KeyError(val)
72+
6473
found = np.where(indexer)[0]
6574
count = len(found)
6675

pandas/_libs/parsers.pyx

+14-4
Original file line number | Diff line number | Diff line change
@@ -1693,6 +1693,10 @@ cdef:
16931693
char* cposinf = b'+inf'
16941694
char* cneginf = b'-inf'
16951695

1696+
char* cinfty = b'Infinity'
1697+
char* cposinfty = b'+Infinity'
1698+
char* cneginfty = b'-Infinity'
1699+
16961700

16971701
cdef _try_double(parser_t *parser, int64_t col,
16981702
int64_t line_start, int64_t line_end,
@@ -1772,9 +1776,12 @@ cdef inline int _try_double_nogil(parser_t *parser,
17721776
if error != 0 or p_end == word or p_end[0]:
17731777
error = 0
17741778
if (strcasecmp(word, cinf) == 0 or
1775-
strcasecmp(word, cposinf) == 0):
1779+
strcasecmp(word, cposinf) == 0 or
1780+
strcasecmp(word, cinfty) == 0 or
1781+
strcasecmp(word, cposinfty) == 0):
17761782
data[0] = INF
1777-
elif strcasecmp(word, cneginf) == 0:
1783+
elif (strcasecmp(word, cneginf) == 0 or
1784+
strcasecmp(word, cneginfty) == 0 ):
17781785
data[0] = NEGINF
17791786
else:
17801787
return 1
@@ -1793,9 +1800,12 @@ cdef inline int _try_double_nogil(parser_t *parser,
17931800
if error != 0 or p_end == word or p_end[0]:
17941801
error = 0
17951802
if (strcasecmp(word, cinf) == 0 or
1796-
strcasecmp(word, cposinf) == 0):
1803+
strcasecmp(word, cposinf) == 0 or
1804+
strcasecmp(word, cinfty) == 0 or
1805+
strcasecmp(word, cposinfty) == 0):
17971806
data[0] = INF
1798-
elif strcasecmp(word, cneginf) == 0:
1807+
elif (strcasecmp(word, cneginf) == 0 or
1808+
strcasecmp(word, cneginfty) == 0):
17991809
data[0] = NEGINF
18001810
else:
18011811
return 1

pandas/_libs/src/parse_helper.h

+18-1
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
5050
status = to_double(data, result, sci, dec, maybe_int);
5151

5252
if (!status) {
53-
/* handle inf/-inf */
53+
/* handle inf/-inf infinity/-infinity */
5454
if (strlen(data) == 3) {
5555
if (0 == strcasecmp(data, "inf")) {
5656
*result = HUGE_VAL;
@@ -68,6 +68,23 @@ int floatify(PyObject *str, double *result, int *maybe_int) {
6868
} else {
6969
goto parsingerror;
7070
}
71+
} else if (strlen(data) == 8) {
72+
if (0 == strcasecmp(data, "infinity")) {
73+
*result = HUGE_VAL;
74+
*maybe_int = 0;
75+
} else {
76+
goto parsingerror;
77+
}
78+
} else if (strlen(data) == 9) {
79+
if (0 == strcasecmp(data, "-infinity")) {
80+
*result = -HUGE_VAL;
81+
*maybe_int = 0;
82+
} else if (0 == strcasecmp(data, "+infinity")) {
83+
*result = HUGE_VAL;
84+
*maybe_int = 0;
85+
} else {
86+
goto parsingerror;
87+
}
7188
} else {
7289
goto parsingerror;
7390
}

pandas/core/groupby/generic.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -349,7 +349,7 @@ def _decide_output_index(self, output, labels):
349349
output_keys = sorted(output)
350350
try:
351351
output_keys.sort()
352-
except Exception: # pragma: no cover
352+
except TypeError:
353353
pass
354354

355355
if isinstance(labels, MultiIndex):

pandas/core/groupby/groupby.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -727,8 +727,7 @@ def f(g):
727727
with option_context("mode.chained_assignment", None):
728728
try:
729729
result = self._python_apply_general(f)
730-
except Exception:
731-
730+
except TypeError:
732731
# gh-20949
733732
# try again, with .apply acting as a filtering
734733
# operation, by excluding the grouping column

pandas/core/groupby/grouper.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -592,9 +592,11 @@ def is_in_axis(key):
592592

593593
# if the grouper is obj[name]
594594
def is_in_obj(gpr):
595+
if not hasattr(gpr, "name"):
596+
return False
595597
try:
596-
return id(gpr) == id(obj[gpr.name])
597-
except Exception:
598+
return gpr is obj[gpr.name]
599+
except (KeyError, IndexError):
598600
return False
599601

600602
for i, (gpr, level) in enumerate(zip(keys, levels)):

pandas/core/groupby/ops.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -212,8 +212,8 @@ def apply(self, f, data, axis=0):
212212
# This Exception is also raised if `f` triggers an exception
213213
# but it is preferable to raise the exception in Python.
214214
pass
215-
except Exception:
216-
# raise this error to the caller
215+
except TypeError:
216+
# occurs if we have any EAs
217217
pass
218218

219219
for key, (i, group) in zip(group_keys, splitter):

pandas/plotting/_misc.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -417,8 +417,8 @@ def autocorrelation_plot(series, ax=None, **kwds):
417417
418418
Parameters
419419
----------
420-
series: Time series
421-
ax: Matplotlib axis object, optional
420+
series : Time series
421+
ax : Matplotlib axis object, optional
422422
kwds : keywords
423423
Options to pass to matplotlib plotting method
424424

pandas/tests/io/parser/test_common.py

+17
Original file line number | Diff line number | Diff line change
@@ -1865,6 +1865,23 @@ def test_inf_parsing(all_parsers, na_filter):
18651865
tm.assert_frame_equal(result, expected)
18661866

18671867

1868+
@pytest.mark.parametrize("na_filter", [True, False])
1869+
def test_infinity_parsing(all_parsers, na_filter):
1870+
parser = all_parsers
1871+
data = """\
1872+
,A
1873+
a,Infinity
1874+
b,-Infinity
1875+
c,+Infinity
1876+
"""
1877+
expected = DataFrame(
1878+
{"A": [float("infinity"), float("-infinity"), float("+infinity")]},
1879+
index=["a", "b", "c"],
1880+
)
1881+
result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter)
1882+
tm.assert_frame_equal(result, expected)
1883+
1884+
18681885
@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
18691886
def test_raise_on_no_columns(all_parsers, nrows):
18701887
parser = all_parsers

0 commit comments

Comments
 (0)