Skip to content

Commit 9e27a38

Browse files
committed
Merge branch 'master' into bugfix/replace_recursion
* master: BENCH: asv csv reading benchmarks no longer read StringIO objects off the end (pandas-dev#21807) BUG: df.agg, df.transform and df.apply use different methods when axis=1 than when axis=0 (pandas-dev#21224) BUG: bug in GroupBy.count where arg minlength passed to np.bincount must be None for np<1.13 (pandas-dev#21957) CLN: Vbench to asv conversion script (pandas-dev#22089) consistent docstring (pandas-dev#22066) TST: skip pytables test with not-updated pytables conda package (pandas-dev#22099) CLN: Remove Legacy MultiIndex Index Compatibility (pandas-dev#21740) DOC: Reword doc for filepath_or_buffer in read_csv (pandas-dev#22058) BUG: rolling with MSVC 2017 build (pandas-dev#21813)
2 parents 8271576 + 0b7a08b commit 9e27a38

23 files changed

+420
-527
lines changed

appveyor.yml

+2
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@ environment:
2020
matrix:
2121

2222
- CONDA_ROOT: "C:\\Miniconda3_64"
23+
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
2324
PYTHON_VERSION: "3.6"
2425
PYTHON_ARCH: "64"
2526
CONDA_PY: "36"
2627
CONDA_NPY: "113"
2728

2829
- CONDA_ROOT: "C:\\Miniconda3_64"
30+
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
2931
PYTHON_VERSION: "2.7"
3032
PYTHON_ARCH: "64"
3133
CONDA_PY: "27"

asv_bench/benchmarks/io/csv.py

+32-20
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,14 @@ def time_frame_date_formatting(self):
5454
self.data.to_csv(self.fname, date_format='%Y%m%d')
5555

5656

57-
class ReadCSVDInferDatetimeFormat(object):
57+
class StringIORewind(object):
58+
59+
def data(self, stringio_object):
60+
stringio_object.seek(0)
61+
return stringio_object
62+
63+
64+
class ReadCSVDInferDatetimeFormat(StringIORewind):
5865

5966
goal_time = 0.2
6067
params = ([True, False], ['custom', 'iso8601', 'ymd'])
@@ -66,10 +73,12 @@ def setup(self, infer_datetime_format, format):
6673
'iso8601': '%Y-%m-%d %H:%M:%S',
6774
'ymd': '%Y%m%d'}
6875
dt_format = formats[format]
69-
self.data = StringIO('\n'.join(rng.strftime(dt_format).tolist()))
76+
self.StringIO_input = StringIO('\n'.join(
77+
rng.strftime(dt_format).tolist()))
7078

7179
def time_read_csv(self, infer_datetime_format, format):
72-
read_csv(self.data, header=None, names=['foo'], parse_dates=['foo'],
80+
read_csv(self.data(self.StringIO_input),
81+
header=None, names=['foo'], parse_dates=['foo'],
7382
infer_datetime_format=infer_datetime_format)
7483

7584

@@ -95,7 +104,7 @@ def time_skipprows(self, skiprows):
95104
read_csv(self.fname, skiprows=skiprows)
96105

97106

98-
class ReadUint64Integers(object):
107+
class ReadUint64Integers(StringIORewind):
99108

100109
goal_time = 0.2
101110

@@ -108,13 +117,13 @@ def setup(self):
108117
self.data2 = StringIO('\n'.join(arr.astype(str).tolist()))
109118

110119
def time_read_uint64(self):
111-
read_csv(self.data1, header=None, names=['foo'])
120+
read_csv(self.data(self.data1), header=None, names=['foo'])
112121

113122
def time_read_uint64_neg_values(self):
114-
read_csv(self.data2, header=None, names=['foo'])
123+
read_csv(self.data(self.data2), header=None, names=['foo'])
115124

116125
def time_read_uint64_na_values(self):
117-
read_csv(self.data1, header=None, names=['foo'],
126+
read_csv(self.data(self.data1), header=None, names=['foo'],
118127
na_values=self.na_values)
119128

120129

@@ -140,19 +149,20 @@ def time_thousands(self, sep, thousands):
140149
read_csv(self.fname, sep=sep, thousands=thousands)
141150

142151

143-
class ReadCSVComment(object):
152+
class ReadCSVComment(StringIORewind):
144153

145154
goal_time = 0.2
146155

147156
def setup(self):
148157
data = ['A,B,C'] + (['1,2,3 # comment'] * 100000)
149-
self.s_data = StringIO('\n'.join(data))
158+
self.StringIO_input = StringIO('\n'.join(data))
150159

151160
def time_comment(self):
152-
read_csv(self.s_data, comment='#', header=None, names=list('abc'))
161+
read_csv(self.data(self.StringIO_input), comment='#',
162+
header=None, names=list('abc'))
153163

154164

155-
class ReadCSVFloatPrecision(object):
165+
class ReadCSVFloatPrecision(StringIORewind):
156166

157167
goal_time = 0.2
158168
params = ([',', ';'], ['.', '_'], [None, 'high', 'round_trip'])
@@ -164,14 +174,14 @@ def setup(self, sep, decimal, float_precision):
164174
rows = sep.join(['0{}'.format(decimal) + '{}'] * 3) + '\n'
165175
data = rows * 5
166176
data = data.format(*floats) * 200 # 1000 x 3 strings csv
167-
self.s_data = StringIO(data)
177+
self.StringIO_input = StringIO(data)
168178

169179
def time_read_csv(self, sep, decimal, float_precision):
170-
read_csv(self.s_data, sep=sep, header=None, names=list('abc'),
171-
float_precision=float_precision)
180+
read_csv(self.data(self.StringIO_input), sep=sep, header=None,
181+
names=list('abc'), float_precision=float_precision)
172182

173183
def time_read_csv_python_engine(self, sep, decimal, float_precision):
174-
read_csv(self.s_data, sep=sep, header=None, engine='python',
184+
read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
175185
float_precision=None, names=list('abc'))
176186

177187

@@ -193,7 +203,7 @@ def time_convert_direct(self):
193203
read_csv(self.fname, dtype='category')
194204

195205

196-
class ReadCSVParseDates(object):
206+
class ReadCSVParseDates(StringIORewind):
197207

198208
goal_time = 0.2
199209

@@ -206,12 +216,14 @@ def setup(self):
206216
"""
207217
two_cols = ['KORD,19990127'] * 5
208218
data = data.format(*two_cols)
209-
self.s_data = StringIO(data)
219+
self.StringIO_input = StringIO(data)
210220

211221
def time_multiple_date(self):
212-
read_csv(self.s_data, sep=',', header=None,
213-
names=list(string.digits[:9]), parse_dates=[[1, 2], [1, 3]])
222+
read_csv(self.data(self.StringIO_input), sep=',', header=None,
223+
names=list(string.digits[:9]),
224+
parse_dates=[[1, 2], [1, 3]])
214225

215226
def time_baseline(self):
216-
read_csv(self.s_data, sep=',', header=None, parse_dates=[1],
227+
read_csv(self.data(self.StringIO_input), sep=',', header=None,
228+
parse_dates=[1],
217229
names=list(string.digits[:9]))

asv_bench/vbench_to_asv.py

-163
This file was deleted.

doc/source/whatsnew/v0.23.4.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ and bug fixes. We recommend that all users upgrade to this version.
2020
Fixed Regressions
2121
~~~~~~~~~~~~~~~~~
2222

23-
-
23+
- Rolling variance calculations returned all missing values on Windows with Python 3.7 (:issue:`21813`)
2424
-
2525

2626
.. _whatsnew_0234.bug_fixes:

doc/source/whatsnew/v0.24.0.txt

+7-5
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,9 @@ Numeric
475475
- Bug in :class:`Series` ``__rmatmul__`` doesn't support matrix vector multiplication (:issue:`21530`)
476476
- Bug in :func:`factorize` fails with read-only array (:issue:`12813`)
477477
- Fixed bug in :func:`unique` where signed zeros were handled inconsistently: for some inputs 0.0 and -0.0 were treated as equal and for some inputs as different. Now they are treated as equal for all inputs (:issue:`21866`)
478-
-
478+
- Bug in :meth:`DataFrame.agg`, :meth:`DataFrame.transform` and :meth:`DataFrame.apply` where,
479+
when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``),
480+
a ``TypeError`` was wrongly raised. For all three methods such calculations are now done correctly (:issue:`16679`).
479481
-
480482

481483
Strings
@@ -514,7 +516,7 @@ Missing
514516
MultiIndex
515517
^^^^^^^^^^
516518

517-
-
519+
- Removed compatibility for MultiIndex pickles prior to version 0.8.0; compatibility with MultiIndex pickles from version 0.13 forward is maintained (:issue:`21654`)
518520
-
519521
-
520522

@@ -536,11 +538,11 @@ Groupby/Resample/Rolling
536538

537539
- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`)
538540
- Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`)
539-
-
540-
-
541-
541+
- Bug where ``ValueError`` was wrongly raised when calling the :func:`~pandas.core.groupby.SeriesGroupBy.count` method of a
542+
``SeriesGroupBy`` when the grouping variable only contains NaNs and numpy version < 1.13 (:issue:`21956`).
542543
- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'`` and a
543544
datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)
545+
-
544546

545547
Sparse
546548
^^^^^^

pandas/_libs/src/headers/cmath

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#if defined(_MSC_VER) && (_MSC_VER < 1800)
77
#include <cmath>
88
namespace std {
9+
__inline int isnan(double x) { return _isnan(x); }
910
__inline int signbit(double num) { return _copysign(1.0, num) < 0; }
1011
}
1112
#else

pandas/_libs/window.pyx

+11-10
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ cnp.import_array()
1414

1515

1616
cdef extern from "src/headers/cmath" namespace "std":
17+
bint isnan(double) nogil
1718
int signbit(double) nogil
1819
double sqrt(double x) nogil
1920

@@ -653,16 +654,16 @@ cdef inline void add_var(double val, double *nobs, double *mean_x,
653654
double *ssqdm_x) nogil:
654655
""" add a value from the var calc """
655656
cdef double delta
656-
657-
# Not NaN
658-
if val == val:
659-
nobs[0] = nobs[0] + 1
660-
661-
# a part of Welford's method for the online variance-calculation
662-
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
663-
delta = val - mean_x[0]
664-
mean_x[0] = mean_x[0] + delta / nobs[0]
665-
ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0]
657+
# `isnan` instead of equality as fix for GH-21813, msvc 2017 bug
658+
if isnan(val):
659+
return
660+
661+
nobs[0] = nobs[0] + 1
662+
# a part of Welford's method for the online variance-calculation
663+
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
664+
delta = val - mean_x[0]
665+
mean_x[0] = mean_x[0] + delta / nobs[0]
666+
ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0]
666667

667668

668669
cdef inline void remove_var(double val, double *nobs, double *mean_x,

0 commit comments

Comments
 (0)