Skip to content

Commit 465c7e2

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into tslibs-offsets6
2 parents f1988cf + 3d44221 commit 465c7e2

File tree

12 files changed

+231
-33
lines changed

12 files changed

+231
-33
lines changed

doc/source/whatsnew/v0.21.1.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ Indexing
7373
- Bug in a boolean comparison of a ``datetime.datetime`` and a ``datetime64[ns]`` dtype Series (:issue:`17965`)
7474
- Bug where a ``MultiIndex`` with more than a million records was not raising ``AttributeError`` when trying to access a missing attribute (:issue:`18165`)
7575
- Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`)
76-
-
76+
- Bug in ``Index.putmask`` when an invalid mask is passed (:issue:`18368`)
7777
-
7878

7979
I/O
@@ -103,7 +103,7 @@ Groupby/Resample/Rolling
103103
- Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`)
104104
- Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequency is 12h or higher (:issue:`15549`)
105105
- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`)
106-
-
106+
- Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`)
107107
-
108108
-
109109

doc/source/whatsnew/v0.22.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ Other API Changes
7979
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
8080
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
8181
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
82-
82+
- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`).
8383

8484
.. _whatsnew_0220.deprecations:
8585

@@ -144,6 +144,7 @@ Indexing
144144
- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
145145
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
146146
- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`)
147+
- Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in Python 3 (:issue:`18434`)
147148
- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`)
148149
-
149150

pandas/_libs/tslibs/offsets.pyx

+28-1
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ def _validate_business_time(t_input):
259259
# ---------------------------------------------------------------------
260260
# Constructor Helpers
261261

262-
_rd_kwds = set([
262+
relativedelta_kwds = set([
263263
'years', 'months', 'weeks', 'days',
264264
'year', 'month', 'week', 'day', 'weekday',
265265
'hour', 'minute', 'second', 'microsecond',
@@ -404,6 +404,33 @@ class _BaseOffset(object):
404404
# will raise NotImplementedError.
405405
return get_day_of_month(other, self._day_opt)
406406

407+
def _validate_n(self, n):
408+
"""
409+
Require that `n` be an integer.
410+
411+
Parameters
412+
----------
413+
n : int
414+
415+
Returns
416+
-------
417+
nint : int
418+
419+
Raises
420+
------
421+
TypeError if `int(n)` raises
422+
ValueError if n != int(n)
423+
"""
424+
try:
425+
nint = int(n)
426+
except (ValueError, TypeError):
427+
raise TypeError('`n` argument must be an integer, '
428+
'got {ntype}'.format(ntype=type(n)))
429+
if n != nint:
430+
raise ValueError('`n` argument must be an integer, '
431+
'got {n}'.format(n=n))
432+
return nint
433+
407434

408435
class BaseOffset(_BaseOffset):
409436
# Here we add __rfoo__ methods that don't play well with cdef classes

pandas/_libs/window.pyx

+17-8
Original file line numberDiff line numberDiff line change
@@ -661,9 +661,11 @@ cdef inline void add_var(double val, double *nobs, double *mean_x,
661661
if val == val:
662662
nobs[0] = nobs[0] + 1
663663

664-
delta = (val - mean_x[0])
664+
# a part of Welford's method for the online variance-calculation
665+
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
666+
delta = val - mean_x[0]
665667
mean_x[0] = mean_x[0] + delta / nobs[0]
666-
ssqdm_x[0] = ssqdm_x[0] + delta * (val - mean_x[0])
668+
ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0]
667669

668670

669671
cdef inline void remove_var(double val, double *nobs, double *mean_x,
@@ -675,9 +677,11 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x,
675677
if val == val:
676678
nobs[0] = nobs[0] - 1
677679
if nobs[0]:
678-
delta = (val - mean_x[0])
680+
# a part of Welford's method for the online variance-calculation
681+
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
682+
delta = val - mean_x[0]
679683
mean_x[0] = mean_x[0] - delta / nobs[0]
680-
ssqdm_x[0] = ssqdm_x[0] - delta * (val - mean_x[0])
684+
ssqdm_x[0] = ssqdm_x[0] - ((nobs[0] + 1) * delta ** 2) / nobs[0]
681685
else:
682686
mean_x[0] = 0
683687
ssqdm_x[0] = 0
@@ -689,7 +693,7 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
689693
Numerically stable implementation using Welford's method.
690694
"""
691695
cdef:
692-
double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta
696+
double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta, mean_x_old
693697
int64_t s, e
694698
bint is_variable
695699
Py_ssize_t i, j, N
@@ -749,6 +753,9 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
749753
add_var(input[i], &nobs, &mean_x, &ssqdm_x)
750754
output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
751755

756+
# a part of Welford's method for the online variance-calculation
757+
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
758+
752759
# After the first window, observations can both be added and
753760
# removed
754761
for i from win <= i < N:
@@ -760,10 +767,12 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp,
760767

761768
# Adding one observation and removing another one
762769
delta = val - prev
763-
prev -= mean_x
770+
mean_x_old = mean_x
771+
764772
mean_x += delta / nobs
765-
val -= mean_x
766-
ssqdm_x += (val + prev) * delta
773+
ssqdm_x += ((nobs - 1) * val
774+
+ (nobs + 1) * prev
775+
- 2 * nobs * mean_x_old) * delta / nobs
767776

768777
else:
769778
add_var(val, &nobs, &mean_x, &ssqdm_x)

pandas/core/indexes/base.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1939,7 +1939,10 @@ def putmask(self, mask, value):
19391939
try:
19401940
np.putmask(values, mask, self._convert_for_op(value))
19411941
return self._shallow_copy(values)
1942-
except (ValueError, TypeError):
1942+
except (ValueError, TypeError) as err:
1943+
if is_object_dtype(self):
1944+
raise err
1945+
19431946
# coerces to object
19441947
return self.astype(object).putmask(mask, value)
19451948

pandas/core/indexes/multi.py

+15
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,11 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
11621162
MultiIndex.from_product : Make a MultiIndex from cartesian product
11631163
of iterables
11641164
"""
1165+
if not is_list_like(arrays):
1166+
raise TypeError("Input must be a list / sequence of array-likes.")
1167+
elif is_iterator(arrays):
1168+
arrays = list(arrays)
1169+
11651170
# Check if lengths of all arrays are equal or not,
11661171
# raise ValueError, if not
11671172
for i in range(1, len(arrays)):
@@ -1206,6 +1211,11 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
12061211
MultiIndex.from_product : Make a MultiIndex from cartesian product
12071212
of iterables
12081213
"""
1214+
if not is_list_like(tuples):
1215+
raise TypeError('Input must be a list / sequence of tuple-likes.')
1216+
elif is_iterator(tuples):
1217+
tuples = list(tuples)
1218+
12091219
if len(tuples) == 0:
12101220
if names is None:
12111221
msg = 'Cannot infer number of levels from empty list'
@@ -1260,6 +1270,11 @@ def from_product(cls, iterables, sortorder=None, names=None):
12601270
from pandas.core.categorical import _factorize_from_iterables
12611271
from pandas.core.reshape.util import cartesian_product
12621272

1273+
if not is_list_like(iterables):
1274+
raise TypeError("Input must be a list / sequence of iterables.")
1275+
elif is_iterator(iterables):
1276+
iterables = list(iterables)
1277+
12631278
labels, levels = _factorize_from_iterables(iterables)
12641279
labels = cartesian_product(labels)
12651280
return MultiIndex(levels, labels, sortorder=sortorder, names=names)

pandas/tests/indexes/common.py

+13
Original file line numberDiff line numberDiff line change
@@ -1032,3 +1032,16 @@ def test_map(self):
10321032

10331033
dict_map = {}
10341034
tm.assert_index_equal(index.map(dict_map), nan_index)
1035+
1036+
def test_putmask_with_wrong_mask(self):
1037+
# GH18368
1038+
index = self.create_index()
1039+
1040+
with pytest.raises(ValueError):
1041+
index.putmask(np.ones(len(index) + 1, np.bool), 1)
1042+
1043+
with pytest.raises(ValueError):
1044+
index.putmask(np.ones(len(index) - 1, np.bool), 1)
1045+
1046+
with pytest.raises(ValueError):
1047+
index.putmask('foo', 1)

pandas/tests/indexes/test_multi.py

+59-5
Original file line numberDiff line numberDiff line change
@@ -672,15 +672,31 @@ def test_from_arrays(self):
672672
for lev, lab in zip(self.index.levels, self.index.labels):
673673
arrays.append(np.asarray(lev).take(lab))
674674

675-
result = MultiIndex.from_arrays(arrays)
676-
assert list(result) == list(self.index)
675+
# list of arrays as input
676+
result = MultiIndex.from_arrays(arrays, names=self.index.names)
677+
tm.assert_index_equal(result, self.index)
677678

678679
# infer correctly
679680
result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')],
680681
['a', 'b']])
681682
assert result.levels[0].equals(Index([Timestamp('20130101')]))
682683
assert result.levels[1].equals(Index(['a', 'b']))
683684

685+
def test_from_arrays_iterator(self):
686+
# GH 18434
687+
arrays = []
688+
for lev, lab in zip(self.index.levels, self.index.labels):
689+
arrays.append(np.asarray(lev).take(lab))
690+
691+
# iterator as input
692+
result = MultiIndex.from_arrays(iter(arrays), names=self.index.names)
693+
tm.assert_index_equal(result, self.index)
694+
695+
# invalid iterator input
696+
with tm.assert_raises_regex(
697+
TypeError, "Input must be a list / sequence of array-likes."):
698+
MultiIndex.from_arrays(0)
699+
684700
def test_from_arrays_index_series_datetimetz(self):
685701
idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
686702
tz='US/Eastern')
@@ -825,7 +841,25 @@ def test_from_product(self):
825841
expected = MultiIndex.from_tuples(tuples, names=names)
826842

827843
tm.assert_index_equal(result, expected)
828-
assert result.names == names
844+
845+
def test_from_product_iterator(self):
846+
# GH 18434
847+
first = ['foo', 'bar', 'buz']
848+
second = ['a', 'b', 'c']
849+
names = ['first', 'second']
850+
tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'),
851+
('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'),
852+
('buz', 'c')]
853+
expected = MultiIndex.from_tuples(tuples, names=names)
854+
855+
# iterator as input
856+
result = MultiIndex.from_product(iter([first, second]), names=names)
857+
tm.assert_index_equal(result, expected)
858+
859+
# Invalid non-iterable input
860+
with tm.assert_raises_regex(
861+
TypeError, "Input must be a list / sequence of iterables."):
862+
MultiIndex.from_product(0)
829863

830864
def test_from_product_empty(self):
831865
# 0 levels
@@ -1725,8 +1759,28 @@ def test_from_tuples(self):
17251759
'from empty list',
17261760
MultiIndex.from_tuples, [])
17271761

1728-
idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
1729-
assert len(idx) == 2
1762+
expected = MultiIndex(levels=[[1, 3], [2, 4]],
1763+
labels=[[0, 1], [0, 1]],
1764+
names=['a', 'b'])
1765+
1766+
# input tuples
1767+
result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
1768+
tm.assert_index_equal(result, expected)
1769+
1770+
def test_from_tuples_iterator(self):
1771+
# GH 18434
1772+
# input iterator for tuples
1773+
expected = MultiIndex(levels=[[1, 3], [2, 4]],
1774+
labels=[[0, 1], [0, 1]],
1775+
names=['a', 'b'])
1776+
1777+
result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b'])
1778+
tm.assert_index_equal(result, expected)
1779+
1780+
# input non-iterables
1781+
with tm.assert_raises_regex(
1782+
TypeError, 'Input must be a list / sequence of tuple-likes.'):
1783+
MultiIndex.from_tuples(0)
17301784

17311785
def test_from_tuples_empty(self):
17321786
# GH 16777

pandas/tests/test_window.py

+8
Original file line numberDiff line numberDiff line change
@@ -2482,6 +2482,14 @@ def test_rolling_corr_pairwise(self):
24822482
self._check_pairwise_moment('rolling', 'corr', window=10,
24832483
min_periods=5)
24842484

2485+
@pytest.mark.parametrize('window', range(7))
2486+
def test_rolling_corr_with_zero_variance(self, window):
2487+
# GH 18430
2488+
s = pd.Series(np.zeros(20))
2489+
other = pd.Series(np.arange(20))
2490+
2491+
assert s.rolling(window=window).corr(other=other).isna().all()
2492+
24852493
def _check_pairwise_moment(self, dispatch, name, **kwargs):
24862494
def get_result(obj, obj2=None):
24872495
return getattr(getattr(obj, dispatch)(**kwargs), name)(obj2)

pandas/tests/tseries/offsets/conftest.py

+13
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,19 @@ def offset_types(request):
77
return request.param
88

99

10+
@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__ if
11+
issubclass(getattr(offsets, o), offsets.MonthOffset)
12+
and o != 'MonthOffset'])
13+
def month_classes(request):
14+
return request.param
15+
16+
17+
@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__ if
18+
issubclass(getattr(offsets, o), offsets.Tick)])
19+
def tick_classes(request):
20+
return request.param
21+
22+
1023
@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern',
1124
'dateutil/Asia/Tokyo', 'dateutil/US/Pacific'])
1225
def tz(request):

pandas/tests/tseries/offsets/test_offsets.py

+37
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
get_offset, get_standard_freq)
1818
from pandas.core.indexes.datetimes import (
1919
_to_m8, DatetimeIndex, _daterange_cache)
20+
import pandas._libs.tslibs.offsets as liboffsets
2021
from pandas._libs.tslibs.offsets import WeekDay, CacheableOffset
2122
from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd,
2223
BusinessHour, WeekOfMonth, CBMonthEnd,
@@ -4682,9 +4683,45 @@ def test_all_offset_classes(self, tup):
46824683
assert first == second
46834684

46844685

4686+
# ---------------------------------------------------------------------
46854687
def test_get_offset_day_error():
46864688
# subclass of _BaseOffset must override _day_opt attribute, or we should
46874689
# get a NotImplementedError
46884690

46894691
with pytest.raises(NotImplementedError):
46904692
DateOffset()._get_offset_day(datetime.now())
4693+
4694+
4695+
@pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds)))
4696+
def test_valid_month_attributes(kwd, month_classes):
4697+
# GH#18226
4698+
cls = month_classes
4699+
# check that we cannot create e.g. MonthEnd(weeks=3)
4700+
with pytest.raises(TypeError):
4701+
cls(**{kwd: 3})
4702+
4703+
4704+
@pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds)))
4705+
def test_valid_tick_attributes(kwd, tick_classes):
4706+
# GH#18226
4707+
cls = tick_classes
4708+
# check that we cannot create e.g. Hour(weeks=3)
4709+
with pytest.raises(TypeError):
4710+
cls(**{kwd: 3})
4711+
4712+
4713+
def test_validate_n_error():
4714+
with pytest.raises(TypeError):
4715+
DateOffset(n='Doh!')
4716+
4717+
with pytest.raises(TypeError):
4718+
MonthBegin(n=timedelta(1))
4719+
4720+
with pytest.raises(TypeError):
4721+
BDay(n=np.array([1, 2], dtype=np.int64))
4722+
4723+
4724+
def test_require_integers(offset_types):
4725+
cls = offset_types
4726+
with pytest.raises(ValueError):
4727+
cls(n=1.5)

0 commit comments

Comments
 (0)