32-bit compat issues in MacPython/pandas-wheels #34410

TomAugspurger · 2020-05-27T16:29:45Z

https://dev.azure.com/pandas-dev/pandas-wheels/_build/results?buildId=36222&view=logs&j=c0130b29-789d-5a3c-6978-10796a508a7f&t=e120bc6c-1f5e-5a41-8f0a-1d992cd2fbfb&l=1400

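There are a couple of classes of failures on the 32-bit Windows (win32) build: a `ValueError: no results` raised from groupby-rolling `agg`, and a `TypeError` casting `int64` to `int32` in `GroupbyRollingIndexer.get_window_bounds`. Log excerpt: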

2020-05-27T03:46:00.4640450Z ______________ test_multiple_agg_funcs[rolling-2-expected_vals0] ______________
2020-05-27T03:46:00.4640874Z [gw0] win32 -- Python 3.7.7 D:\a\1\s\test_venv\Scripts\python.exe
2020-05-27T03:46:00.4641216Z 
2020-05-27T03:46:00.4641446Z func = 'rolling', window_size = 2
2020-05-27T03:46:00.4641974Z expected_vals = [[nan, nan, nan, nan], [15.0, 20.0, 25.0, 20.0], [25.0, 30.0, 35.0, 30.0], [nan, nan, nan, nan], [20.0, 30.0, 35.0, 30.0], [35.0, 40.0, 60.0, 40.0], ...]
2020-05-27T03:46:00.4642426Z 
2020-05-27T03:46:00.4642646Z     @pytest.mark.parametrize(
2020-05-27T03:46:00.4642970Z         "func,window_size,expected_vals",
2020-05-27T03:46:00.4643468Z         [
2020-05-27T03:46:00.4643722Z             (
2020-05-27T03:46:00.4643997Z                 "rolling",
2020-05-27T03:46:00.4644235Z                 2,
2020-05-27T03:46:00.4644500Z                 [
2020-05-27T03:46:00.4644818Z                     [np.nan, np.nan, np.nan, np.nan],
2020-05-27T03:46:00.4645140Z                     [15.0, 20.0, 25.0, 20.0],
2020-05-27T03:46:00.4645482Z                     [25.0, 30.0, 35.0, 30.0],
2020-05-27T03:46:00.4645799Z                     [np.nan, np.nan, np.nan, np.nan],
2020-05-27T03:46:00.4646157Z                     [20.0, 30.0, 35.0, 30.0],
2020-05-27T03:46:00.4646503Z                     [35.0, 40.0, 60.0, 40.0],
2020-05-27T03:46:00.4646811Z                     [60.0, 80.0, 85.0, 80],
2020-05-27T03:46:00.4647113Z                 ],
2020-05-27T03:46:00.4647372Z             ),
2020-05-27T03:46:00.4647589Z             (
2020-05-27T03:46:00.4647855Z                 "expanding",
2020-05-27T03:46:00.4648104Z                 None,
2020-05-27T03:46:00.4648376Z                 [
2020-05-27T03:46:00.4648680Z                     [10.0, 10.0, 20.0, 20.0],
2020-05-27T03:46:00.4648983Z                     [15.0, 20.0, 25.0, 20.0],
2020-05-27T03:46:00.4649326Z                     [20.0, 30.0, 30.0, 20.0],
2020-05-27T03:46:00.4649670Z                     [10.0, 10.0, 30.0, 30.0],
2020-05-27T03:46:00.4649972Z                     [20.0, 30.0, 35.0, 30.0],
2020-05-27T03:46:00.4650322Z                     [26.666667, 40.0, 50.0, 30.0],
2020-05-27T03:46:00.4650683Z                     [40.0, 80.0, 60.0, 30.0],
2020-05-27T03:46:00.4650946Z                 ],
2020-05-27T03:46:00.4651208Z             ),
2020-05-27T03:46:00.4651414Z         ],
2020-05-27T03:46:00.4651646Z     )
2020-05-27T03:46:00.4651997Z     def test_multiple_agg_funcs(func, window_size, expected_vals):
2020-05-27T03:46:00.4652328Z         # GH 15072
2020-05-27T03:46:00.4652605Z         df = pd.DataFrame(
2020-05-27T03:46:00.4652839Z             [
2020-05-27T03:46:00.4653114Z                 ["A", 10, 20],
2020-05-27T03:46:00.4653414Z                 ["A", 20, 30],
2020-05-27T03:46:00.4653669Z                 ["A", 30, 40],
2020-05-27T03:46:00.4653966Z                 ["B", 10, 30],
2020-05-27T03:46:00.4654266Z                 ["B", 30, 40],
2020-05-27T03:46:00.4654520Z                 ["B", 40, 80],
2020-05-27T03:46:00.4654822Z                 ["B", 80, 90],
2020-05-27T03:46:00.4655056Z             ],
2020-05-27T03:46:00.4655354Z             columns=["stock", "low", "high"],
2020-05-27T03:46:00.4655642Z         )
2020-05-27T03:46:00.4655833Z     
2020-05-27T03:46:00.4656116Z         f = getattr(df.groupby("stock"), func)
2020-05-27T03:46:00.4656545Z         if window_size:
2020-05-27T03:46:00.4656817Z             window = f(window_size)
2020-05-27T03:46:00.4657104Z         else:
2020-05-27T03:46:00.4657337Z             window = f()
2020-05-27T03:46:00.4657591Z     
2020-05-27T03:46:00.4657872Z         index = pd.MultiIndex.from_tuples(
2020-05-27T03:46:00.4658235Z             [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)],
2020-05-27T03:46:00.4658625Z             names=["stock", None],
2020-05-27T03:46:00.4658902Z         )
2020-05-27T03:46:00.4659155Z         columns = pd.MultiIndex.from_tuples(
2020-05-27T03:46:00.4659548Z             [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")]
2020-05-27T03:46:00.4659860Z         )
2020-05-27T03:46:00.4660195Z         expected = pd.DataFrame(expected_vals, index=index, columns=columns)
2020-05-27T03:46:00.4660526Z     
2020-05-27T03:46:00.4660750Z         result = window.agg(
2020-05-27T03:46:00.4661122Z >           OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"])))
2020-05-27T03:46:00.4661464Z         )
2020-05-27T03:46:00.4661630Z 
2020-05-27T03:46:00.4661965Z test_venv\lib\site-packages\pandas\tests\window\test_api.py:346: 
2020-05-27T03:46:00.4662381Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2020-05-27T03:46:00.4662906Z test_venv\lib\site-packages\pandas\core\window\rolling.py:2033: in aggregate
2020-05-27T03:46:00.4663339Z     return super().aggregate(func, *args, **kwargs)
2020-05-27T03:46:00.4663730Z test_venv\lib\site-packages\pandas\core\window\rolling.py:603: in aggregate
2020-05-27T03:46:00.4664169Z     result, how = self._aggregate(func, *args, **kwargs)
2020-05-27T03:46:00.4664599Z test_venv\lib\site-packages\pandas\core\base.py:417: in _aggregate
2020-05-27T03:46:00.4664941Z     result = _agg(arg, _agg_1dim)
2020-05-27T03:46:00.4665315Z test_venv\lib\site-packages\pandas\core\base.py:384: in _agg
2020-05-27T03:46:00.4665658Z     result[fname] = func(fname, agg_how)
2020-05-27T03:46:00.4666056Z test_venv\lib\site-packages\pandas\core\base.py:368: in _agg_1dim
2020-05-27T03:46:00.4666433Z     return colg.aggregate(how)
2020-05-27T03:46:00.4666793Z test_venv\lib\site-packages\pandas\core\window\rolling.py:2033: in aggregate
2020-05-27T03:46:00.4667225Z     return super().aggregate(func, *args, **kwargs)
2020-05-27T03:46:00.4667657Z test_venv\lib\site-packages\pandas\core\window\rolling.py:603: in aggregate
2020-05-27T03:46:00.4668054Z     result, how = self._aggregate(func, *args, **kwargs)
2020-05-27T03:46:00.4668471Z test_venv\lib\site-packages\pandas\core\base.py:475: in _aggregate
2020-05-27T03:46:00.4668902Z     return self._aggregate_multiple_funcs(arg, _axis=_axis), None
2020-05-27T03:46:00.4669286Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2020-05-27T03:46:00.4669590Z 
2020-05-27T03:46:00.4669874Z self = RollingGroupby [window=2,center=False,axis=0], arg = ['mean', 'max']
2020-05-27T03:46:00.4670218Z _axis = 0
2020-05-27T03:46:00.4670378Z 
2020-05-27T03:46:00.4670690Z     def _aggregate_multiple_funcs(self, arg, _axis):
2020-05-27T03:46:00.4671087Z         from pandas.core.reshape.concat import concat
2020-05-27T03:46:00.4671345Z     
2020-05-27T03:46:00.4671605Z         if _axis != 0:
2020-05-27T03:46:00.4671974Z             raise NotImplementedError("axis other than 0 is not supported")
2020-05-27T03:46:00.4672267Z     
2020-05-27T03:46:00.4672562Z         if self._selected_obj.ndim == 1:
2020-05-27T03:46:00.4672862Z             obj = self._selected_obj
2020-05-27T03:46:00.4673151Z         else:
2020-05-27T03:46:00.4673452Z             obj = self._obj_with_exclusions
2020-05-27T03:46:00.4673697Z     
2020-05-27T03:46:00.4673941Z         results = []
2020-05-27T03:46:00.4674209Z         keys = []
2020-05-27T03:46:00.4674412Z     
2020-05-27T03:46:00.4674664Z         # degenerate case
2020-05-27T03:46:00.4674921Z         if obj.ndim == 1:
2020-05-27T03:46:00.4675282Z             for a in arg:
2020-05-27T03:46:00.4675640Z                 colg = self._gotitem(obj.name, ndim=1, subset=obj)
2020-05-27T03:46:00.4675954Z                 try:
2020-05-27T03:46:00.4676277Z                     new_res = colg.aggregate(a)
2020-05-27T03:46:00.4676528Z     
2020-05-27T03:46:00.4676805Z                 except TypeError:
2020-05-27T03:46:00.4677111Z                     pass
2020-05-27T03:46:00.4677359Z                 else:
2020-05-27T03:46:00.4677673Z                     results.append(new_res)
2020-05-27T03:46:00.4677958Z     
2020-05-27T03:46:00.4678210Z                     # make sure we find a good name
2020-05-27T03:46:00.4678584Z                     name = com.get_callable_name(a) or a
2020-05-27T03:46:00.4678891Z                     keys.append(name)
2020-05-27T03:46:00.4679164Z     
2020-05-27T03:46:00.4679405Z         # multiples
2020-05-27T03:46:00.4679630Z         else:
2020-05-27T03:46:00.4679957Z             for index, col in enumerate(obj):
2020-05-27T03:46:00.4680401Z                 colg = self._gotitem(col, ndim=1, subset=obj.iloc[:, index])
2020-05-27T03:46:00.4680756Z                 try:
2020-05-27T03:46:00.4681075Z                     new_res = colg.aggregate(arg)
2020-05-27T03:46:00.4681407Z                 except (TypeError, DataError):
2020-05-27T03:46:00.4681737Z                     pass
2020-05-27T03:46:00.4682119Z                 except ValueError as err:
2020-05-27T03:46:00.4682422Z                     # cannot aggregate
2020-05-27T03:46:00.4682818Z                     if "Must produce aggregated value" in str(err):
2020-05-27T03:46:00.4683257Z                         # raised directly in _aggregate_named
2020-05-27T03:46:00.4683562Z                         pass
2020-05-27T03:46:00.4683915Z                     elif "no results" in str(err):
2020-05-27T03:46:00.4684337Z                         # raised directly in _aggregate_multiple_funcs
2020-05-27T03:46:00.4684653Z                         pass
2020-05-27T03:46:00.4684954Z                     else:
2020-05-27T03:46:00.4685220Z                         raise
2020-05-27T03:46:00.4685512Z                 else:
2020-05-27T03:46:00.4685823Z                     results.append(new_res)
2020-05-27T03:46:00.4686109Z                     keys.append(col)
2020-05-27T03:46:00.4686378Z     
2020-05-27T03:46:00.4686628Z         # if we are empty
2020-05-27T03:46:00.4686893Z         if not len(results):
2020-05-27T03:46:00.4687219Z >           raise ValueError("no results")
2020-05-27T03:46:00.4687514Z E           ValueError: no results
2020-05-27T03:46:00.4687767Z 
2020-05-27T03:46:00.4688056Z test_venv\lib\site-packages\pandas\core\base.py:540: ValueError
2020-05-27T03:46:00.4900327Z ________________ test_rolling_apply_args_kwargs[args_kwargs0] _________________
2020-05-27T03:46:00.4910069Z [gw0] win32 -- Python 3.7.7 D:\a\1\s\test_venv\Scripts\python.exe
2020-05-27T03:46:00.4913683Z 
2020-05-27T03:46:00.4916587Z args_kwargs = [None, {'par': 10}]
2020-05-27T03:46:00.4920984Z 
2020-05-27T03:46:00.4932595Z     @pytest.mark.parametrize("args_kwargs", [[None, {"par": 10}], [(10,), None]])
2020-05-27T03:46:00.4934841Z     def test_rolling_apply_args_kwargs(args_kwargs):
2020-05-27T03:46:00.4935692Z         # GH 33433
2020-05-27T03:46:00.4936381Z         def foo(x, par):
2020-05-27T03:46:00.4937029Z             return np.sum(x + par)
2020-05-27T03:46:00.4937700Z     
2020-05-27T03:46:00.4938401Z         df = DataFrame({"gr": [1, 1], "a": [1, 2]})
2020-05-27T03:46:00.4939052Z     
2020-05-27T03:46:00.4939885Z         idx = Index(["gr", "a"])
2020-05-27T03:46:00.4940561Z         expected = DataFrame([[11.0, 11.0], [11.0, 12.0]], columns=idx)
2020-05-27T03:46:00.4941155Z     
2020-05-27T03:46:00.4941805Z         result = df.rolling(1).apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1])
2020-05-27T03:46:00.4942523Z         tm.assert_frame_equal(result, expected)
2020-05-27T03:46:00.4943122Z     
2020-05-27T03:46:00.4943684Z         result = df.rolling(1).apply(foo, args=(10,))
2020-05-27T03:46:00.4945113Z     
2020-05-27T03:46:00.4945760Z         midx = MultiIndex.from_tuples([(1, 0), (1, 1)], names=["gr", None])
2020-05-27T03:46:00.4946452Z         expected = Series([11.0, 12.0], index=midx, name="a")
2020-05-27T03:46:00.4947078Z     
2020-05-27T03:46:00.4947685Z         gb_rolling = df.groupby("gr")["a"].rolling(1)
2020-05-27T03:46:00.4948255Z     
2020-05-27T03:46:00.4949192Z >       result = gb_rolling.apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1])
2020-05-27T03:46:00.4949774Z 
2020-05-27T03:46:00.4950430Z test_venv\lib\site-packages\pandas\tests\window\test_apply.py:165: 
2020-05-27T03:46:00.4951199Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2020-05-27T03:46:00.4951938Z test_venv\lib\site-packages\pandas\core\window\rolling.py:2067: in apply
2020-05-27T03:46:00.4952625Z     kwargs=kwargs,
2020-05-27T03:46:00.4953353Z test_venv\lib\site-packages\pandas\core\window\rolling.py:1391: in apply
2020-05-27T03:46:00.4954036Z     kwargs=kwargs,
2020-05-27T03:46:00.4954689Z test_venv\lib\site-packages\pandas\core\window\rolling.py:2196: in _apply
2020-05-27T03:46:00.4955365Z     **kwargs,
2020-05-27T03:46:00.4956044Z test_venv\lib\site-packages\pandas\core\window\rolling.py:589: in _apply
2020-05-27T03:46:00.4956693Z     result = calc(values)
2020-05-27T03:46:00.4957386Z test_venv\lib\site-packages\pandas\core\window\rolling.py:575: in calc
2020-05-27T03:46:00.4958198Z     closed=self.closed,
2020-05-27T03:46:00.4958846Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2020-05-27T03:46:00.4959456Z 
2020-05-27T03:46:00.4960053Z self = <pandas.core.window.indexers.GroupbyRollingIndexer object at 0x3A34FA30>
2020-05-27T03:46:00.4960788Z num_values = 2, min_periods = None, center = False, closed = None
2020-05-27T03:46:00.4961381Z 
2020-05-27T03:46:00.4961909Z     @Appender(get_window_bounds_doc)
2020-05-27T03:46:00.4962523Z     def get_window_bounds(
2020-05-27T03:46:00.4963100Z         self,
2020-05-27T03:46:00.4963652Z         num_values: int = 0,
2020-05-27T03:46:00.4964290Z         min_periods: Optional[int] = None,
2020-05-27T03:46:00.4964946Z         center: Optional[bool] = None,
2020-05-27T03:46:00.4965552Z         closed: Optional[str] = None,
2020-05-27T03:46:00.4966208Z     ) -> Tuple[np.ndarray, np.ndarray]:
2020-05-27T03:46:00.4966908Z         # 1) For each group, get the indices that belong to the group
2020-05-27T03:46:00.4967605Z         # 2) Use the indices to calculate the start & end bounds of the window
2020-05-27T03:46:00.4968322Z         # 3) Append the window bounds in group order
2020-05-27T03:46:00.4968991Z         start_arrays = []
2020-05-27T03:46:00.4969583Z         end_arrays = []
2020-05-27T03:46:00.4970143Z         window_indicies_start = 0
2020-05-27T03:46:00.4970823Z         for key, indicies in self.groupby_indicies.items():
2020-05-27T03:46:00.4971539Z             if self.index_array is not None:
2020-05-27T03:46:00.4972196Z                 index_array = self.index_array.take(indicies)
2020-05-27T03:46:00.4972843Z             else:
2020-05-27T03:46:00.4973467Z                 index_array = self.index_array
2020-05-27T03:46:00.4974075Z             indexer = self.rolling_indexer(
2020-05-27T03:46:00.4974759Z                 index_array=index_array, window_size=self.window_size,
2020-05-27T03:46:00.4975407Z             )
2020-05-27T03:46:00.4975981Z             start, end = indexer.get_window_bounds(
2020-05-27T03:46:00.4976661Z                 len(indicies), min_periods, center, closed
2020-05-27T03:46:00.4977281Z             )
2020-05-27T03:46:00.4977908Z             # Cannot use groupby_indicies as they might not be monotonic with the object
2020-05-27T03:46:00.4978599Z             # we're rolling over
2020-05-27T03:46:00.4979224Z             window_indicies = np.arange(
2020-05-27T03:46:00.4979816Z                 window_indicies_start,
2020-05-27T03:46:00.4980472Z                 window_indicies_start + len(indicies),
2020-05-27T03:46:00.4981202Z                 dtype=np.int64,
2020-05-27T03:46:00.4981747Z             )
2020-05-27T03:46:00.4982364Z             window_indicies_start += len(indicies)
2020-05-27T03:46:00.4983051Z             # Extend as we'll be slicing window like [start, end)
2020-05-27T03:46:00.4983789Z             window_indicies = np.append(window_indicies, [window_indicies[-1] + 1])
2020-05-27T03:46:00.4984532Z >           start_arrays.append(window_indicies.take(start))
2020-05-27T03:46:00.4985326Z E           TypeError: Cannot cast array data from dtype('int64') to dtype('int32') according to the rule 'safe'
2020-05-27T03:46:00.4986013Z

The text was updated successfully, but these errors were encountered:

TomAugspurger added the "Bug", "Needs Triage" (issue that has not been reviewed by a pandas team member), and "32bit" (32-bit systems) labels, and removed the "Needs Triage" label, on May 27, 2020

mroeschke mentioned this issue May 28, 2020

BUG: Fix failing MacPython 32bit wheels for groupby rolling #34423

Merged

4 tasks

TomAugspurger closed this as completed in #34423 May 29, 2020

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

32-bit compat issues in MacPython/pandas-wheels #34410

32-bit compat issues in MacPython/pandas-wheels #34410

TomAugspurger commented May 27, 2020

32-bit compat issues in MacPython/pandas-wheels #34410

32-bit compat issues in MacPython/pandas-wheels #34410

Comments

TomAugspurger commented May 27, 2020