From 76b2bf88e94feee8948403909bf4cbd6fd4ff21f Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 3 Oct 2013 18:03:09 -0400 Subject: [PATCH 1/4] BUG: fix DatetimeIndex join with PeriodIndex stack overflow --- pandas/tseries/period.py | 10 ++++------ pandas/tseries/tests/test_period.py | 22 ++++++++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index cd81867ff8f08..fac8aed4b3189 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -12,8 +12,8 @@ import pandas.tseries.frequencies as _freq_mod import pandas.core.common as com -from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE, - _maybe_box, _values_from_object) +from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box, + _values_from_object) from pandas import compat from pandas.lib import Timestamp import pandas.lib as lib @@ -712,11 +712,9 @@ def _array_values(self): def astype(self, dtype): dtype = np.dtype(dtype) if dtype == np.object_: - result = np.empty(len(self), dtype=dtype) - result[:] = [x for x in self] - return result + return Index(np.array([x for x in self], dtype), dtype) elif dtype == _INT64_DTYPE: - return self.values.copy() + return Index(self.values.copy(), dtype) else: # pragma: no cover raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 9abecc0aeeec6..5629dfa8e5c4a 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -22,8 +22,8 @@ import pandas.core.datetools as datetools import pandas as pd import numpy as np -from pandas.compat import range, lrange, lmap, map, zip -randn = np.random.randn +from numpy.random import randn +from pandas.compat import range, lrange, lmap, zip from pandas import Series, TimeSeries, DataFrame from pandas.util.testing import(assert_series_equal, assert_almost_equal, @@ -1207,7 +1207,6 @@ def test_is_(self): self.assertFalse(index.is_(index - 2)) self.assertFalse(index.is_(index - 0)) - def test_comp_period(self): idx = period_range('2007-01', periods=20, freq='M') @@ -1913,6 +1912,17 @@ def test_join_self(self): res = index.join(index, how=kind) self.assert_(index is res) + def test_join_does_not_recur(self): + df = tm.makeCustomDataframe(3, 2, data_gen_f=lambda *args: + np.random.randint(2), c_idx_type='p', + r_idx_type='dt') + s = df.iloc[:2, 0] + + res = s.index.join(df.columns, how='outer') + expected = Index([s.index[0], s.index[1], + df.columns[0], df.columns[1]], object) + np.testing.assert_array_equal(res, expected) + def test_align_series(self): rng = period_range('1/1/2000', '1/1/2010', freq='A') ts = Series(np.random.randn(len(rng)), index=rng) @@ -2185,15 +2195,15 @@ def test_minutely(self): def test_secondly(self): self._check_freq('S', '1970-01-01') - + def test_millisecondly(self): self._check_freq('L', '1970-01-01') def test_microsecondly(self): self._check_freq('U', '1970-01-01') - + def test_nanosecondly(self): - self._check_freq('N', '1970-01-01') + self._check_freq('N', '1970-01-01') def _check_freq(self, freq, base_date): rng = PeriodIndex(start=base_date, periods=10, freq=freq) From cbe2366f3d9e61983a446aa7f2905f3d4bdca1af Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 3 Oct 2013 18:10:39 -0400 Subject: [PATCH 2/4] TST: add test for joining PeriodIndex with DatetimeIndex --- doc/source/release.rst | 2 ++ pandas/tseries/period.py | 5 ++--- pandas/tseries/tests/test_period.py | 2 +- pandas/tseries/tests/test_timeseries.py | 13 +++++++++++++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index eaf10977af4f7..ebba7444e82d8 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -553,6 +553,8 @@ Bug Fixes passed ``index_col=0`` (:issue:`5066`). - Fixed a bug where :func:`~pandas.read_html` was incorrectly infering the type of headers (:issue:`5048`). + - Fixed a bug where ``DatetimeIndex`` joins with ``PeriodIndex`` caused a + stack overflow (:issue:`3899`). pandas 0.12.0 diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index fac8aed4b3189..860fde6e4eca6 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -712,11 +712,10 @@ def _array_values(self): def astype(self, dtype): dtype = np.dtype(dtype) if dtype == np.object_: - return Index(np.array([x for x in self], dtype), dtype) + return Index(np.array(list(self), dtype), dtype) elif dtype == _INT64_DTYPE: return Index(self.values.copy(), dtype) - else: # pragma: no cover - raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype) + raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype) def __iter__(self): for val in self.values: diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 5629dfa8e5c4a..55963b01d2779 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1921,7 +1921,7 @@ def test_join_does_not_recur(self): res = s.index.join(df.columns, how='outer') expected = Index([s.index[0], s.index[1], df.columns[0], df.columns[1]], object) - np.testing.assert_array_equal(res, expected) + tm.assert_index_equal(res, expected) def test_align_series(self): rng = period_range('1/1/2000', '1/1/2010', freq='A') diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index f3598dd2d210b..d717033a41331 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -2064,6 +2064,19 @@ def test_ns_index(self): new_index = pd.DatetimeIndex(start=index[0], end=index[-1], freq=index.freq) self.assert_index_parameters(new_index) + def test_join_with_period_index(self): + df = tm.makeCustomDataframe(10, 10, data_gen_f=lambda *args: + np.random.randint(2), c_idx_type='p', + r_idx_type='dt') + s = df.iloc[:5, 0] + joins = 'left', 'right', 'inner', 'outer' + + with tm.assertRaisesRegexp(ValueError, + 'can only call with other PeriodIndex-ed ' + 'objects'): + for join in joins: + df.columns.join(s.index, how=join) + class TestDatetime64(unittest.TestCase): """ From e5936eb1582ab2d48bd5fcbea7d560479a0811d3 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 3 Oct 2013 18:46:03 -0400 Subject: [PATCH 3/4] CLN: remove unused imports --- pandas/tseries/tests/test_timeseries.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index d717033a41331..5329f37095961 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -31,16 +31,12 @@ import pandas.index as _index -from pandas.compat import( - range, long, StringIO, lrange, lmap, map, zip, cPickle as pickle, product -) -from pandas import read_pickle +from pandas.compat import range, long, StringIO, lrange, lmap, zip, product import pandas.core.datetools as dt from numpy.random import rand from numpy.testing import assert_array_equal from pandas.util.testing import assert_frame_equal import pandas.compat as compat -from pandas.core.datetools import BDay import pandas.core.common as com from pandas import concat from pandas import _np_version_under1p7 @@ -2071,10 +2067,9 @@ def test_join_with_period_index(self): s = df.iloc[:5, 0] joins = 'left', 'right', 'inner', 'outer' - with tm.assertRaisesRegexp(ValueError, - 'can only call with other PeriodIndex-ed ' - 'objects'): - for join in joins: + for join in joins: + with tm.assertRaisesRegexp(ValueError, 'can only call with other ' + 'PeriodIndex-ed objects'): df.columns.join(s.index, how=join) From cc4b1300fffaf941ad35678c6a11e90cca34f93b Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 3 Oct 2013 20:56:26 -0400 Subject: [PATCH 4/4] CLN: remove copying of PeriodIndex values on astype(int) --- pandas/tseries/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 860fde6e4eca6..579b0b3019fdc 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -714,7 +714,7 @@ def astype(self, dtype): if dtype == np.object_: return Index(np.array(list(self), dtype), dtype) elif dtype == _INT64_DTYPE: - return Index(self.values.copy(), dtype) + return Index(self.values, dtype) raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype) def __iter__(self):