Skip to content

BUG: fix PeriodIndex join with DatetimeIndex stack overflow #5101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 4, 2013
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,8 @@ Bug Fixes
passed ``index_col=0`` (:issue:`5066`).
- Fixed a bug where :func:`~pandas.read_html` was incorrectly inferring the
type of headers (:issue:`5048`).
- Fixed a bug where ``DatetimeIndex`` joins with ``PeriodIndex`` caused a
stack overflow (:issue:`3899`).


pandas 0.12.0
Expand Down
13 changes: 5 additions & 8 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
import pandas.tseries.frequencies as _freq_mod

import pandas.core.common as com
from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE,
_maybe_box, _values_from_object)
from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box,
_values_from_object)
from pandas import compat
from pandas.lib import Timestamp
import pandas.lib as lib
Expand Down Expand Up @@ -712,13 +712,10 @@ def _array_values(self):
def astype(self, dtype):
dtype = np.dtype(dtype)
if dtype == np.object_:
result = np.empty(len(self), dtype=dtype)
result[:] = [x for x in self]
return result
return Index(np.array(list(self), dtype), dtype)
elif dtype == _INT64_DTYPE:
return self.values.copy()
else: # pragma: no cover
raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
return Index(self.values.copy(), dtype)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why copy instead of view here? I know that's what was there before; it just seems strange.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was just about to ask that, actually... @jreback?

raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)

def __iter__(self):
for val in self.values:
Expand Down
22 changes: 16 additions & 6 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
import pandas.core.datetools as datetools
import pandas as pd
import numpy as np
from pandas.compat import range, lrange, lmap, map, zip
randn = np.random.randn
from numpy.random import randn
from pandas.compat import range, lrange, lmap, zip

from pandas import Series, TimeSeries, DataFrame
from pandas.util.testing import(assert_series_equal, assert_almost_equal,
Expand Down Expand Up @@ -1207,7 +1207,6 @@ def test_is_(self):
self.assertFalse(index.is_(index - 2))
self.assertFalse(index.is_(index - 0))


def test_comp_period(self):
idx = period_range('2007-01', periods=20, freq='M')

Expand Down Expand Up @@ -1913,6 +1912,17 @@ def test_join_self(self):
res = index.join(index, how=kind)
self.assert_(index is res)

def test_join_does_not_recur(self):
    """Outer-join a ``dt`` row index with ``p`` columns without recursing.

    Regression test for the stack overflow noted under issue 3899: the
    join must come back as an object-dtype ``Index`` holding the two row
    labels followed by the two column labels.
    """
    frame = tm.makeCustomDataframe(
        3, 2,
        data_gen_f=lambda *args: np.random.randint(2),
        c_idx_type='p',
        r_idx_type='dt')
    ser = frame.iloc[:2, 0]
    row_labels = ser.index
    col_labels = frame.columns

    joined = row_labels.join(col_labels, how='outer')

    # Expected layout: the row labels first, then the column labels.
    wanted = Index([row_labels[0], row_labels[1],
                    col_labels[0], col_labels[1]], object)
    tm.assert_index_equal(joined, wanted)

def test_align_series(self):
rng = period_range('1/1/2000', '1/1/2010', freq='A')
ts = Series(np.random.randn(len(rng)), index=rng)
Expand Down Expand Up @@ -2185,15 +2195,15 @@ def test_minutely(self):

def test_secondly(self):
self._check_freq('S', '1970-01-01')

def test_millisecondly(self):
self._check_freq('L', '1970-01-01')

def test_microsecondly(self):
self._check_freq('U', '1970-01-01')

def test_nanosecondly(self):
self._check_freq('N', '1970-01-01')
self._check_freq('N', '1970-01-01')

def _check_freq(self, freq, base_date):
rng = PeriodIndex(start=base_date, periods=10, freq=freq)
Expand Down
18 changes: 13 additions & 5 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,12 @@

import pandas.index as _index

from pandas.compat import(
range, long, StringIO, lrange, lmap, map, zip, cPickle as pickle, product
)
from pandas import read_pickle
from pandas.compat import range, long, StringIO, lrange, lmap, zip, product
import pandas.core.datetools as dt
from numpy.random import rand
from numpy.testing import assert_array_equal
from pandas.util.testing import assert_frame_equal
import pandas.compat as compat
from pandas.core.datetools import BDay
import pandas.core.common as com
from pandas import concat
from pandas import _np_version_under1p7
Expand Down Expand Up @@ -2064,6 +2060,18 @@ def test_ns_index(self):
new_index = pd.DatetimeIndex(start=index[0], end=index[-1], freq=index.freq)
self.assert_index_parameters(new_index)

def test_join_with_period_index(self):
    """Joining ``p`` columns with a ``dt`` row index raises ValueError.

    Every join kind ('left', 'right', 'inner', 'outer') is expected to
    fail with the same message.
    """
    frame = tm.makeCustomDataframe(
        10, 10,
        data_gen_f=lambda *args: np.random.randint(2),
        c_idx_type='p',
        r_idx_type='dt')
    ser = frame.iloc[:5, 0]
    expected_msg = 'can only call with other PeriodIndex-ed objects'

    for how in ('left', 'right', 'inner', 'outer'):
        with tm.assertRaisesRegexp(ValueError, expected_msg):
            frame.columns.join(ser.index, how=how)


class TestDatetime64(unittest.TestCase):
"""
Expand Down