Skip to content

Commit 6ae36e4

Browse files
committed
Merge pull request #4379 from cpcloud/self-join-index-is-same
BUG: fix period index object instantiation when joining with self
2 parents fcaf9a6 + e546e53 commit 6ae36e4

File tree

5 files changed

+63
-27
lines changed

5 files changed

+63
-27
lines changed

doc/source/release.rst

+3
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,9 @@ pandas 0.13
8989
- Fixed bug with duplicate columns and type conversion in ``read_json`` when
9090
``orient='split'`` (:issue:`4377`)
9191
- Fix ``.iat`` indexing with a ``PeriodIndex`` (:issue:`4390`)
92+
- Fixed an issue where ``PeriodIndex`` joining with self was returning a new
93+
instance rather than the same instance (:issue:`4379`); also adds a test
94+
for this for the other index types
9295

9396
pandas 0.12
9497
===========

pandas/tests/test_index.py

+22
Original file line number | Diff line number | Diff line change
@@ -555,6 +555,15 @@ def test_slice_keep_name(self):
555555
idx = Index(['a', 'b'], name='asdf')
556556
self.assertEqual(idx.name, idx[1:].name)
557557

558+
def test_join_self(self):
559+
indices = 'unicode', 'str', 'date', 'int', 'float'
560+
kinds = 'outer', 'inner', 'left', 'right'
561+
for index_kind in indices:
562+
for kind in kinds:
563+
res = getattr(self, '{0}Index'.format(index_kind))
564+
joined = res.join(res, how=kind)
565+
self.assert_(res is joined)
566+
558567

559568
class TestInt64Index(unittest.TestCase):
560569
_multiprocess_can_split_ = True
@@ -834,6 +843,12 @@ def test_join_non_unique(self):
834843
exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.int64)
835844
self.assert_(np.array_equal(ridx, exp_ridx))
836845

846+
def test_join_self(self):
847+
kinds = 'outer', 'inner', 'left', 'right'
848+
for kind in kinds:
849+
joined = self.index.join(self.index, how=kind)
850+
self.assert_(self.index is joined)
851+
837852
def test_intersection(self):
838853
other = Index([1, 2, 3, 4, 5])
839854
result = self.index.intersection(other)
@@ -1727,6 +1742,13 @@ def _check_all(other):
17271742

17281743
self.assertRaises(Exception, self.index.join, self.index, level=1)
17291744

1745+
def test_join_self(self):
1746+
kinds = 'outer', 'inner', 'left', 'right'
1747+
for kind in kinds:
1748+
res = self.index
1749+
joined = res.join(res, how=kind)
1750+
self.assert_(res is joined)
1751+
17301752
def test_reindex(self):
17311753
result, indexer = self.index.reindex(list(self.index[:4]))
17321754
tm.assert_isinstance(result, MultiIndex)

pandas/tseries/period.py

+25-27
Original file line number | Diff line number | Diff line change
@@ -553,11 +553,9 @@ class PeriodIndex(Int64Index):
553553
__le__ = _period_index_cmp('__le__')
554554
__ge__ = _period_index_cmp('__ge__')
555555

556-
def __new__(cls, data=None, ordinal=None,
557-
freq=None, start=None, end=None, periods=None,
558-
copy=False, name=None,
559-
year=None, month=None, quarter=None, day=None,
560-
hour=None, minute=None, second=None,
556+
def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
557+
periods=None, copy=False, name=None, year=None, month=None,
558+
quarter=None, day=None, hour=None, minute=None, second=None,
561559
tz=None):
562560

563561
freq = _freq_mod.get_standard_freq(freq)
@@ -649,19 +647,18 @@ def _from_arraylike(cls, data, freq, tz):
649647
freq = getattr(data[0], 'freq', None)
650648

651649
if freq is None:
652-
raise ValueError(('freq not specified and cannot be '
653-
'inferred from first element'))
650+
raise ValueError('freq not specified and cannot be '
651+
'inferred from first element')
654652

655-
if np.issubdtype(data.dtype, np.datetime64):
656-
data = dt64arr_to_periodarr(data, freq, tz)
657-
elif data.dtype == np.int64:
658-
pass
659-
else:
660-
try:
661-
data = com._ensure_int64(data)
662-
except (TypeError, ValueError):
663-
data = com._ensure_object(data)
664-
data = _get_ordinals(data, freq)
653+
if data.dtype != np.int64:
654+
if np.issubdtype(data.dtype, np.datetime64):
655+
data = dt64arr_to_periodarr(data, freq, tz)
656+
else:
657+
try:
658+
data = com._ensure_int64(data)
659+
except (TypeError, ValueError):
660+
data = com._ensure_object(data)
661+
data = _get_ordinals(data, freq)
665662

666663
return data, freq
667664

@@ -1013,8 +1010,7 @@ def join(self, other, how='left', level=None, return_indexers=False):
10131010
if return_indexers:
10141011
result, lidx, ridx = result
10151012
return self._apply_meta(result), lidx, ridx
1016-
else:
1017-
return self._apply_meta(result)
1013+
return self._apply_meta(result)
10181014

10191015
def _assert_can_do_setop(self, other):
10201016
if not isinstance(other, PeriodIndex):
@@ -1031,9 +1027,10 @@ def _wrap_union_result(self, other, result):
10311027
return result
10321028

10331029
def _apply_meta(self, rawarr):
1034-
idx = rawarr.view(PeriodIndex)
1035-
idx.freq = self.freq
1036-
return idx
1030+
if not isinstance(rawarr, PeriodIndex):
1031+
rawarr = rawarr.view(PeriodIndex)
1032+
rawarr.freq = self.freq
1033+
return rawarr
10371034

10381035
def __getitem__(self, key):
10391036
"""Override numpy.ndarray's __getitem__ method to work as desired"""
@@ -1069,18 +1066,19 @@ def _format_native_types(self, na_rep=u('NaT'), **kwargs):
10691066
return values.tolist()
10701067

10711068
def __array_finalize__(self, obj):
1072-
if self.ndim == 0: # pragma: no cover
1069+
if not self.ndim: # pragma: no cover
10731070
return self.item()
10741071

10751072
self.freq = getattr(obj, 'freq', None)
10761073
self.name = getattr(obj, 'name', None)
10771074

10781075
def __repr__(self):
1079-
output = str(self.__class__) + '\n'
1080-
output += 'freq: ''%s''\n' % self.freq
1081-
if len(self) > 0:
1076+
output = com.pprint_thing(self.__class__) + '\n'
1077+
output += 'freq: %s\n' % self.freq
1078+
n = len(self)
1079+
if n:
10821080
output += '[%s, ..., %s]\n' % (self[0], self[-1])
1083-
output += 'length: %d' % len(self)
1081+
output += 'length: %d' % n
10841082
return output
10851083

10861084
def __unicode__(self):

pandas/tseries/tests/test_period.py

+7
Original file line number | Diff line number | Diff line change
@@ -1864,6 +1864,13 @@ def test_joins(self):
18641864
tm.assert_isinstance(joined, PeriodIndex)
18651865
self.assert_(joined.freq == index.freq)
18661866

1867+
def test_join_self(self):
1868+
index = period_range('1/1/2000', '1/20/2000', freq='D')
1869+
1870+
for kind in ['inner', 'outer', 'left', 'right']:
1871+
res = index.join(index, how=kind)
1872+
self.assert_(index is res)
1873+
18671874
def test_align_series(self):
18681875
rng = period_range('1/1/2000', '1/1/2010', freq='A')
18691876
ts = Series(np.random.randn(len(rng)), index=rng)

pandas/tseries/tests/test_timeseries.py

+6
Original file line number | Diff line number | Diff line change
@@ -1960,6 +1960,12 @@ def test_slice_keeps_name(self):
19601960
dr = pd.date_range(st, et, freq='H', name='timebucket')
19611961
self.assertEqual(dr[1:].name, dr.name)
19621962

1963+
def test_join_self(self):
1964+
index = date_range('1/1/2000', periods=10)
1965+
kinds = 'outer', 'inner', 'left', 'right'
1966+
for kind in kinds:
1967+
joined = index.join(index, how=kind)
1968+
self.assert_(index is joined)
19631969

19641970
class TestLegacySupport(unittest.TestCase):
19651971
_multiprocess_can_split_ = True

0 commit comments

Comments
 (0)