pandas-dev · cpcloud · Jul 30, 2013 · Jul 27, 2013 · cpcloud · Jul 27, 2013
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -89,6 +89,9 @@ pandas 0.13
   - Fixed bug with duplicate columns and type conversion in ``read_json`` when
     ``orient='split'`` (:issue:`4377`)
   - Fix ``.iat`` indexing with a ``PeriodIndex`` (:issue:`4390`)
+  - Fixed an issue where joining a ``PeriodIndex`` with itself returned a new
+    instance rather than the same instance (:issue:`4379`); also added tests
+    covering self-joins for the other index types
 
 pandas 0.12
 ===========

diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
@@ -555,6 +555,15 @@ def test_slice_keep_name(self):
         idx = Index(['a', 'b'], name='asdf')
         self.assertEqual(idx.name, idx[1:].name)
 
+    def test_join_self(self):
+        # Self-join must be the identity for every fixture index and every
+        # join strategy: the very same object comes back, not a copy.
+        for name in ('unicode', 'str', 'date', 'int', 'float'):
+            index = getattr(self, '{0}Index'.format(name))
+            for how in ('outer', 'inner', 'left', 'right'):
+                result = index.join(index, how=how)
+                self.assert_(index is result)
+
 
 class TestInt64Index(unittest.TestCase):
     _multiprocess_can_split_ = True
@@ -834,6 +843,12 @@ def test_join_non_unique(self):
         exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.int64)
         self.assert_(np.array_equal(ridx, exp_ridx))
 
+    def test_join_self(self):
+        # self-join of the fixture index must return the very same object
+        for how in ('outer', 'inner', 'left', 'right'):
+            result = self.index.join(self.index, how=how)
+            self.assert_(self.index is result)
+
     def test_intersection(self):
         other = Index([1, 2, 3, 4, 5])
         result = self.index.intersection(other)
@@ -1727,6 +1742,13 @@ def _check_all(other):
 
         self.assertRaises(Exception, self.index.join, self.index, level=1)
 
+    def test_join_self(self):
+        # self-join of the fixture index must return the very same object
+        index = self.index
+        for how in ('outer', 'inner', 'left', 'right'):
+            result = index.join(index, how=how)
+            self.assert_(index is result)
+
     def test_reindex(self):
         result, indexer = self.index.reindex(list(self.index[:4]))
         tm.assert_isinstance(result, MultiIndex)

diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
@@ -553,11 +553,9 @@ class PeriodIndex(Int64Index):
     __le__ = _period_index_cmp('__le__')
     __ge__ = _period_index_cmp('__ge__')
 
-    def __new__(cls, data=None, ordinal=None,
-                freq=None, start=None, end=None, periods=None,
-                copy=False, name=None,
-                year=None, month=None, quarter=None, day=None,
-                hour=None, minute=None, second=None,
+    def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
+                periods=None, copy=False, name=None, year=None, month=None,
+                quarter=None, day=None, hour=None, minute=None, second=None,
                 tz=None):
 
         freq = _freq_mod.get_standard_freq(freq)
@@ -649,19 +647,18 @@ def _from_arraylike(cls, data, freq, tz):
                     freq = getattr(data[0], 'freq', None)
 
                 if freq is None:
-                    raise ValueError(('freq not specified and cannot be '
-                                      'inferred from first element'))
+                    raise ValueError('freq not specified and cannot be '
+                                     'inferred from first element')
 
-                if np.issubdtype(data.dtype, np.datetime64):
-                    data = dt64arr_to_periodarr(data, freq, tz)
-                elif data.dtype == np.int64:
-                    pass
-                else:
-                    try:
-                        data = com._ensure_int64(data)
-                    except (TypeError, ValueError):
-                        data = com._ensure_object(data)
-                        data = _get_ordinals(data, freq)
+                if data.dtype != np.int64:
+                    if np.issubdtype(data.dtype, np.datetime64):
+                        data = dt64arr_to_periodarr(data, freq, tz)
+                    else:
+                        try:
+                            data = com._ensure_int64(data)
+                        except (TypeError, ValueError):
+                            data = com._ensure_object(data)
+                            data = _get_ordinals(data, freq)
 
         return data, freq
 
@@ -1013,8 +1010,7 @@ def join(self, other, how='left', level=None, return_indexers=False):
         if return_indexers:
             result, lidx, ridx = result
             return self._apply_meta(result), lidx, ridx
-        else:
-            return self._apply_meta(result)
+        return self._apply_meta(result)
 
     def _assert_can_do_setop(self, other):
         if not isinstance(other, PeriodIndex):
@@ -1031,9 +1027,10 @@ def _wrap_union_result(self, other, result):
         return result
 
     def _apply_meta(self, rawarr):
-        idx = rawarr.view(PeriodIndex)
-        idx.freq = self.freq
-        return idx
+        if not isinstance(rawarr, PeriodIndex):
+            rawarr = rawarr.view(PeriodIndex)
+        rawarr.freq = self.freq
+        return rawarr
 
     def __getitem__(self, key):
         """Override numpy.ndarray's __getitem__ method to work as desired"""
@@ -1069,18 +1066,19 @@ def _format_native_types(self, na_rep=u('NaT'), **kwargs):
         return values.tolist()
 
     def __array_finalize__(self, obj):
-        if self.ndim == 0:  # pragma: no cover
+        if not self.ndim:  # pragma: no cover
             return self.item()
 
         self.freq = getattr(obj, 'freq', None)
         self.name = getattr(obj, 'name', None)
 
     def __repr__(self):
-        output = str(self.__class__) + '\n'
-        output += 'freq: ''%s''\n' % self.freq
-        if len(self) > 0:
+        output = com.pprint_thing(self.__class__) + '\n'
+        output += 'freq: %s\n' % self.freq
+        n = len(self)
+        if n:
             output += '[%s, ..., %s]\n' % (self[0], self[-1])
-        output += 'length: %d' % len(self)
+        output += 'length: %d' % n
         return output
 
     def __unicode__(self):

diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py
@@ -1864,6 +1864,13 @@ def test_joins(self):
             tm.assert_isinstance(joined, PeriodIndex)
             self.assert_(joined.freq == index.freq)
 
+    def test_join_self(self):
+        # self-join of a period_range index must return the very same object
+        rng = period_range('1/1/2000', '1/20/2000', freq='D')
+        for how in ('inner', 'outer', 'left', 'right'):
+            joined = rng.join(rng, how=how)
+            self.assert_(rng is joined)
+
     def test_align_series(self):
         rng = period_range('1/1/2000', '1/1/2010', freq='A')
         ts = Series(np.random.randn(len(rng)), index=rng)

diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
@@ -1960,6 +1960,12 @@ def test_slice_keeps_name(self):
         dr = pd.date_range(st, et, freq='H', name='timebucket')
         self.assertEqual(dr[1:].name, dr.name)
 
+    def test_join_self(self):
+        # self-join of a date_range index must return the very same object
+        rng = date_range('1/1/2000', periods=10)
+        for how in ('outer', 'inner', 'left', 'right'):
+            result = rng.join(rng, how=how)
+            self.assert_(rng is result)
 
 class TestLegacySupport(unittest.TestCase):
     _multiprocess_can_split_ = True