BUG/OUTPUT: fix repr of NaT in a MultiIndex

cpcloud · cpcloud · commit e44f65b54a6a · 2014-06-09T12:54:02.000-04:00
Fix negative indexers in DatetimeIndex
diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt
@@ -214,3 +214,7 @@ Bug Fixes
 - Bug in ``.ix`` getitem should always return a Series (:issue:`7150`)
 - Bug in multi-index slicing with incomplete indexers (:issue:`7399`)
 - Bug in multi-index slicing with a step in a sliced level (:issue:`7400`)
+- Bug where negative indexers in ``DatetimeIndex`` were not correctly sliced
+  (:issue:`7408`)
+- Bug where ``NaT`` wasn't repr'd correctly in a ``MultiIndex`` (:issue:`7406`,
+  :issue:`7409`).
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -2610,12 +2610,14 @@ def get_level_values(self, level):
         return values
 
     def format(self, space=2, sparsify=None, adjoin=True, names=False,
-               na_rep='NaN', formatter=None):
+               na_rep=None, formatter=None):
         if len(self) == 0:
             return []
 
         stringified_levels = []
         for lev, lab in zip(self.levels, self.labels):
+            na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type)
+
             if len(lev) > 0:
 
                 formatted = lev.take(lab).format(formatter=formatter)
@@ -2624,12 +2626,12 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
                 mask = lab == -1
                 if mask.any():
                     formatted = np.array(formatted, dtype=object)
-                    formatted[mask] = na_rep
+                    formatted[mask] = na
                     formatted = formatted.tolist()
 
             else:
                 # weird all NA case
-                formatted = [com.pprint_thing(na_rep if isnull(x) else x,
+                formatted = [com.pprint_thing(na if isnull(x) else x,
                                               escape_chars=('\t', '\r', '\n'))
                              for x in com.take_1d(lev.values, lab)]
             stringified_levels.append(formatted)
@@ -4041,3 +4043,12 @@ def _all_indexes_same(indexes):
         if not first.equals(index):
             return False
     return True
+
+
+def _get_na_rep(dtype):
+    return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN')
+
+
+def _get_na_value(dtype):
+    return {np.datetime64: tslib.NaT, np.timedelta64: tslib.NaT}.get(dtype,
+                                                                     np.nan)
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
@@ -17,8 +17,7 @@
 import pandas.core.common as com
 import pandas.algos as algos
 
-from pandas.core.index import Index, MultiIndex
-from pandas.tseries.period import PeriodIndex
+from pandas.core.index import MultiIndex, _get_na_value
 
 
 class _Unstacker(object):
@@ -83,7 +82,7 @@ def __init__(self, values, index, level=-1, value_columns=None):
 
         def _make_index(lev, lab):
             values = _make_index_array_level(lev.values, lab)
-            i = lev._simple_new(values, lev.name, 
+            i = lev._simple_new(values, lev.name,
                                 freq=getattr(lev, 'freq', None),
                                 tz=getattr(lev, 'tz', None))
             return i
@@ -262,7 +261,7 @@ def _make_index_array_level(lev, lab):
 
     l = np.arange(len(lab))
     mask_labels = np.empty(len(mask[mask]), dtype=object)
-    mask_labels.fill(np.nan)
+    mask_labels.fill(_get_na_value(lev.dtype.type))
     mask_indexer = com._ensure_int64(l[mask])
 
     labels = lev
@@ -638,7 +637,7 @@ def melt(frame, id_vars=None, value_vars=None,
 
     This function is useful to massage a DataFrame into a format where one
     or more columns are identifier variables (`id_vars`), while all other
-    columns, considered measured variables (`value_vars`), are "unpivoted" to 
+    columns, considered measured variables (`value_vars`), are "unpivoted" to
     the row axis, leaving just two non-identifier columns, 'variable' and
     'value'.
 
@@ -680,7 +679,7 @@ def melt(frame, id_vars=None, value_vars=None,
     0  a        B      1
     1  b        B      3
     2  c        B      5
-    
+
     >>> pd.melt(df, id_vars=['A'], value_vars=['B', 'C'])
        A variable  value
     0  a        B      1
@@ -702,7 +701,7 @@ def melt(frame, id_vars=None, value_vars=None,
     If you have multi-index columns:
 
     >>> df.columns = [list('ABC'), list('DEF')]
-    >>> df 
+    >>> df
        A  B  C
        D  E  F
     0  a  1  2
@@ -901,7 +900,7 @@ def get_var_names(df, regex):
         return df.filter(regex=regex).columns.tolist()
 
     def melt_stub(df, stub, i, j):
-        varnames = get_var_names(df, "^"+stub)
+        varnames = get_var_names(df, "^" + stub)
         newdf = melt(df, id_vars=i, value_vars=varnames, value_name=stub,
                      var_name=j)
         newdf_j = newdf[j].str.replace(stub, "")
@@ -971,6 +970,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False):
 
     Examples
     --------
+    >>> import pandas as pd
     >>> s = pd.Series(list('abca'))
 
     >>> get_dummies(s)
diff --git a/pandas/lib.pyx b/pandas/lib.pyx
@@ -576,7 +576,7 @@ def maybe_indices_to_slice(ndarray[int64_t] indices):
     cdef:
         Py_ssize_t i, n = len(indices)
 
-    if n == 0:
+    if not n or indices[0] < 0:
         return indices
 
     for i in range(1, n):
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -11665,6 +11665,13 @@ def test_unstack_non_unique_index_names(self):
         with tm.assertRaises(ValueError):
             df.T.stack('c1')
 
+    def test_repr_with_mi_nat(self):
+        df = DataFrame({'X': [1, 2]},
+                       index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']])
+        res = repr(df)
+        exp = '              X\nNaT        a  1\n2013-01-01 b  2'
+        nose.tools.assert_equal(res, exp)
+
     def test_reset_index(self):
         stacked = self.frame.stack()[::2]
         stacked = DataFrame({'foo': stacked, 'bar': stacked})
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
@@ -873,6 +873,11 @@ def test_outer_join_sort(self):
         expected = right_idx.astype(object).union(left_idx.astype(object))
         tm.assert_index_equal(joined, expected)
 
+    def test_nan_first_take_datetime(self):
+        idx = Index([pd.NaT, Timestamp('20130101'), Timestamp('20130102')])
+        res = idx.take([-1, 0, 1])
+        exp = Index([idx[-1], idx[0], idx[1]])
+        tm.assert_index_equal(res, exp)
 
 class TestFloat64Index(tm.TestCase):
     _multiprocess_can_split_ = True