diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 1c564fbf76f59..ca7d6a11d38f1 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -214,3 +214,7 @@ Bug Fixes - Bug in ``.ix`` getitem should always return a Series (:issue:`7150`) - Bug in multi-index slicing with incomplete indexers (:issue:`7399`) - Bug in multi-index slicing with a step in a sliced level (:issue:`7400`) +- Bug where negative indexers in ``DatetimeIndex`` were not correctly sliced + (:issue:`7408`) +- Bug where ``NaT`` wasn't repr'd correctly in a ``MultiIndex`` (:issue:`7406`, + :issue:`7409`). diff --git a/pandas/core/index.py b/pandas/core/index.py index 69edf8d9c3f42..8bf7a3db78b31 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2610,12 +2610,14 @@ def get_level_values(self, level): return values def format(self, space=2, sparsify=None, adjoin=True, names=False, - na_rep='NaN', formatter=None): + na_rep=None, formatter=None): if len(self) == 0: return [] stringified_levels = [] for lev, lab in zip(self.levels, self.labels): + na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type) + if len(lev) > 0: formatted = lev.take(lab).format(formatter=formatter) @@ -2624,12 +2626,12 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, mask = lab == -1 if mask.any(): formatted = np.array(formatted, dtype=object) - formatted[mask] = na_rep + formatted[mask] = na formatted = formatted.tolist() else: # weird all NA case - formatted = [com.pprint_thing(na_rep if isnull(x) else x, + formatted = [com.pprint_thing(na if isnull(x) else x, escape_chars=('\t', '\r', '\n')) for x in com.take_1d(lev.values, lab)] stringified_levels.append(formatted) @@ -4041,3 +4043,12 @@ def _all_indexes_same(indexes): if not first.equals(index): return False return True + + +def _get_na_rep(dtype): + return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN') + + +def _get_na_value(dtype): + return {np.datetime64: tslib.NaT, np.timedelta64: tslib.NaT}.get(dtype, + np.nan) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index a3a2e6849bce4..e1712be7b5a5f 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -17,8 +17,7 @@ import pandas.core.common as com import pandas.algos as algos -from pandas.core.index import Index, MultiIndex -from pandas.tseries.period import PeriodIndex +from pandas.core.index import MultiIndex, _get_na_value class _Unstacker(object): @@ -83,7 +82,7 @@ def __init__(self, values, index, level=-1, value_columns=None): def _make_index(lev, lab): values = _make_index_array_level(lev.values, lab) - i = lev._simple_new(values, lev.name, + i = lev._simple_new(values, lev.name, freq=getattr(lev, 'freq', None), tz=getattr(lev, 'tz', None)) return i @@ -262,7 +261,7 @@ def _make_index_array_level(lev, lab): l = np.arange(len(lab)) mask_labels = np.empty(len(mask[mask]), dtype=object) - mask_labels.fill(np.nan) + mask_labels.fill(_get_na_value(lev.dtype.type)) mask_indexer = com._ensure_int64(l[mask]) labels = lev @@ -638,7 +637,7 @@ def melt(frame, id_vars=None, value_vars=None, This function is useful to massage a DataFrame into a format where one or more columns are identifier variables (`id_vars`), while all other - columns, considered measured variables (`value_vars`), are "unpivoted" to + columns, considered measured variables (`value_vars`), are "unpivoted" to the row axis, leaving just two non-identifier columns, 'variable' and 'value'. @@ -680,7 +679,7 @@ def melt(frame, id_vars=None, value_vars=None, 0 a B 1 1 b B 3 2 c B 5 - + >>> pd.melt(df, id_vars=['A'], value_vars=['B', 'C']) A variable value 0 a B 1 @@ -702,7 +701,7 @@ def melt(frame, id_vars=None, value_vars=None, If you have multi-index columns: >>> df.columns = [list('ABC'), list('DEF')] - >>> df + >>> df A B C D E F 0 a 1 2 @@ -901,7 +900,7 @@ def get_var_names(df, regex): return df.filter(regex=regex).columns.tolist() def melt_stub(df, stub, i, j): - varnames = get_var_names(df, "^"+stub) + varnames = get_var_names(df, "^" + stub) newdf = melt(df, id_vars=i, value_vars=varnames, value_name=stub, var_name=j) newdf_j = newdf[j].str.replace(stub, "") @@ -971,6 +970,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False): Examples -------- + >>> import pandas as pd >>> s = pd.Series(list('abca')) >>> get_dummies(s) diff --git a/pandas/lib.pyx b/pandas/lib.pyx index 53c4e0a44e8e9..3324040391340 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -576,7 +576,7 @@ def maybe_indices_to_slice(ndarray[int64_t] indices): cdef: Py_ssize_t i, n = len(indices) - if n == 0: + if not n or indices[0] < 0: return indices for i in range(1, n): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 7354c57498561..eaf3086c611d8 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -11665,6 +11665,13 @@ def test_unstack_non_unique_index_names(self): with tm.assertRaises(ValueError): df.T.stack('c1') + def test_repr_with_mi_nat(self): + df = DataFrame({'X': [1, 2]}, + index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) + res = repr(df) + exp = ' X\nNaT a 1\n2013-01-01 b 2' + nose.tools.assert_equal(res, exp) + def test_reset_index(self): stacked = self.frame.stack()[::2] stacked = DataFrame({'foo': stacked, 'bar': stacked}) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 29aed792bfe11..0752ec52c9a1e 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -873,6 +873,11 @@ def test_outer_join_sort(self): expected = right_idx.astype(object).union(left_idx.astype(object)) tm.assert_index_equal(joined, expected) + def test_nan_first_take_datetime(self): + idx = Index([pd.NaT, Timestamp('20130101'), Timestamp('20130102')]) + res = idx.take([-1, 0, 1]) + exp = Index([idx[-1], idx[0], idx[1]]) + tm.assert_index_equal(res, exp) class TestFloat64Index(tm.TestCase): _multiprocess_can_split_ = True