Skip to content

BUG: fix repr of NaT in MultiIndex and fix negative indexing in DatetimeIndex #7410

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 9, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/v0.14.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,7 @@ Bug Fixes
- Bug in ``.ix`` getitem should always return a Series (:issue:`7150`)
- Bug in multi-index slicing with incomplete indexers (:issue:`7399`)
- Bug in multi-index slicing with a step in a sliced level (:issue:`7400`)
- Bug where negative indexers in ``DatetimeIndex`` were not correctly sliced
(:issue:`7408`)
- Bug where ``NaT`` wasn't repr'd correctly in a ``MultiIndex`` (:issue:`7406`,
:issue:`7409`)
17 changes: 14 additions & 3 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2610,12 +2610,14 @@ def get_level_values(self, level):
return values

def format(self, space=2, sparsify=None, adjoin=True, names=False,
na_rep='NaN', formatter=None):
na_rep=None, formatter=None):
if len(self) == 0:
return []

stringified_levels = []
for lev, lab in zip(self.levels, self.labels):
na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type)

if len(lev) > 0:

formatted = lev.take(lab).format(formatter=formatter)
Expand All @@ -2624,12 +2626,12 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
mask = lab == -1
if mask.any():
formatted = np.array(formatted, dtype=object)
formatted[mask] = na_rep
formatted[mask] = na
formatted = formatted.tolist()

else:
# weird all NA case
formatted = [com.pprint_thing(na_rep if isnull(x) else x,
formatted = [com.pprint_thing(na if isnull(x) else x,
escape_chars=('\t', '\r', '\n'))
for x in com.take_1d(lev.values, lab)]
stringified_levels.append(formatted)
Expand Down Expand Up @@ -4041,3 +4043,12 @@ def _all_indexes_same(indexes):
if not first.equals(index):
return False
return True


def _get_na_rep(dtype):
return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN')


def _get_na_value(dtype):
    """Return the missing-value sentinel for the scalar type *dtype*.

    Parameters
    ----------
    dtype : type
        Scalar type to look up (callers in this file pass
        ``lev.dtype.type``).

    Returns
    -------
    object
        ``tslib.NaT`` for ``np.datetime64`` and ``np.timedelta64``,
        ``np.nan`` for every other type.
    """
    return {np.datetime64: tslib.NaT, np.timedelta64: tslib.NaT}.get(dtype,
                                                                      np.nan)
16 changes: 8 additions & 8 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
import pandas.core.common as com
import pandas.algos as algos

from pandas.core.index import Index, MultiIndex
from pandas.tseries.period import PeriodIndex
from pandas.core.index import MultiIndex, _get_na_value


class _Unstacker(object):
Expand Down Expand Up @@ -83,7 +82,7 @@ def __init__(self, values, index, level=-1, value_columns=None):

def _make_index(lev, lab):
values = _make_index_array_level(lev.values, lab)
i = lev._simple_new(values, lev.name,
i = lev._simple_new(values, lev.name,
freq=getattr(lev, 'freq', None),
tz=getattr(lev, 'tz', None))
return i
Expand Down Expand Up @@ -262,7 +261,7 @@ def _make_index_array_level(lev, lab):

l = np.arange(len(lab))
mask_labels = np.empty(len(mask[mask]), dtype=object)
mask_labels.fill(np.nan)
mask_labels.fill(_get_na_value(lev.dtype.type))
mask_indexer = com._ensure_int64(l[mask])

labels = lev
Expand Down Expand Up @@ -638,7 +637,7 @@ def melt(frame, id_vars=None, value_vars=None,

This function is useful to massage a DataFrame into a format where one
or more columns are identifier variables (`id_vars`), while all other
columns, considered measured variables (`value_vars`), are "unpivoted" to
columns, considered measured variables (`value_vars`), are "unpivoted" to
the row axis, leaving just two non-identifier columns, 'variable' and
'value'.

Expand Down Expand Up @@ -680,7 +679,7 @@ def melt(frame, id_vars=None, value_vars=None,
0 a B 1
1 b B 3
2 c B 5

>>> pd.melt(df, id_vars=['A'], value_vars=['B', 'C'])
A variable value
0 a B 1
Expand All @@ -702,7 +701,7 @@ def melt(frame, id_vars=None, value_vars=None,
If you have multi-index columns:

>>> df.columns = [list('ABC'), list('DEF')]
>>> df
>>> df
A B C
D E F
0 a 1 2
Expand Down Expand Up @@ -901,7 +900,7 @@ def get_var_names(df, regex):
return df.filter(regex=regex).columns.tolist()

def melt_stub(df, stub, i, j):
varnames = get_var_names(df, "^"+stub)
varnames = get_var_names(df, "^" + stub)
newdf = melt(df, id_vars=i, value_vars=varnames, value_name=stub,
var_name=j)
newdf_j = newdf[j].str.replace(stub, "")
Expand Down Expand Up @@ -971,6 +970,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False):

Examples
--------
>>> import pandas as pd
>>> s = pd.Series(list('abca'))

>>> get_dummies(s)
Expand Down
2 changes: 1 addition & 1 deletion pandas/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ def maybe_indices_to_slice(ndarray[int64_t] indices):
cdef:
Py_ssize_t i, n = len(indices)

if n == 0:
if not n or indices[0] < 0:
return indices

for i in range(1, n):
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11665,6 +11665,13 @@ def test_unstack_non_unique_index_names(self):
with tm.assertRaises(ValueError):
df.T.stack('c1')

def test_repr_with_mi_nat(self):
    """repr of a frame whose MultiIndex contains NaT matches the expected text."""
    # NOTE(review): the column padding inside the expected string may have
    # been collapsed in this view -- confirm against the real file.
    index_levels = [[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]
    frame = DataFrame({'X': [1, 2]}, index=index_levels)
    expected = ' X\nNaT a 1\n2013-01-01 b 2'
    nose.tools.assert_equal(repr(frame), expected)

def test_reset_index(self):
stacked = self.frame.stack()[::2]
stacked = DataFrame({'foo': stacked, 'bar': stacked})
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,11 @@ def test_outer_join_sort(self):
expected = right_idx.astype(object).union(left_idx.astype(object))
tm.assert_index_equal(joined, expected)

def test_nan_first_take_datetime(self):
    """take() starting with a negative position matches item-by-item lookup."""
    idx = Index([pd.NaT, Timestamp('20130101'), Timestamp('20130102')])
    positions = [-1, 0, 1]
    result = idx.take(positions)
    # Build the expectation from plain scalar indexing at the same positions.
    expected = Index([idx[pos] for pos in positions])
    tm.assert_index_equal(result, expected)

class TestFloat64Index(tm.TestCase):
_multiprocess_can_split_ = True
Expand Down