Skip to content

Commit aae6213

Browse files
committed
BUG: Formatting of an index that has nan was inconsistent or wrong (would fill from
other values), (GH2850_) BUG: issue in test_index.py/test_format: 1) printing of 'nan' rather than the na_rep (NaN) is inconsistent with everywhere else; 2) a 'None' in the index is de facto treated as NaN -- is this wrong? CLN: consistency among indexes for NaN/NaT values
1 parent a79f08c commit aae6213

File tree

4 files changed

+68
-11
lines changed

4 files changed

+68
-11
lines changed

RELEASE.rst

+3
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ pandas 0.11.0
145145
- Bug in DataFrame column insertion when the column creation fails, existing frame is left in
146146
an irrecoverable state (GH3010_)
147147
- Bug in DataFrame update where non-specified values could cause dtype changes (GH3016_)
148+
- Formatting of an index that has ``nan`` was inconsistent or wrong (would fill from
149+
other values), (GH2850_)
148150

149151
.. _GH622: https://github.com/pydata/pandas/issues/622
150152
.. _GH797: https://github.com/pydata/pandas/issues/797
@@ -161,6 +163,7 @@ pandas 0.11.0
161163
.. _GH2867: https://github.com/pydata/pandas/issues/2867
162164
.. _GH2807: https://github.com/pydata/pandas/issues/2807
163165
.. _GH2849: https://github.com/pydata/pandas/issues/2849
166+
.. _GH2850: https://github.com/pydata/pandas/issues/2850
164167
.. _GH2898: https://github.com/pydata/pandas/issues/2898
165168
.. _GH2892: https://github.com/pydata/pandas/issues/2892
166169
.. _GH2909: https://github.com/pydata/pandas/issues/2909

pandas/core/index.py

+30-8
Original file line numberDiff line numberDiff line change
@@ -173,9 +173,9 @@ def __unicode__(self):
173173
Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3.
174174
"""
175175
if len(self) > 6 and len(self) > np.get_printoptions()['threshold']:
176-
data = self[:3].tolist() + ["..."] + self[-3:].tolist()
176+
data = self[:3].format() + ["..."] + self[-3:].format()
177177
else:
178-
data = self
178+
data = self.format()
179179

180180
prepr = com.pprint_thing(data, escape_chars=('\t', '\r', '\n'))
181181
return '%s(%s, dtype=%s)' % (type(self).__name__, prepr, self.dtype)
@@ -247,8 +247,14 @@ def _has_complex_internals(self):
247247

248248
def summary(self, name=None):
249249
if len(self) > 0:
250-
index_summary = ', %s to %s' % (com.pprint_thing(self[0]),
251-
com.pprint_thing(self[-1]))
250+
head = self[0]
251+
if hasattr(head,'format'):
252+
head = head.format()
253+
tail = self[-1]
254+
if hasattr(tail,'format'):
255+
tail = tail.format()
256+
index_summary = ', %s to %s' % (com.pprint_thing(head),
257+
com.pprint_thing(tail))
252258
else:
253259
index_summary = ''
254260

@@ -419,7 +425,7 @@ def take(self, indexer, axis=0):
419425
taken = self.view(np.ndarray).take(indexer)
420426
return self._constructor(taken, name=self.name)
421427

422-
def format(self, name=False, formatter=None):
428+
def format(self, name=False, formatter=None, na_rep='NaN'):
423429
"""
424430
Render a string representation of the Index
425431
"""
@@ -454,6 +460,14 @@ def format(self, name=False, formatter=None):
454460
if values.dtype == np.object_:
455461
result = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))
456462
for x in values]
463+
464+
# could have nans
465+
mask = isnull(values)
466+
if mask.any():
467+
result = np.array(result)
468+
result[mask] = na_rep
469+
result = result.tolist()
470+
457471
else:
458472
result = _trim_front(format_array(values, None, justify='left'))
459473
return header + result
@@ -1446,10 +1460,9 @@ def __unicode__(self):
14461460
np.set_printoptions(threshold=50)
14471461

14481462
if len(self) > 100:
1449-
values = np.concatenate([self[:50].values,
1450-
self[-50:].values])
1463+
values = self[:50].format() + self[-50:].format()
14511464
else:
1452-
values = self.values
1465+
values = self.format()
14531466

14541467
summary = com.pprint_thing(values, escape_chars=('\t', '\r', '\n'))
14551468

@@ -1618,7 +1631,16 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
16181631
stringified_levels = []
16191632
for lev, lab in zip(self.levels, self.labels):
16201633
if len(lev) > 0:
1634+
16211635
formatted = lev.take(lab).format(formatter=formatter)
1636+
1637+
# we have some NA
1638+
mask = lab==-1
1639+
if mask.any():
1640+
formatted = np.array(formatted)
1641+
formatted[mask] = na_rep
1642+
formatted = formatted.tolist()
1643+
16221644
else:
16231645
# weird all NA case
16241646
formatted = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))

pandas/tests/test_format.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,31 @@ def test_long_series(self):
603603
nmatches = len(re.findall('dtype',str_rep))
604604
self.assert_(nmatches == 1)
605605

606+
def test_index_with_nan(self):
607+
# GH 2850
608+
df = DataFrame({'id1': {0: '1a3', 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'},
609+
'id3': {0: '78d', 1: '79d'}, 'value': {0: 123, 1: 64}})
610+
611+
# multi-index
612+
y = df.set_index(['id1', 'id2', 'id3'])
613+
result = y.to_string()
614+
expected = u' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64'
615+
self.assert_(result == expected)
616+
617+
# index
618+
y = df.set_index('id2')
619+
result = y.to_string()
620+
expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64'
621+
self.assert_(result == expected)
622+
623+
# all-nan in mi
624+
df2 = df.copy()
625+
df2.ix[:,'id2'] = np.nan
626+
y = df2.set_index('id2')
627+
result = y.to_string()
628+
expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64'
629+
self.assert_(result == expected)
630+
606631
def test_to_string(self):
607632
from pandas import read_table
608633
import re
@@ -1234,10 +1259,16 @@ def test_datetimeindex(self):
12341259
result = s.to_string()
12351260
self.assertTrue('2013-01-02' in result)
12361261

1237-
s = Series(2, index=[ Timestamp('20130111'), NaT ]).append(s)
1262+
# nat in index
1263+
s2 = Series(2, index=[ Timestamp('20130111'), NaT ])
1264+
s = s2.append(s)
12381265
result = s.to_string()
12391266
self.assertTrue('NaT' in result)
12401267

1268+
# nat in summary
1269+
result = str(s2.index)
1270+
self.assertTrue('NaT' in result)
1271+
12411272
def test_timedelta64(self):
12421273

12431274
from pandas import date_range

pandas/tests/test_index.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -351,12 +351,13 @@ def test_format(self):
351351
# 2845
352352
index = Index([1, 2.0+3.0j, np.nan])
353353
formatted = index.format()
354-
expected = [str(index[0]), str(index[1]), str(index[2])]
354+
expected = [str(index[0]), str(index[1]), u'NaN']
355355
self.assertEquals(formatted, expected)
356356

357+
# is this really allowed?
357358
index = Index([1, 2.0+3.0j, None])
358359
formatted = index.format()
359-
expected = [str(index[0]), str(index[1]), '']
360+
expected = [str(index[0]), str(index[1]), u'NaN']
360361
self.assertEquals(formatted, expected)
361362

362363
self.strIndex[:0].format()

0 commit comments

Comments
 (0)