Skip to content

Commit 1a976d8

Browse files
committed
Merge pull request pandas-dev#3193 from jreback/index_fmt
CLN: refactor core/index, tseries/index and tseries/period so that their format and to_native_types methods are consistent
2 parents 8db7b4d + 7f85480 commit 1a976d8

File tree

6 files changed: 101 additions (+101) and 61 deletions (-61)

pandas/core/index.py

+23-36
Original file line number | Diff line number | Diff line change
@@ -428,12 +428,10 @@ def take(self, indexer, axis=0):
428428
taken = self.view(np.ndarray).take(indexer)
429429
return self._constructor(taken, name=self.name)
430430

431-
def format(self, name=False, formatter=None, na_rep='NaN'):
431+
def format(self, name=False, formatter=None, **kwargs):
432432
"""
433433
Render a string representation of the Index
434434
"""
435-
from pandas.core.format import format_array
436-
437435
header = []
438436
if name:
439437
header.append(com.pprint_thing(self.name,
@@ -443,11 +441,13 @@ def format(self, name=False, formatter=None, na_rep='NaN'):
443441
if formatter is not None:
444442
return header + list(self.map(formatter))
445443

446-
if self.is_all_dates:
447-
return header + _date_formatter(self)
444+
return self._format_with_header(header, **kwargs)
448445

446+
def _format_with_header(self, header, na_rep='NaN', **kwargs):
449447
values = self.values
450448

449+
from pandas.core.format import format_array
450+
451451
if values.dtype == np.object_:
452452
values = lib.maybe_convert_objects(values, safe=1)
453453

@@ -466,17 +466,18 @@ def format(self, name=False, formatter=None, na_rep='NaN'):
466466
result = _trim_front(format_array(values, None, justify='left'))
467467
return header + result
468468

469-
def to_native_types(self, slicer=None, na_rep='', float_format=None):
469+
def to_native_types(self, slicer=None, **kwargs):
470+
""" slice and dice then format """
470471
values = self
471472
if slicer is not None:
472473
values = values[slicer]
473-
if self.is_all_dates:
474-
return _date_formatter(values)
475-
else:
476-
mask = isnull(values)
477-
values = np.array(values,dtype=object)
478-
values[mask] = na_rep
474+
return values._format_native_types(**kwargs)
479475

476+
def _format_native_types(self, na_rep='', **kwargs):
477+
""" actually format my specific types """
478+
mask = isnull(self)
479+
values = np.array(self,dtype=object,copy=True)
480+
values[mask] = na_rep
480481
return values.tolist()
481482

482483
def equals(self, other):
@@ -1320,6 +1321,11 @@ def inferred_type(self):
13201321
def _constructor(self):
13211322
return Int64Index
13221323

1324+
@property
1325+
def asi8(self):
1326+
# do not cache or you'll create a memory leak
1327+
return self.values.view('i8')
1328+
13231329
@property
13241330
def is_all_dates(self):
13251331
"""
@@ -1489,11 +1495,8 @@ def __repr__(self):
14891495
def __len__(self):
14901496
return len(self.labels[0])
14911497

1492-
def to_native_types(self, slicer=None, na_rep='', float_format=None):
1493-
ix = self
1494-
if slicer:
1495-
ix = self[slicer]
1496-
return ix.tolist()
1498+
def _format_native_types(self, **kwargs):
1499+
return self.tolist()
14971500

14981501
@property
14991502
def _constructor(self):
@@ -1651,13 +1654,13 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
16511654
# we have some NA
16521655
mask = lab==-1
16531656
if mask.any():
1654-
formatted = np.array(formatted)
1657+
formatted = np.array(formatted,dtype=object)
16551658
formatted[mask] = na_rep
16561659
formatted = formatted.tolist()
16571660

16581661
else:
16591662
# weird all NA case
1660-
formatted = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))
1663+
formatted = [com.pprint_thing(na_rep if isnull(x) else x, escape_chars=('\t', '\r', '\n'))
16611664
for x in com.take_1d(lev.values, lab)]
16621665
stringified_levels.append(formatted)
16631666

@@ -1669,6 +1672,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
16691672
level.append(com.pprint_thing(name, escape_chars=('\t', '\r', '\n'))
16701673
if name is not None else '')
16711674

1675+
16721676
level.extend(np.array(lev, dtype=object))
16731677
result_levels.append(level)
16741678

@@ -2598,23 +2602,6 @@ def _wrap_joined_index(self, joined, other):
25982602

25992603
# For utility purposes
26002604

2601-
def _date_formatter(obj, na_rep=u'NaT'):
2602-
data = list(obj)
2603-
2604-
# tz formatter or time formatter
2605-
zero_time = time(0, 0)
2606-
for d in data:
2607-
if d.time() != zero_time or d.tzinfo is not None:
2608-
return [u'%s' % x for x in data ]
2609-
2610-
values = np.array(data,dtype=object)
2611-
mask = isnull(obj.values)
2612-
values[mask] = na_rep
2613-
2614-
imask = -mask
2615-
values[imask] = np.array([ u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day) for dt in values[imask] ])
2616-
return values.tolist()
2617-
26182605
def _sparsify(label_list, start=0):
26192606
pivoted = zip(*label_list)
26202607
k = len(label_list)

pandas/tests/test_format.py

+16
Original file line number | Diff line number | Diff line change
@@ -666,6 +666,22 @@ def test_index_with_nan(self):
666666
expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64'
667667
self.assert_(result == expected)
668668

669+
# partial nan in mi
670+
df2 = df.copy()
671+
df2.ix[:,'id2'] = np.nan
672+
y = df2.set_index(['id2','id3'])
673+
result = y.to_string()
674+
expected = u' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64'
675+
self.assert_(result == expected)
676+
677+
df = DataFrame({'id1': {0: np.nan, 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'},
678+
'id3': {0: np.nan, 1: '79d'}, 'value': {0: 123, 1: 64}})
679+
680+
y = df.set_index(['id1','id2','id3'])
681+
result = y.to_string()
682+
expected = u' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64'
683+
self.assert_(result == expected)
684+
669685
def test_to_string(self):
670686
from pandas import read_table
671687
import re

pandas/tests/test_frame.py

+16-6
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
import pandas.core.format as fmt
2222
import pandas.core.datetools as datetools
2323
from pandas.core.api import (DataFrame, Index, Series, notnull, isnull,
24-
MultiIndex, DatetimeIndex, Timestamp)
24+
MultiIndex, DatetimeIndex, Timestamp, Period)
2525
from pandas.io.parsers import read_csv
2626

2727
from pandas.util.testing import (assert_almost_equal,
@@ -4587,7 +4587,7 @@ def stuple_to_tuple(x):
45874587
cols=MultiIndex.from_tuples(map(stuple_to_tuple,recons.columns))
45884588
recons.columns = cols
45894589

4590-
type_map = dict(i='i',f='f',s='O',u='O',dt='O')
4590+
type_map = dict(i='i',f='f',s='O',u='O',dt='O',p='O')
45914591
if r_dtype:
45924592
if r_dtype == 'u': # unicode
45934593
r_dtype='O'
@@ -4599,6 +4599,11 @@ def stuple_to_tuple(x):
45994599
recons.index = np.array(map(Timestamp,recons.index),
46004600
dtype=r_dtype )
46014601
df.index = np.array(map(Timestamp,df.index),dtype=r_dtype )
4602+
elif r_dtype == 'p':
4603+
r_dtype='O'
4604+
recons.index = np.array(map(Timestamp,recons.index.to_datetime()),
4605+
dtype=r_dtype )
4606+
df.index = np.array(map(Timestamp,df.index.to_datetime()),dtype=r_dtype )
46024607
else:
46034608
r_dtype= type_map.get(r_dtype)
46044609
recons.index = np.array(recons.index,dtype=r_dtype )
@@ -4608,12 +4613,17 @@ def stuple_to_tuple(x):
46084613
c_dtype='O'
46094614
recons.columns = np.array(map(_to_uni,recons.columns),
46104615
dtype=c_dtype )
4611-
df.Columns = np.array(map(_to_uni,df.columns),dtype=c_dtype )
4616+
df.columns = np.array(map(_to_uni,df.columns),dtype=c_dtype )
46124617
elif c_dtype == 'dt':
46134618
c_dtype='O'
46144619
recons.columns = np.array(map(Timestamp,recons.columns),
46154620
dtype=c_dtype )
4616-
df.Columns = np.array(map(Timestamp,df.columns),dtype=c_dtype )
4621+
df.columns = np.array(map(Timestamp,df.columns),dtype=c_dtype )
4622+
elif c_dtype == 'p':
4623+
c_dtype='O'
4624+
recons.columns = np.array(map(Timestamp,recons.columns.to_datetime()),
4625+
dtype=c_dtype )
4626+
df.columns = np.array(map(Timestamp,df.columns.to_datetime()),dtype=c_dtype )
46174627
else:
46184628
c_dtype= type_map.get(c_dtype)
46194629
recons.columns = np.array(recons.columns,dtype=c_dtype )
@@ -4631,8 +4641,8 @@ def stuple_to_tuple(x):
46314641
_do_test(mkdf(nrows, ncols,r_idx_type='dt',
46324642
c_idx_type='s'),path, 'dt','s')
46334643

4634-
for r_idx_type in ['i', 'f','s','u']:
4635-
for c_idx_type in ['i', 'f','s','u','dt']:
4644+
for r_idx_type in ['i','s','u','p']:
4645+
for c_idx_type in ['i', 's','u','dt','p']:
46364646
for ncols in [1,2,128]:
46374647
base = int((chunksize// ncols or 1) or 1)
46384648
for nrows in [2,10,N-1,N,N+1,N+2,2*N-2,2*N-1,2*N,2*N+1,2*N+2,

pandas/tseries/index.py

+20-5
Original file line number | Diff line number | Diff line change
@@ -568,6 +568,26 @@ def __contains__(self, key):
568568
except (KeyError, TypeError):
569569
return False
570570

571+
def _format_with_header(self, header, **kwargs):
572+
return header + self._format_native_types(**kwargs)
573+
574+
def _format_native_types(self, na_rep=u'NaT', **kwargs):
575+
data = list(self)
576+
577+
# tz formatter or time formatter
578+
zero_time = time(0, 0)
579+
for d in data:
580+
if d.time() != zero_time or d.tzinfo is not None:
581+
return [u'%s' % x for x in data ]
582+
583+
values = np.array(data,dtype=object)
584+
mask = isnull(self.values)
585+
values[mask] = na_rep
586+
587+
imask = -mask
588+
values[imask] = np.array([ u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day) for dt in values[imask] ])
589+
return values.tolist()
590+
571591
def isin(self, values):
572592
"""
573593
Compute boolean array of whether each index value is found in the
@@ -627,11 +647,6 @@ def astype(self, dtype):
627647
else: # pragma: no cover
628648
raise ValueError('Cannot cast DatetimeIndex to dtype %s' % dtype)
629649

630-
@property
631-
def asi8(self):
632-
# do not cache or you'll create a memory leak
633-
return self.values.view('i8')
634-
635650
def _get_time_micros(self):
636651
utc = _utc()
637652
values = self.asi8

pandas/tseries/period.py

+20-8
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import pandas.tseries.frequencies as _freq_mod
1313

1414
import pandas.core.common as com
15+
from pandas.core.common import isnull
1516

1617
from pandas.lib import Timestamp
1718
import pandas.lib as lib
@@ -792,6 +793,15 @@ def _mpl_repr(self):
792793
# how to represent ourselves to matplotlib
793794
return self._get_object_array()
794795

796+
def equals(self, other):
797+
"""
798+
Determines if two Index objects contain the same elements.
799+
"""
800+
if self is other:
801+
return True
802+
803+
return np.array_equal(self.asi8, other.asi8)
804+
795805
def tolist(self):
796806
"""
797807
Return a list of Period objects
@@ -1029,16 +1039,18 @@ def __getitem__(self, key):
10291039

10301040
return PeriodIndex(result, name=self.name, freq=self.freq)
10311041

1032-
def format(self, name=False, formatter=None):
1033-
"""
1034-
Render a string representation of the Index
1035-
"""
1036-
header = []
1042+
def _format_with_header(self, header, **kwargs):
1043+
return header + self._format_native_types(**kwargs)
10371044

1038-
if name:
1039-
header.append(str(self.name) if self.name is not None else '')
1045+
def _format_native_types(self, na_rep=u'NaT', **kwargs):
10401046

1041-
return header + ['%s' % Period(x, freq=self.freq) for x in self]
1047+
values = np.array(list(self),dtype=object)
1048+
mask = isnull(self.values)
1049+
values[mask] = na_rep
1050+
1051+
imask = -mask
1052+
values[imask] = np.array([ u'%s' % dt for dt in values[imask] ])
1053+
return values.tolist()
10421054

10431055
def __array_finalize__(self, obj):
10441056
if self.ndim == 0: # pragma: no cover

pandas/util/testing.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -406,7 +406,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
406406
label will repeated at the corresponding level, you can specify just
407407
the first few, the rest will use the default ndupe_l of 1.
408408
len(ndupe_l) <= nlevels.
409-
idx_type - "i"/"f"/"s"/"u"/"dt".
409+
idx_type - "i"/"f"/"s"/"u"/"dt/"p".
410410
If idx_type is not None, `idx_nlevels` must be 1.
411411
"i"/"f" creates an integer/float index,
412412
"s"/"u" creates a string/unicode index
@@ -422,7 +422,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
422422
assert (names is None or names is False
423423
or names is True or len(names) is nlevels)
424424
assert idx_type is None or \
425-
(idx_type in ('i', 'f', 's', 'u', 'dt') and nlevels == 1)
425+
(idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and nlevels == 1)
426426

427427
if names is True:
428428
# build default names
@@ -437,7 +437,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
437437

438438
# specific 1D index type requested?
439439
idx_func = dict(i=makeIntIndex, f=makeFloatIndex, s=makeStringIndex,
440-
u=makeUnicodeIndex, dt=makeDateIndex).get(idx_type)
440+
u=makeUnicodeIndex, dt=makeDateIndex, p=makePeriodIndex).get(idx_type)
441441
if idx_func:
442442
idx = idx_func(nentries)
443443
# but we need to fill in the name
@@ -446,7 +446,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
446446
return idx
447447
elif idx_type is not None:
448448
raise ValueError('"%s" is not a legal value for `idx_type`, use '
449-
'"i"/"f"/"s"/"u"/"dt".' % idx_type)
449+
'"i"/"f"/"s"/"u"/"dt/"p".' % idx_type)
450450

451451
if len(ndupe_l) < nlevels:
452452
ndupe_l.extend([1] * (nlevels - len(ndupe_l)))
@@ -540,9 +540,9 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True,
540540
assert c_idx_nlevels > 0
541541
assert r_idx_nlevels > 0
542542
assert r_idx_type is None or \
543-
(r_idx_type in ('i', 'f', 's', 'u', 'dt') and r_idx_nlevels == 1)
543+
(r_idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and r_idx_nlevels == 1)
544544
assert c_idx_type is None or \
545-
(c_idx_type in ('i', 'f', 's', 'u', 'dt') and c_idx_nlevels == 1)
545+
(c_idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and c_idx_nlevels == 1)
546546

547547
columns = makeCustomIndex(ncols, nlevels=c_idx_nlevels, prefix='C',
548548
names=c_idx_names, ndupe_l=c_ndupe_l,

0 commit comments

Comments
 (0)