Skip to content

Commit 6253b07

Browse files
committed
TST: unit tests, and left justify index float formatting
1 parent 2dddc5d commit 6253b07

File tree

7 files changed

+68
-48
lines changed

7 files changed

+68
-48
lines changed

pandas/core/format.py

+9 -4
Original file line number | Diff line number | Diff line change
@@ -401,7 +401,7 @@ def _get_column_name_list(self):
401401

402402

403403
def format_array(values, formatter, float_format=None, na_rep='NaN',
404-
digits=None, space=None):
404+
digits=None, space=None, justify='right'):
405405
if com.is_float_dtype(values.dtype):
406406
fmt_klass = FloatArrayFormatter
407407
elif com.is_integer_dtype(values.dtype):
@@ -420,7 +420,8 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
420420

421421
fmt_obj = fmt_klass(values, digits, na_rep=na_rep,
422422
float_format=float_format,
423-
formatter=formatter, space=space)
423+
formatter=formatter, space=space,
424+
justify=justify)
424425

425426
return fmt_obj.get_result()
426427

@@ -496,7 +497,11 @@ def get_result(self):
496497
fmt_str = '%% .%df' % (self.digits - 1)
497498
fmt_values = self._format_with(fmt_str)
498499

499-
maxlen = max(len(x) for x in fmt_values)
500+
if len(fmt_values) > 0:
501+
maxlen = max(len(x) for x in fmt_values)
502+
else:
503+
maxlen =0
504+
500505
too_long = maxlen > self.digits + 5
501506

502507
# this is pretty arbitrary for now
@@ -506,7 +511,7 @@ def get_result(self):
506511
fmt_str = '%% .%de' % (self.digits - 1)
507512
fmt_values = self._format_with(fmt_str)
508513

509-
return _make_fixed_width(fmt_values)
514+
return _make_fixed_width(fmt_values, self.justify)
510515

511516

512517
class IntArrayFormatter(GenericArrayFormatter):

pandas/core/index.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -281,7 +281,7 @@ def format(self, name=False):
281281
if values.dtype == np.object_:
282282
result = [com._stringify(x) for x in values]
283283
else:
284-
result = _trim_front(format_array(values, None))
284+
result = _trim_front(format_array(values, None, justify='left'))
285285
return header + result
286286

287287
def equals(self, other):

pandas/core/series.py

+2 -6
Original file line number | Diff line number | Diff line change
@@ -47,18 +47,14 @@ def na_op(x, y):
4747
try:
4848
result = op(x, y)
4949
except TypeError:
50-
if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
50+
if isinstance(y, np.ndarray):
5151
mask = notnull(x) & notnull(y)
5252
result = np.empty(len(x), dtype=x.dtype)
5353
result[mask] = op(x[mask], y[mask])
54-
elif isinstance(x, np.ndarray):
54+
else:
5555
mask = notnull(x)
5656
result = np.empty(len(x), dtype=x.dtype)
5757
result[mask] = op(x[mask], y)
58-
else:
59-
mask = notnull(y)
60-
result = np.empty(len(y), dtype=y.dtype)
61-
result[mask] = op(x, y[mask])
6258

6359
return result
6460

pandas/sparse/frame.py

+1 -8
Original file line number | Diff line number | Diff line change
@@ -100,10 +100,6 @@ def _from_axes(self, data, axes):
100100
def _data(self):
101101
return _SparseMockBlockManager(self)
102102

103-
def _get_numeric_columns(self):
104-
# everything is necessarily float64
105-
return self.columns
106-
107103
def _consolidate_inplace(self):
108104
# do nothing when DataFrame calls this method
109105
pass
@@ -222,10 +218,7 @@ def to_dense(self):
222218
return DataFrame(data, index=self.index)
223219

224220
def astype(self, dtype):
225-
new_series = dict((k, v.astype(dtype)) for k, v in self.iteritems())
226-
return SparseDataFrame(new_series, index=self.index, columns=self.columns,
227-
default_fill_value=self.default_fill_value,
228-
default_kind=self.default_kind)
221+
raise NotImplementedError
229222

230223
def copy(self, deep=True):
231224
"""

pandas/tests/test_format.py

+4 -8
Original file line number | Diff line number | Diff line change
@@ -561,15 +561,11 @@ def test_rounding(self):
561561
self.assertEqual(result, u' 0.000')
562562

563563

564-
# class TestFloatArrayFormatter(unittest.TestCase):
565-
566-
# def test_trim_zeros(self):
567-
# values = np.array([1.5, 2.5, 3.5])
568-
# fmt = FloatArrayFormatter(values)
569-
570-
# result = fmt.get_result()
571-
# pass
564+
class TestFloatArrayFormatter(unittest.TestCase):
572565

566+
def test_misc(self):
567+
obj = fmt.FloatArrayFormatter(np.array([], dtype=np.float64))
568+
result = obj.get_result()
573569

574570
if __name__ == '__main__':
575571
import nose

pandas/tools/merge.py

+38 -21
Original file line number | Diff line number | Diff line change
@@ -56,12 +56,8 @@ def __init__(self, left, right, how='inner', on=None,
5656
self.left_on = com._maybe_make_list(left_on)
5757
self.right_on = com._maybe_make_list(right_on)
5858

59-
self.drop_keys = False # set this later...kludge
60-
6159
self.copy = copy
62-
6360
self.suffixes = suffixes
64-
6561
self.sort = sort
6662

6763
self.left_index = left_index
@@ -91,26 +87,33 @@ def get_result(self):
9187
return result
9288

9389
def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
94-
if not self.drop_keys:
95-
# do nothing, already found in one of the DataFrames
96-
return
97-
9890
# insert group keys
99-
for i, name in enumerate(self.join_names):
91+
92+
keys = zip(self.join_names, self.left_on, self.right_on)
93+
for i, (name, lname, rname) in enumerate(keys):
94+
if not _should_fill(lname, rname):
95+
continue
96+
10097
if name in result:
10198
key_col = result[name]
10299

103-
if name in self.left:
100+
if name in self.left and left_indexer is not None:
104101
na_indexer = (left_indexer == -1).nonzero()[0]
102+
if len(na_indexer) == 0:
103+
continue
104+
105105
right_na_indexer = right_indexer.take(na_indexer)
106106
key_col.put(na_indexer, com.take_1d(self.right_join_keys[i],
107107
right_na_indexer))
108-
else:
108+
elif name in self.right and right_indexer is not None:
109109
na_indexer = (right_indexer == -1).nonzero()[0]
110-
left_na_indexer = right_indexer.take(na_indexer)
110+
if len(na_indexer) == 0:
111+
continue
112+
113+
left_na_indexer = left_indexer.take(na_indexer)
111114
key_col.put(na_indexer, com.take_1d(self.left_join_keys[i],
112115
left_na_indexer))
113-
else:
116+
elif left_indexer is not None:
114117
# a faster way?
115118
key_col = com.take_1d(self.left_join_keys[i], left_indexer)
116119
na_indexer = (left_indexer == -1).nonzero()[0]
@@ -181,30 +184,41 @@ def _get_merge_keys(self):
181184
and self.right_on is None):
182185

183186
if self.left_index and self.right_index:
184-
pass
187+
self.left_on, self.right_on = (), ()
185188
elif self.left_index:
186189
if self.right_on is None:
187190
raise Exception('Must pass right_on or right_index=True')
191+
self.left_on = [None] * self.left.index.nlevels
188192
elif self.right_index:
189193
if self.left_on is None:
190194
raise Exception('Must pass left_on or left_index=True')
195+
self.right_on = [None] * self.right.index.nlevels
191196
else:
192197
# use the common columns
193198
common_cols = self.left.columns.intersection(self.right.columns)
194199
self.left_on = self.right_on = common_cols
195-
self.drop_keys = True
196-
197200
elif self.on is not None:
198201
if self.left_on is not None or self.right_on is not None:
199202
raise Exception('Can only pass on OR left_on and '
200203
'right_on')
201204
self.left_on = self.right_on = self.on
202-
self.drop_keys = True
205+
elif self.left_on is not None:
206+
n = len(self.left_on)
207+
if self.right_index:
208+
self.right_on = [None] * n
209+
else:
210+
assert(len(self.right_on) == n)
211+
elif self.right_on is not None:
212+
n = len(self.right_on)
213+
if self.left_index:
214+
self.left_on = [None] * n
215+
else:
216+
assert(len(self.left_on) == n)
203217

204218
left_keys = []
205219
right_keys = []
206220
join_names = []
207-
left_drop, right_drop = [], []
221+
right_drop = []
208222
left, right = self.left, self.right
209223

210224
is_lkey = lambda x: isinstance(x, np.ndarray) and len(x) == len(left)
@@ -249,8 +263,6 @@ def _get_merge_keys(self):
249263

250264
if right_drop:
251265
self.right = self.right.drop(right_drop, axis=1)
252-
if left_drop:
253-
self.left = self.left.drop(left_drop, axis=1)
254266

255267
return left_keys, right_keys, join_names
256268

@@ -1006,6 +1018,11 @@ def _consensus_name_attr(objs):
10061018
return None
10071019
return name
10081020

1021+
def _should_fill(lname, rname):
1022+
if not isinstance(lname, basestring) or not isinstance(rname, basestring):
1023+
return True
1024+
return lname == rname
1025+
10091026
def _all_indexes_same(indexes):
10101027
first = indexes[0]
10111028
for index in indexes[1:]:
@@ -1014,4 +1031,4 @@ def _all_indexes_same(indexes):
10141031
return True
10151032

10161033
def _any(x):
1017-
return x is not None and len(x) > 0
1034+
return x is not None and len(x) > 0 and any([y is not None for y in x])

pandas/tools/tests/test_merge.py

+13
Original file line number | Diff line number | Diff line change
@@ -530,6 +530,19 @@ def test_intelligently_handle_join_key(self):
530530

531531
self.assert_(joined._data.is_consolidated())
532532

533+
def test_handle_join_key_pass_array(self):
534+
left = DataFrame({'key' : [1, 1, 2, 2, 3],
535+
'value' : range(5)}, columns=['value', 'key'])
536+
right = DataFrame({'rvalue' : range(6)})
537+
key = np.array([1, 1, 2, 3, 4, 5])
538+
539+
merged = merge(left, right, left_on='key', right_on=key, how='outer')
540+
merged2 = merge(right, left, left_on=key, right_on='key', how='outer')
541+
542+
assert_series_equal(merged['key'], merged2['key'])
543+
self.assert_(merged['key'].notnull().all())
544+
self.assert_(merged2['key'].notnull().all())
545+
533546
class TestMergeMulti(unittest.TestCase):
534547

535548
def setUp(self):

0 commit comments

Comments
 (0)