Skip to content

Commit ca41260

Browse files
committed
TST: merge test coverage and trim floating point zeros even if there are NAs
1 parent 9d8a3f1 commit ca41260

File tree

6 files changed

+103
-80
lines changed

6 files changed

+103
-80
lines changed

pandas/core/format.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ def __init__(self, *args, **kwargs):
488488
def _format_with(self, fmt_str):
489489
fmt_values = [fmt_str % x if notnull(x) else self.na_rep
490490
for x in self.values]
491-
return _trim_zeros(fmt_values)
491+
return _trim_zeros(fmt_values, self.na_rep)
492492

493493
def get_result(self):
494494
if self.formatter is not None:
@@ -537,17 +537,22 @@ def just(x):
537537

538538
return [just(x) for x in strings]
539539

540-
def _trim_zeros(str_floats):
540+
def _trim_zeros(str_floats, na_rep='NaN'):
541541
"""
542542
Trims zeros and decimal points
543543
"""
544544
# TODO: what if exponential?
545545
trimmed = str_floats
546-
while len(str_floats) > 0 and all([x.endswith('0') for x in trimmed]):
547-
trimmed = [x[:-1] for x in trimmed]
546+
547+
def _cond(values):
548+
non_na = [x for x in values if x != na_rep]
549+
return len(non_na) > 0 and all([x.endswith('0') for x in non_na])
550+
551+
while _cond(trimmed):
552+
trimmed = [x[:-1] if x != na_rep else x for x in trimmed]
548553

549554
# trim decimal points
550-
return [x[:-1] if x.endswith('.') else x for x in trimmed]
555+
return [x[:-1] if x.endswith('.') and x != na_rep else x for x in trimmed]
551556

552557

553558
def single_column_table(column):

pandas/tests/test_format.py

+23-11
Original file line numberDiff line numberDiff line change
@@ -241,12 +241,24 @@ def test_to_string_format_na(self):
241241
'B' : [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
242242
result = df.to_string()
243243

244-
expected = (' A B\n'
245-
'0 NaN NaN\n'
246-
'1 -1.000000 foo\n'
247-
'2 -2.123400 foooo\n'
248-
'3 3.000000 fooooo\n'
249-
'4 4.000000 bar')
244+
expected = (' A B\n'
245+
'0 NaN NaN\n'
246+
'1 -1.0000 foo\n'
247+
'2 -2.1234 foooo\n'
248+
'3 3.0000 fooooo\n'
249+
'4 4.0000 bar')
250+
self.assertEqual(result, expected)
251+
252+
df = DataFrame({'A' : [np.nan, -1., -2., 3., 4.],
253+
'B' : [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
254+
result = df.to_string()
255+
256+
expected = (' A B\n'
257+
'0 NaN NaN\n'
258+
'1 -1 foo\n'
259+
'2 -2 foooo\n'
260+
'3 3 fooooo\n'
261+
'4 4 bar')
250262
self.assertEqual(result, expected)
251263

252264
def test_to_html(self):
@@ -354,11 +366,11 @@ def test_to_string_float_na_spacing(self):
354366
s[::2] = np.nan
355367

356368
result = s.to_string()
357-
expected = ('0 NaN\n'
358-
'1 1.567800\n'
359-
'2 NaN\n'
360-
'3 -3.000000\n'
361-
'4 NaN')
369+
expected = ('0 NaN\n'
370+
'1 1.5678\n'
371+
'2 NaN\n'
372+
'3 -3.0000\n'
373+
'4 NaN')
362374
self.assertEqual(result, expected)
363375

364376
class TestEngFormatter(unittest.TestCase):

pandas/tests/test_frame.py

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import sys
99
import unittest
1010

11+
import nose
12+
1113
from numpy import random, nan
1214
from numpy.random import randn
1315
import numpy as np

pandas/tests/test_panel.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import operator
77
import unittest
8+
import nose
89

910
import numpy as np
1011

@@ -990,13 +991,20 @@ def test_from_frame_level1_unsorted(self):
990991
assert_frame_equal(p.minor_xs(2), df.ix[:,2].sort_index())
991992

992993
def test_to_excel(self):
994+
try:
995+
import xlwt
996+
import xlrd
997+
import openpyxl
998+
except ImportError:
999+
raise nose.SkipTest
1000+
9931001
path = '__tmp__.xlsx'
9941002
self.panel.to_excel(path)
9951003
reader = ExcelFile(path)
9961004
for item, df in self.panel.iteritems():
997-
recdf = reader.parse(str(item),index_col=0)
1005+
recdf = reader.parse(str(item),index_col=0)
9981006
assert_frame_equal(df, recdf)
999-
1007+
10001008
class TestLongPanel(unittest.TestCase):
10011009
"""
10021010
LongPanel no longer exists, but...

pandas/tools/merge.py

+17-28
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,12 @@ def _get_merge_keys(self):
221221
else:
222222
left_keys.append(left[k].values)
223223
join_names.append(k)
224+
if isinstance(self.right.index, MultiIndex):
225+
right_keys = [lev.values.take(lab)
226+
for lev, lab in zip(self.right.index.levels,
227+
self.right.index.labels)]
228+
else:
229+
right_keys = [self.right.index.values]
224230
elif _any(self.right_on):
225231
for k in self.right_on:
226232
if is_rkey(k):
@@ -229,6 +235,12 @@ def _get_merge_keys(self):
229235
else:
230236
right_keys.append(right[k].values)
231237
join_names.append(k)
238+
if isinstance(self.left.index, MultiIndex):
239+
left_keys = [lev.values.take(lab)
240+
for lev, lab in zip(self.left.index.levels,
241+
self.left.index.labels)]
242+
else:
243+
left_keys = [self.left.index.values]
232244

233245
if right_drop:
234246
self.right = self.right.drop(right_drop, axis=1)
@@ -248,7 +260,6 @@ def _validate_specification(self):
248260
elif self.right_index:
249261
if self.left_on is None:
250262
raise Exception('Must pass left_on or left_index=True')
251-
assert(len(self.left_on) == self.right.index.nlevels)
252263
else:
253264
# use the common columns
254265
common_cols = self.left.columns.intersection(self.right.columns)
@@ -261,19 +272,14 @@ def _validate_specification(self):
261272
elif self.left_on is not None:
262273
n = len(self.left_on)
263274
if self.right_index:
275+
assert(len(self.left_on) == self.right.index.nlevels)
264276
self.right_on = [None] * n
265-
else:
266-
assert(len(self.right_on) == n)
267277
elif self.right_on is not None:
268278
n = len(self.right_on)
269279
if self.left_index:
280+
assert(len(self.right_on) == self.left.index.nlevels)
270281
self.left_on = [None] * n
271-
else:
272-
assert(len(self.left_on) == n)
273-
elif self.left_index:
274-
assert(len(self.right_on) == self.left.index.nlevels)
275-
elif self.right_index:
276-
assert(len(self.left_on) == self.right.index.nlevels)
282+
assert(len(self.right_on) == len(self.left_on))
277283

278284
def _get_group_keys(self):
279285
"""
@@ -285,25 +291,8 @@ def _get_group_keys(self):
285291
-------
286292
287293
"""
288-
if self.left_index:
289-
if isinstance(self.left.index, MultiIndex):
290-
left_keys = [lev.values.take(lab)
291-
for lev, lab in zip(self.left.index.levels,
292-
self.left.index.labels)]
293-
else:
294-
left_keys = [self.left.index.values]
295-
else:
296-
left_keys = self.left_join_keys
297-
298-
if self.right_index:
299-
if isinstance(self.right.index, MultiIndex):
300-
right_keys = [lev.values.take(lab)
301-
for lev, lab in zip(self.right.index.levels,
302-
self.right.index.labels)]
303-
else:
304-
right_keys = [self.right.index.values]
305-
else:
306-
right_keys = self.right_join_keys
294+
left_keys = self.left_join_keys
295+
right_keys = self.right_join_keys
307296

308297
assert(len(left_keys) == len(right_keys))
309298

pandas/tools/tests/test_merge.py

+41-34
Original file line numberDiff line numberDiff line change
@@ -35,29 +35,29 @@ class TestMerge(unittest.TestCase):
3535

3636
def setUp(self):
3737
# aggregate multiple columns
38-
self.df = DataFrame({'key1' : get_test_data(),
39-
'key2' : get_test_data(),
40-
'data1' : np.random.randn(N),
41-
'data2' : np.random.randn(N)})
38+
self.df = DataFrame({'key1': get_test_data(),
39+
'key2': get_test_data(),
40+
'data1': np.random.randn(N),
41+
'data2': np.random.randn(N)})
4242

4343
# exclude a couple keys for fun
4444
self.df = self.df[self.df['key2'] > 1]
4545

4646
self.df2 = DataFrame({'key1' : get_test_data(n=N//5),
4747
'key2' : get_test_data(ngroups=NGROUPS//2,
4848
n=N//5),
49-
'value' : np.random.randn(N // 5)})
49+
'value': np.random.randn(N // 5)})
5050

5151
index, data = tm.getMixedTypeDict()
5252
self.target = DataFrame(data, index=index)
5353

5454
# Join on string value
55-
self.source = DataFrame({'MergedA' : data['A'], 'MergedD' : data['D']},
55+
self.source = DataFrame({'MergedA': data['A'], 'MergedD': data['D']},
5656
index=data['C'])
5757

58-
self.left = DataFrame({'key' : ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
59-
'v1' : np.random.randn(7)})
60-
self.right = DataFrame({'v2' : np.random.randn(4)},
58+
self.left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
59+
'v1': np.random.randn(7)})
60+
self.right = DataFrame({'v2': np.random.randn(4)},
6161
index=['d', 'b', 'c', 'a'])
6262

6363
def test_cython_left_outer_join(self):
@@ -134,9 +134,6 @@ def test_cython_inner_join(self):
134134
self.assert_(np.array_equal(ls, exp_ls))
135135
self.assert_(np.array_equal(rs, exp_rs))
136136

137-
def test_cython_full_outer_join(self):
138-
pass
139-
140137
def test_left_outer_join(self):
141138
joined_key2 = merge(self.df, self.df2, on='key2')
142139
_check_join(self.df, self.df2, joined_key2, ['key2'], how='left')
@@ -197,11 +194,11 @@ def test_join_on(self):
197194
self.assert_(np.array_equal(merged['MergedD'], target['D']))
198195

199196
# join with duplicates (fix regression from DataFrame/Matrix merge)
200-
df = DataFrame({'key' : ['a', 'a', 'b', 'b', 'c']})
201-
df2 = DataFrame({'value' : [0, 1, 2]}, index=['a', 'b', 'c'])
197+
df = DataFrame({'key': ['a', 'a', 'b', 'b', 'c']})
198+
df2 = DataFrame({'value': [0, 1, 2]}, index=['a', 'b', 'c'])
202199
joined = df.join(df2, on='key')
203-
expected = DataFrame({'key' : ['a', 'a', 'b', 'b', 'c'],
204-
'value' : [0, 0, 1, 1, 2]})
200+
expected = DataFrame({'key': ['a', 'a', 'b', 'b', 'c'],
201+
'value': [0, 0, 1, 1, 2]})
205202
assert_frame_equal(joined, expected)
206203

207204
# Test when some are missing
@@ -245,8 +242,8 @@ def test_join_with_len0(self):
245242
self.assertEqual(len(merged2), 0)
246243

247244
def test_join_on_inner(self):
248-
df = DataFrame({'key' : ['a', 'a', 'd', 'b', 'b', 'c']})
249-
df2 = DataFrame({'value' : [0, 1]}, index=['a', 'b'])
245+
df = DataFrame({'key': ['a', 'a', 'd', 'b', 'b', 'c']})
246+
df2 = DataFrame({'value': [0, 1]}, index=['a', 'b'])
250247

251248
joined = df.join(df2, on='key', how='inner')
252249

@@ -257,8 +254,8 @@ def test_join_on_inner(self):
257254
self.assert_(joined.index.equals(expected.index))
258255

259256
def test_join_on_singlekey_list(self):
260-
df = DataFrame({'key' : ['a', 'a', 'b', 'b', 'c']})
261-
df2 = DataFrame({'value' : [0, 1, 2]}, index=['a', 'b', 'c'])
257+
df = DataFrame({'key': ['a', 'a', 'b', 'b', 'c']})
258+
df2 = DataFrame({'value': [0, 1, 2]}, index=['a', 'b', 'c'])
262259

263260
# corner cases
264261
joined = df.join(df2, on=['key'])
@@ -277,18 +274,18 @@ def test_join_on_series_buglet(self):
277274
ds = Series([2], index=[1], name='b')
278275
result = df.join(ds, on='a')
279276
expected = DataFrame({'a' : [1, 1],
280-
'b' : [2, 2]}, index=df.index)
277+
'b': [2, 2]}, index=df.index)
281278
tm.assert_frame_equal(result, expected)
282279

283280
def test_join_index_mixed(self):
284281

285-
df1 = DataFrame({'A' : 1., 'B' : 2, 'C' : 'foo', 'D' : True},
282+
df1 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True},
286283
index=np.arange(10),
287284
columns=['A', 'B', 'C', 'D'])
288285
self.assert_(df1['B'].dtype == np.int64)
289286
self.assert_(df1['D'].dtype == np.bool_)
290287

291-
df2 = DataFrame({'A' : 1., 'B' : 2, 'C' : 'foo', 'D' : True},
288+
df2 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True},
292289
index=np.arange(0, 10, 2),
293290
columns=['A', 'B', 'C', 'D'])
294291

@@ -375,8 +372,8 @@ def test_join_inner_multiindex(self):
375372
'three', 'one']
376373

377374
data = np.random.randn(len(key1))
378-
data = DataFrame({'key1' : key1, 'key2' : key2,
379-
'data' : data})
375+
data = DataFrame({'key1': key1, 'key2': key2,
376+
'data': data})
380377

381378
index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
382379
['one', 'two', 'three']],
@@ -416,9 +413,9 @@ def test_join_float64_float32(self):
416413
assert_frame_equal(joined, expected)
417414

418415
def test_merge_index_singlekey_right_vs_left(self):
419-
left = DataFrame({'key' : ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
420-
'v1' : np.random.randn(7)})
421-
right = DataFrame({'v2' : np.random.randn(4)},
416+
left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
417+
'v1': np.random.randn(7)})
418+
right = DataFrame({'v2': np.random.randn(4)},
422419
index=['d', 'b', 'c', 'a'])
423420

424421
merged1 = merge(left, right, left_on='key',
@@ -434,9 +431,9 @@ def test_merge_index_singlekey_right_vs_left(self):
434431
assert_frame_equal(merged1, merged2.ix[:, merged1.columns])
435432

436433
def test_merge_index_singlekey_inner(self):
437-
left = DataFrame({'key' : ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
438-
'v1' : np.random.randn(7)})
439-
right = DataFrame({'v2' : np.random.randn(4)},
434+
left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
435+
'v1': np.random.randn(7)})
436+
right = DataFrame({'v2': np.random.randn(4)},
440437
index=['d', 'b', 'c', 'a'])
441438

442439
# inner join
@@ -459,6 +456,9 @@ def test_merge_misspecified(self):
459456
self.assertRaises(Exception, merge, self.left, self.left,
460457
left_on='key', on='key')
461458

459+
self.assertRaises(Exception, merge, self.df, self.df2,
460+
left_on=['key1'], right_on=['key1', 'key2'])
461+
462462
def test_merge_overlap(self):
463463
merged = merge(self.left, self.left, on='key')
464464
exp_len = (self.left['key'].value_counts() ** 2).sum()
@@ -467,9 +467,9 @@ def test_merge_overlap(self):
467467
self.assert_('v1.y' in merged)
468468

469469
def test_merge_different_column_key_names(self):
470-
left = DataFrame({'lkey' : ['foo', 'bar', 'baz', 'foo'],
471-
'value' : [1, 2, 3, 4]})
472-
right = DataFrame({'rkey' : ['foo', 'bar', 'qux', 'foo'],
470+
left = DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
471+
'value': [1, 2, 3, 4]})
472+
right = DataFrame({'rkey': ['foo', 'bar', 'qux', 'foo'],
473473
'value' : [5, 6, 7, 8]})
474474

475475
merged = left.merge(right, left_on='lkey', right_on='rkey',
@@ -552,6 +552,13 @@ def test_handle_join_key_pass_array(self):
552552
self.assert_(np.array_equal(merged['key_0'],
553553
np.array([1, 1, 1, 1, 2, 2, 3, 4, 5])))
554554

555+
left = DataFrame({'value': range(3)})
556+
right = DataFrame({'rvalue' : range(6)})
557+
558+
key = np.array([0, 1, 1, 2, 2, 3])
559+
merged = merge(left, right, left_index=True, right_on=key, how='outer')
560+
self.assert_(np.array_equal(merged['key_0'], key))
561+
555562
class TestMergeMulti(unittest.TestCase):
556563

557564
def setUp(self):

0 commit comments

Comments
 (0)