Skip to content

Commit 5d55410

Browse files
committed
TST: test coverage, refactored hash table unique into nanops for now
1 parent 3a17c1e commit 5d55410

File tree

4 files changed

+71
-23
lines changed

4 files changed

+71
-23
lines changed

pandas/core/nanops.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,3 +346,26 @@ def f(x, y):
346346
nanle = make_nancomp(operator.le)
347347
naneq = make_nancomp(operator.eq)
348348
nanne = make_nancomp(operator.ne)
349+
350+
def unique1d(values):
    """
    Hash table-based unique for an array.

    Parameters
    ----------
    values : ndarray
        Array whose distinct entries are wanted. Floating point input is
        processed as float64, integer input as int64, and anything else
        as object.

    Returns
    -------
    uniques : ndarray
        float64 array for floating input and int64 array for integer
        input. For all other input the object results are passed through
        ``lib.maybe_convert_objects`` before being returned.

    Notes
    -----
    The order of the result is whatever the hash table's ``unique``
    method yields; no sorting is done here.
    """
    scalar_type = values.dtype.type

    # Choose the working dtype and the matching hash table implementation.
    if issubclass(scalar_type, np.floating):
        target, table_cls = np.float64, lib.Float64HashTable
    elif issubclass(scalar_type, np.integer):
        target, table_cls = np.int64, lib.Int64HashTable
    else:
        target, table_cls = np.object_, lib.PyObjectHashTable

    # The hash tables work on one fixed dtype each, so cast when needed.
    if values.dtype != target:
        values = values.astype(target)

    found = table_cls(len(values)).unique(values)

    if target is np.object_:
        # NOTE(review): maybe_convert_objects presumably narrows the object
        # array to a more specific dtype when it can -- confirm in lib.
        return lib.maybe_convert_objects(lib.list_to_object_array(found))
    return np.array(found, dtype=target)
371+

pandas/core/series.py

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -349,13 +349,9 @@ def _get_values_tuple(self, key):
349349
if not isinstance(self.index, MultiIndex):
350350
raise ValueError('Can only tuple-index with a MultiIndex')
351351

352+
# If key is contained, would have returned by now
352353
indexer, new_index = self.index.get_loc_level(key)
353-
354-
if com.is_integer(indexer):
355-
return self.values[indexer]
356-
else:
357-
return Series(self.values[indexer], index=new_index,
358-
name=self.name)
354+
return Series(self.values[indexer], index=new_index, name=self.name)
359355

360356
def _get_values(self, indexer):
361357
try:
@@ -647,10 +643,7 @@ def iteritems(self, index=True):
647643
"""
648644
Lazily iterate over (index, value) tuples
649645
"""
650-
if index:
651-
return izip(iter(self.index), iter(self))
652-
else:
653-
return izip(iter(self))
646+
return izip(iter(self.index), iter(self))
654647

655648
iterkv = iteritems
656649
if py3compat.PY3: # pragma: no cover
@@ -833,19 +826,7 @@ def unique(self):
833826
-------
834827
uniques : ndarray
835828
"""
836-
values = self.values
837-
if issubclass(values.dtype.type, np.floating):
838-
if values.dtype != np.float64:
839-
values = values.astype(np.float64)
840-
table = lib.Float64HashTable(len(values))
841-
uniques = np.array(table.unique(values), dtype=np.float64)
842-
else:
843-
if not values.dtype == np.object_:
844-
values = values.astype(np.object_)
845-
table = lib.PyObjectHashTable(len(values))
846-
uniques = lib.list_to_object_array(table.unique(values))
847-
uniques = lib.maybe_convert_objects(uniques)
848-
return uniques
829+
return nanops.unique1d(self.values)
849830

850831
def nunique(self):
851832
"""

pandas/tests/test_multilevel.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,13 @@ def test_unstack_preserve_types(self):
977977
self.assert_(unstacked['E', 1].dtype == np.object_)
978978
self.assert_(unstacked['F', 1].dtype == np.float64)
979979

980+
def test_getitem_lowerdim_corner(self):
    """Getting or setting ``frame.ix[('bar', 'three'), 'B']`` must raise KeyError."""
    # Same key for both directions: a (row tuple, column) pair.
    key = (('bar', 'three'), 'B')

    # read access
    self.assertRaises(KeyError, self.frame.ix.__getitem__, key)

    # write access
    self.assertRaises(KeyError, self.frame.ix.__setitem__, key, 0)
986+
980987
#----------------------------------------------------------------------
981988
# AMBIGUOUS CASES!
982989

@@ -1007,6 +1014,8 @@ def test_fancy_2d(self):
10071014
expected = ft.xs('B')['foo']
10081015
assert_series_equal(result, expected)
10091016

1017+
#----------------------------------------------------------------------
1018+
10101019
def test_to_html(self):
10111020
self.ymd.columns.name = 'foo'
10121021
self.ymd.to_html()
@@ -1028,6 +1037,8 @@ def test_level_with_tuples(self):
10281037
assert_series_equal(result, expected)
10291038
assert_series_equal(result2, expected)
10301039

1040+
self.assertRaises(KeyError, series.__getitem__, (('foo', 'bar', 0), 2))
1041+
10311042
result = frame.ix[('foo', 'bar', 0)]
10321043
result2 = frame.xs(('foo', 'bar', 0))
10331044
expected = frame[:2]

pandas/tests/test_series.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313

1414
from pandas import Index, Series, TimeSeries, DataFrame, isnull, notnull
1515
from pandas.core.index import MultiIndex
16+
1617
import pandas.core.datetools as datetools
18+
import pandas.core.nanops as nanops
19+
1720
from pandas.util import py3compat
1821
from pandas.util.testing import assert_series_equal, assert_almost_equal
1922
import pandas.util.testing as tm
@@ -145,6 +148,19 @@ def test_to_sparse_pass_name(self):
145148
result = self.ts.to_sparse()
146149
self.assertEquals(result.name, self.ts.name)
147150

151+
class TestNanops(unittest.TestCase):
    """Tests for the helpers in ``pandas.core.nanops``."""

    def test_comparisons(self):
        """``nangt`` matches ``>`` elementwise, with NaN where the left operand is NaN."""
        lhs = np.random.randn(10)
        rhs = np.random.randn(10)
        lhs[:3] = np.nan

        # Reference answer: the plain comparison as an object array, with
        # the slots whose left operand is NaN overwritten by NaN.
        wanted = (lhs > rhs).astype('O')
        wanted[:3] = np.nan

        got = nanops.nangt(lhs, rhs)
        assert_almost_equal(got, wanted)
163+
148164
class SafeForSparse(object):
149165
pass
150166

@@ -1430,6 +1446,23 @@ def test_unique(self):
14301446
result = s.unique()
14311447
self.assert_(len(result) == 2)
14321448

1449+
# integers
1450+
s = Series(np.random.randint(0, 100, size=100))
1451+
result = np.sort(s.unique())
1452+
expected = np.unique(s.values)
1453+
self.assert_(np.array_equal(result, expected))
1454+
1455+
s = Series(np.random.randint(0, 100, size=100).astype(np.int32))
1456+
result = np.sort(s.unique())
1457+
expected = np.unique(s.values)
1458+
self.assert_(np.array_equal(result, expected))
1459+
1460+
# test string arrays for coverage
1461+
strings = np.tile(np.array([tm.rands(10) for _ in xrange(10)]), 10)
1462+
result = np.sort(nanops.unique1d(strings))
1463+
expected = np.unique(strings)
1464+
self.assert_(np.array_equal(result, expected))
1465+
14331466
def test_sort(self):
14341467
ts = self.ts.copy()
14351468
ts.sort()

0 commit comments

Comments
 (0)