Skip to content

Commit be6b004

Browse files
committed
TST: unit tests, and custom delete for Index objects
1 parent 80b9176 commit be6b004

File tree

6 files changed

+135
-18
lines changed

6 files changed

+135
-18
lines changed

RELEASE.rst

+3
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ Release notes
9797
* Note that under the hood there is a new, essentially "lazy evaluation"
9898
scheme with respect to adding columns to DataFrame. During some
9999
operations, like-typed blocks will be "consolidated" but not before.
100+
* Accessing `DataFrame` columns repeatedly is now significantly faster than
101+
`DataMatrix` used to be in 0.3.0 due to an internal Series caching mechanism
102+
(the cached Series are all views on the underlying data)
100103
* Column ordering for mixed type data is now completely consistent in
101104
`DataFrame`. In prior releases, there was inconsistent column ordering in
102105
`DataMatrix`

pandas/core/index.py

+7
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,9 @@ def slice_locs(self, start=None, end=None):
296296

297297
return beg_slice, end_slice
298298

299+
def delete(self, loc):
300+
arr = np.delete(np.asarray(self), loc)
301+
return Index(arr)
299302

300303
class DateIndex(Index):
301304
pass
@@ -768,6 +771,10 @@ def _assert_can_do_setop(self, other):
768771

769772
assert(self.nlevels == other.nlevels)
770773

774+
def delete(self, loc):
775+
new_labels = [np.delete(lab, loc) for lab in self.labels]
776+
return MultiIndex(levels=self.levels, labels=new_labels)
777+
771778
get_major_bounds = slice_locs
772779

773780
__bounds = None

pandas/core/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def _getitem_tuple(self, key):
105105
if isinstance(self.frame.index, MultiIndex):
106106
try:
107107
return self.frame.xs(key)
108-
except KeyError:
108+
except (KeyError, TypeError):
109109
# could do something more intelligent here? like raising the
110110
# exception if all of the tuple values are in the levels?
111111
pass

pandas/core/series.py

+12-14
Original file line numberDiff line numberDiff line change
@@ -264,10 +264,14 @@ def __getitem__(self, key):
264264
try:
265265
if isinstance(self.index, MultiIndex):
266266
return self._multilevel_index(key)
267-
elif isinstance(key, int):
268-
return self._regular_index(key)
269267
else:
270-
return self._regular_index(key)
268+
values = self.values
269+
try:
270+
return values[self.index.get_loc(key)]
271+
except KeyError:
272+
if isinstance(key, (int, np.integer)):
273+
return values[key]
274+
raise Exception('Requested index not in this series!')
271275
except Exception:
272276
pass
273277

@@ -293,16 +297,6 @@ def _index_with(indexer):
293297
key = np.asarray(key)
294298
return _index_with(key)
295299

296-
def _regular_index(self, key):
297-
values = self.values
298-
299-
try:
300-
return values[self.index.get_loc(key)]
301-
except KeyError:
302-
if isinstance(key, (int, np.integer)):
303-
return values[key]
304-
raise Exception('Requested index not in this series!')
305-
306300
def _multilevel_index(self, key):
307301
values = self.values
308302
try:
@@ -720,7 +714,11 @@ def values(self):
720714
-------
721715
arr : numpy.ndarray
722716
"""
723-
return self.view(ndarray)
717+
try:
718+
return self._values
719+
except AttributeError:
720+
self._values = self.view(ndarray)
721+
return self._values
724722

725723
def iteritems(self):
726724
"""

pandas/tests/test_frame.py

+45
Original file line numberDiff line numberDiff line change
@@ -1501,6 +1501,51 @@ def test_dropIncompleteRows(self):
15011501
samesize_frame = frame.dropna(subset=['bar'])
15021502
self.assert_(samesize_frame.index.equals(self.frame.index))
15031503

1504+
def test_dropna(self):
1505+
df = DataFrame(np.random.randn(6, 4))
1506+
df[2][:2] = nan
1507+
1508+
dropped = df.dropna(axis=1)
1509+
expected = df.ix[:, [0, 1, 3]]
1510+
assert_frame_equal(dropped, expected)
1511+
1512+
dropped = df.dropna(axis=0)
1513+
expected = df.ix[range(2, 6)]
1514+
assert_frame_equal(dropped, expected)
1515+
1516+
# threshold
1517+
dropped = df.dropna(axis=1, thresh=5)
1518+
expected = df.ix[:, [0, 1, 3]]
1519+
assert_frame_equal(dropped, expected)
1520+
1521+
dropped = df.dropna(axis=0, thresh=4)
1522+
expected = df.ix[range(2, 6)]
1523+
assert_frame_equal(dropped, expected)
1524+
1525+
dropped = df.dropna(axis=1, thresh=4)
1526+
assert_frame_equal(dropped, df)
1527+
1528+
dropped = df.dropna(axis=1, thresh=3)
1529+
assert_frame_equal(dropped, df)
1530+
1531+
# subset
1532+
dropped = df.dropna(axis=0, subset=[0, 1, 3])
1533+
assert_frame_equal(dropped, df)
1534+
1535+
# all
1536+
dropped = df.dropna(axis=1, how='all')
1537+
assert_frame_equal(dropped, df)
1538+
1539+
df[2] = nan
1540+
dropped = df.dropna(axis=1, how='all')
1541+
expected = df.ix[:, [0, 1, 3]]
1542+
assert_frame_equal(dropped, expected)
1543+
1544+
def test_dropna_corner(self):
1545+
# bad input
1546+
self.assertRaises(ValueError, self.frame.dropna, how='foo')
1547+
self.assertRaises(ValueError, self.frame.dropna, how=None)
1548+
15041549
def test_fillna(self):
15051550
self.tsframe['A'][:5] = nan
15061551
self.tsframe['A'][-5:] = nan

pandas/tests/test_multilevel.py

+67-3
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,35 @@ def setUp(self):
3535
self.frame = DataFrame(np.random.randn(10, 3), index=index,
3636
columns=['A', 'B', 'C'])
3737

38+
tm.N = 100
3839
self.tdf = tm.makeTimeDataFrame()
3940
self.ymd = self.tdf.groupby([lambda x: x.year, lambda x: x.month,
4041
lambda x: x.day]).sum()
4142

43+
def test_pickle(self):
44+
import cPickle
45+
def _test_roundtrip(frame):
46+
pickled = cPickle.dumps(frame)
47+
unpickled = cPickle.loads(pickled)
48+
assert_frame_equal(frame, unpickled)
49+
50+
_test_roundtrip(self.frame)
51+
_test_roundtrip(self.frame.T)
52+
_test_roundtrip(self.ymd)
53+
_test_roundtrip(self.ymd.T)
54+
55+
def test_repr_to_string(self):
56+
repr(self.frame)
57+
repr(self.ymd)
58+
repr(self.frame.T)
59+
repr(self.ymd.T)
60+
61+
buf = StringIO()
62+
self.frame.toString(buf=buf)
63+
self.ymd.toString(buf=buf)
64+
self.frame.T.toString(buf=buf)
65+
self.ymd.T.toString(buf=buf)
66+
4267
def test_getitem_simple(self):
4368
df = self.frame.T
4469

@@ -80,9 +105,12 @@ def test_getitem_toplevel(self):
80105
assert_frame_equal(result, expected)
81106

82107
result = df['bar']
108+
result2 = df.ix[:, 'bar']
109+
83110
expected = df.reindex(columns=df.columns[3:5])
84111
expected.columns = expected.columns.droplevel(0)
85112
assert_frame_equal(result, expected)
113+
assert_frame_equal(result, result2)
86114

87115
def test_getitem_partial(self):
88116
ymd = self.ymd.T
@@ -92,11 +120,47 @@ def test_getitem_partial(self):
92120
expected.columns = expected.columns.droplevel(0).droplevel(0)
93121
assert_frame_equal(result, expected)
94122

123+
def test_setitem_change_dtype(self):
124+
dft = self.frame.T
125+
s = dft['foo', 'two']
126+
dft['foo', 'two'] = s > s.median()
127+
assert_series_equal(dft['foo', 'two'], s > s.median())
128+
self.assert_(isinstance(dft._data.blocks[1].items, MultiIndex))
129+
130+
reindexed = dft.reindex(columns=[('foo', 'two')])
131+
assert_series_equal(reindexed['foo', 'two'], s > s.median())
132+
95133
def test_fancy_slice_partial(self):
96-
pass
134+
result = self.frame.ix['bar':'baz']
135+
expected = self.frame[3:7]
136+
assert_frame_equal(result, expected)
97137

98-
def test_fancy_select_toplevel(self):
99-
pass
138+
result = self.ymd.ix[(2000,2):(2000,4)]
139+
lev = self.ymd.index.labels[1]
140+
expected = self.ymd[(lev >= 1) & (lev <= 3)]
141+
assert_frame_equal(result, expected)
142+
143+
def test_sortlevel(self):
144+
df = self.frame.copy()
145+
df.index = np.arange(len(df))
146+
self.assertRaises(Exception, df.sortlevel, 0)
147+
148+
# axis=1
149+
150+
def test_sortlevel_mixed(self):
151+
sorted_before = self.frame.sortlevel(1)
152+
153+
df = self.frame.copy()
154+
df['foo'] = 'bar'
155+
sorted_after = df.sortlevel(1)
156+
assert_frame_equal(sorted_before, sorted_after.drop(['foo'], axis=1))
157+
158+
dft = self.frame.T
159+
sorted_before = dft.sortlevel(1, axis=1)
160+
dft['foo', 'three'] = 'bar'
161+
162+
sorted_after = dft.sortlevel(1, axis=1)
163+
assert_frame_equal(sorted_before, sorted_after.drop(['foo'], axis=1))
100164

101165
def test_alignment(self):
102166
pass

0 commit comments

Comments
 (0)