Skip to content

Commit 06dd4d8

Browse files
author
Artemy Kolchinsky
committed
BUG: Make .iloc and .loc indexing consistent on empty dataframes
Tests fix; test reorder; doc update; tests fix; tests fix; SQL tests fix
1 parent 76571d0 commit 06dd4d8

File tree

9 files changed

+54
-24
lines changed

9 files changed

+54
-24
lines changed

doc/source/whatsnew/v0.16.1.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ Bug Fixes
206206

207207
- Bug where ``GroupBy.size`` doesn't attach index name properly if grouped by ``TimeGrouper`` (:issue:`9925`)
208208

209-
209+
- Bug where ``.iloc`` and ``.loc`` behavior is not consistent on empty ``DataFrame`` objects (:issue:`9964`)
210210

211211

212212

pandas/core/frame.py

+5-3
Original file line number | Diff line number | Diff line change
@@ -1741,17 +1741,19 @@ def _ixs(self, i, axis=0):
17411741
lab_slice = slice(label[0], label[-1])
17421742
return self.ix[:, lab_slice]
17431743
else:
1744-
label = self.columns[i]
17451744
if isinstance(label, Index):
17461745
return self.take(i, axis=1, convert=True)
17471746

1747+
index_len = len(self.index)
1748+
17481749
# if the values returned are not the same length
17491750
# as the index (iow a not found value), iget returns
17501751
# a 0-len ndarray. This is effectively catching
17511752
# a numpy error (as numpy should really raise)
17521753
values = self._data.iget(i)
1753-
if not len(values):
1754-
values = np.array([np.nan] * len(self.index), dtype=object)
1754+
1755+
if index_len and not len(values):
1756+
values = np.array([np.nan] * index_len, dtype=object)
17551757
result = self._constructor_sliced.from_array(
17561758
values, index=self.index,
17571759
name=label, fastpath=True)

pandas/io/tests/test_json/test_pandas.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -324,12 +324,12 @@ def test_frame_to_json_except(self):
324324
def test_frame_empty(self):
325325
df = DataFrame(columns=['jim', 'joe'])
326326
self.assertFalse(df._is_mixed_type)
327-
assert_frame_equal(read_json(df.to_json()), df)
327+
assert_frame_equal(read_json(df.to_json()), df, check_dtype=False)
328328

329329
# mixed type
330330
df['joe'] = df['joe'].astype('i8')
331331
self.assertTrue(df._is_mixed_type)
332-
assert_frame_equal(read_json(df.to_json()), df)
332+
assert_frame_equal(read_json(df.to_json()), df, check_dtype=False)
333333

334334
def test_v12_compat(self):
335335
df = DataFrame(

pandas/io/tests/test_sql.py

+7-3
Original file line number | Diff line number | Diff line change
@@ -1256,10 +1256,14 @@ def test_transactions(self):
12561256
self._transaction_test()
12571257

12581258
def test_get_schema_create_table(self):
1259-
self._load_test2_data()
1259+
# Use a dataframe without a bool column, since MySQL converts bool to
1260+
# TINYINT (which read_sql_table returns as an int and causes a dtype
1261+
# mismatch)
1262+
1263+
self._load_test3_data()
12601264
tbl = 'test_get_schema_create_table'
1261-
create_sql = sql.get_schema(self.test_frame2, tbl, con=self.conn)
1262-
blank_test_df = self.test_frame2.iloc[:0]
1265+
create_sql = sql.get_schema(self.test_frame3, tbl, con=self.conn)
1266+
blank_test_df = self.test_frame3.iloc[:0]
12631267

12641268
self.drop_table(tbl)
12651269
self.conn.execute(create_sql)

pandas/stats/tests/test_moments.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -862,7 +862,7 @@ def _non_null_values(x):
862862
if mock_mean:
863863
# check that mean equals mock_mean
864864
expected = mock_mean(x)
865-
assert_equal(mean_x, expected)
865+
assert_equal(mean_x, expected.astype('float64'))
866866

867867
# check that correlation of a series with itself is either 1 or NaN
868868
corr_x_x = corr(x, x)
@@ -1549,6 +1549,7 @@ def test_moment_functions_zero_length(self):
15491549
df1_expected = df1
15501550
df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, minor_axis=df1.columns)
15511551
df2 = DataFrame(columns=['a'])
1552+
df2['a'] = df2['a'].astype('float64')
15521553
df2_expected = df2
15531554
df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, minor_axis=df2.columns)
15541555

pandas/tests/test_frame.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -11810,7 +11810,7 @@ def test_mode(self):
1181011810
pd.DataFrame({"A": [12]}))
1181111811
assert_frame_equal(df[["D"]].mode(),
1181211812
pd.DataFrame(pd.Series([], dtype="int64"),
11813-
columns=["D"]))
11813+
columns=["D"]), check_dtype=False)
1181411814
assert_frame_equal(df[["E"]].mode(),
1181511815
pd.DataFrame(pd.Series([1, 3, 8], dtype="int64"),
1181611816
columns=["E"]))

pandas/tests/test_groupby.py

+8-8
Original file line number | Diff line number | Diff line change
@@ -1720,10 +1720,10 @@ def test_groupby_head_tail(self):
17201720
assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1))
17211721

17221722
empty_not_as = DataFrame(columns=df.columns)
1723-
assert_frame_equal(empty_not_as, g_not_as.head(0))
1724-
assert_frame_equal(empty_not_as, g_not_as.tail(0))
1725-
assert_frame_equal(empty_not_as, g_not_as.head(-1))
1726-
assert_frame_equal(empty_not_as, g_not_as.tail(-1))
1723+
assert_frame_equal(empty_not_as, g_not_as.head(0), check_dtype=False)
1724+
assert_frame_equal(empty_not_as, g_not_as.tail(0), check_dtype=False)
1725+
assert_frame_equal(empty_not_as, g_not_as.head(-1), check_dtype=False)
1726+
assert_frame_equal(empty_not_as, g_not_as.tail(-1), check_dtype=False)
17271727

17281728
assert_frame_equal(df, g_not_as.head(7)) # contains all
17291729
assert_frame_equal(df, g_not_as.tail(7))
@@ -1735,10 +1735,10 @@ def test_groupby_head_tail(self):
17351735
assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1))
17361736

17371737
empty_as = DataFrame(index=df_as.index[:0], columns=df.columns)
1738-
assert_frame_equal(empty_as, g_as.head(0))
1739-
assert_frame_equal(empty_as, g_as.tail(0))
1740-
assert_frame_equal(empty_as, g_as.head(-1))
1741-
assert_frame_equal(empty_as, g_as.tail(-1))
1738+
assert_frame_equal(empty_as, g_as.head(0), check_dtype=False)
1739+
assert_frame_equal(empty_as, g_as.tail(0), check_dtype=False)
1740+
assert_frame_equal(empty_as, g_as.head(-1), check_dtype=False)
1741+
assert_frame_equal(empty_as, g_as.tail(-1), check_dtype=False)
17421742

17431743
assert_frame_equal(df_as, g_as.head(7)) # contains all
17441744
assert_frame_equal(df_as, g_as.tail(7))

pandas/tests/test_indexing.py

+27-5
Original file line number | Diff line number | Diff line change
@@ -1063,6 +1063,7 @@ def test_loc_setitem_consistency(self):
10631063

10641064
# empty (essentially noops)
10651065
expected = DataFrame(columns=['x', 'y'])
1066+
expected['x'] = expected['x'].astype(np.int64)
10661067
df = DataFrame(columns=['x', 'y'])
10671068
df.loc[:, 'x'] = 1
10681069
assert_frame_equal(df,expected)
@@ -3369,7 +3370,7 @@ def f():
33693370
expected = DataFrame(columns=['foo'])
33703371
def f():
33713372
df = DataFrame()
3372-
df['foo'] = Series([])
3373+
df['foo'] = Series([], dtype='object')
33733374
return df
33743375
assert_frame_equal(f(), expected)
33753376
def f():
@@ -3379,17 +3380,20 @@ def f():
33793380
assert_frame_equal(f(), expected)
33803381
def f():
33813382
df = DataFrame()
3382-
df['foo'] = Series(range(len(df)))
3383+
df['foo'] = df.index
33833384
return df
33843385
assert_frame_equal(f(), expected)
3386+
3387+
expected = DataFrame(columns=['foo'])
3388+
expected['foo'] = expected['foo'].astype('float64')
33853389
def f():
33863390
df = DataFrame()
33873391
df['foo'] = []
33883392
return df
33893393
assert_frame_equal(f(), expected)
33903394
def f():
33913395
df = DataFrame()
3392-
df['foo'] = df.index
3396+
df['foo'] = Series(range(len(df)))
33933397
return df
33943398
assert_frame_equal(f(), expected)
33953399
def f():
@@ -3422,21 +3426,31 @@ def f():
34223426

34233427
# GH5720, GH5744
34243428
# don't create rows when empty
3429+
expected = DataFrame(columns=['A','B','New'])
3430+
expected['A'] = expected['A'].astype('int64')
3431+
expected['B'] = expected['B'].astype('float64')
3432+
expected['New'] = expected['New'].astype('float64')
34253433
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
34263434
y = df[df.A > 5]
34273435
y['New'] = np.nan
3428-
assert_frame_equal(y,DataFrame(columns=['A','B','New']))
3436+
assert_frame_equal(y,expected)
3437+
#assert_frame_equal(y,expected)
34293438

3439+
expected = DataFrame(columns=['a','b','c c','d'])
3440+
expected['d'] = expected['d'].astype('int64')
34303441
df = DataFrame(columns=['a', 'b', 'c c'])
34313442
df['d'] = 3
3432-
assert_frame_equal(df,DataFrame(columns=['a','b','c c','d']))
3443+
assert_frame_equal(df,expected)
34333444
assert_series_equal(df['c c'],Series(name='c c',dtype=object))
34343445

34353446
# reindex columns is ok
34363447
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
34373448
y = df[df.A > 5]
34383449
result = y.reindex(columns=['A','B','C'])
34393450
expected = DataFrame(columns=['A','B','C'])
3451+
expected['A'] = expected['A'].astype('int64')
3452+
expected['B'] = expected['B'].astype('float64')
3453+
expected['C'] = expected['C'].astype('float64')
34403454
assert_frame_equal(result,expected)
34413455

34423456
# GH 5756
@@ -4422,6 +4436,14 @@ def test_indexing_assignment_dict_already_exists(self):
44224436
expected.loc[5] = [9, 99]
44234437
tm.assert_frame_equal(df, expected)
44244438

4439+
def test_indexing_dtypes_on_empty(self):
4440+
df = DataFrame({'a':[1,2,3],'b':['b','b2','b3']})
4441+
df2 = df.ix[[],:]
4442+
4443+
self.assertEqual(df2.loc[:,'a'].dtype, int)
4444+
assert_series_equal(df2.loc[:,'a'], df2.iloc[:,0])
4445+
assert_series_equal(df2.loc[:,'a'], df2.ix[:,0])
4446+
44254447

44264448

44274449
class TestCategoricalIndex(tm.TestCase):

pandas/tseries/tests/test_period.py

+1
Original file line number | Diff line number | Diff line change
@@ -2118,6 +2118,7 @@ def test_range_slice_outofbounds(self):
21182118
for idx in [didx, pidx]:
21192119
df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx)
21202120
empty = DataFrame(index=idx.__class__([], freq='D'), columns=['units'])
2121+
empty['units'] = empty['units'].astype('int64')
21212122

21222123
tm.assert_frame_equal(df['2013/09/01':'2013/09/30'], empty)
21232124
tm.assert_frame_equal(df['2013/09/30':'2013/10/02'], df.iloc[:2])

0 commit comments

Comments
 (0)