diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index d00ce29fbfe92..fdd31cd4d8e36 100755 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -217,9 +217,8 @@ Bug Fixes - Bug causing an exception in slice assignments because ``length_of_indexer`` returns wrong results (:issue:`9995`) - Bug in csv parser causing lines with initial whitespace plus one non-space character to be skipped. (:issue:`9710`) - - Bug causing elements with a null group to spill into the final group when grouping by a ``Categorical`` (:issue:`9603`) - +- Bug where ``.iloc`` and ``.loc`` behavior is not consistent on empty DataFrames (:issue:`9964`) - Bug in invalid attribute access on a ``TimedeltaIndex`` incorrectly raised ``ValueError`` instead of ``AttributeError`` (:issue:`9680`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 341e129b22212..01b0d65e055df 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1740,17 +1740,19 @@ def _ixs(self, i, axis=0): lab_slice = slice(label[0], label[-1]) return self.ix[:, lab_slice] else: - label = self.columns[i] if isinstance(label, Index): return self.take(i, axis=1, convert=True) + index_len = len(self.index) + # if the values returned are not the same length # as the index (iow a not found value), iget returns # a 0-len ndarray.
This is effectively catching # a numpy error (as numpy should really raise) values = self._data.iget(i) - if not len(values): - values = np.array([np.nan] * len(self.index), dtype=object) + + if index_len and not len(values): + values = np.array([np.nan] * index_len, dtype=object) result = self._constructor_sliced.from_array( values, index=self.index, name=label, fastpath=True) diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 1e8ce7afa9492..26fae0717f956 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -324,12 +324,14 @@ def test_frame_to_json_except(self): def test_frame_empty(self): df = DataFrame(columns=['jim', 'joe']) self.assertFalse(df._is_mixed_type) - assert_frame_equal(read_json(df.to_json()), df) + assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df) + def test_frame_empty_mixedtype(self): # mixed type + df = DataFrame(columns=['jim', 'joe']) df['joe'] = df['joe'].astype('i8') self.assertTrue(df._is_mixed_type) - assert_frame_equal(read_json(df.to_json()), df) + assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df) def test_v12_compat(self): df = DataFrame( diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index f3f00862054e4..fa7debeb228ce 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1256,10 +1256,14 @@ def test_transactions(self): self._transaction_test() def test_get_schema_create_table(self): - self._load_test2_data() + # Use a dataframe without a bool column, since MySQL converts bool to + # TINYINT (which read_sql_table returns as an int and causes a dtype + # mismatch) + + self._load_test3_data() tbl = 'test_get_schema_create_table' - create_sql = sql.get_schema(self.test_frame2, tbl, con=self.conn) - blank_test_df = self.test_frame2.iloc[:0] + create_sql = sql.get_schema(self.test_frame3, tbl, con=self.conn) + blank_test_df = self.test_frame3.iloc[:0] 
self.drop_table(tbl) self.conn.execute(create_sql) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index fe64937d292c9..445530bc5b00c 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -862,7 +862,7 @@ def _non_null_values(x): if mock_mean: # check that mean equals mock_mean expected = mock_mean(x) - assert_equal(mean_x, expected) + assert_equal(mean_x, expected.astype('float64')) # check that correlation of a series with itself is either 1 or NaN corr_x_x = corr(x, x) @@ -1550,6 +1550,7 @@ def test_moment_functions_zero_length(self): df1_expected = df1 df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, minor_axis=df1.columns) df2 = DataFrame(columns=['a']) + df2['a'] = df2['a'].astype('float64') df2_expected = df2 df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, minor_axis=df2.columns) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 004c49005ca0e..cf7523b34595a 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -11864,12 +11864,10 @@ def test_mode(self): "E": [8, 8, 1, 1, 3, 3]}) assert_frame_equal(df[["A"]].mode(), pd.DataFrame({"A": [12]})) - assert_frame_equal(df[["D"]].mode(), - pd.DataFrame(pd.Series([], dtype="int64"), - columns=["D"])) - assert_frame_equal(df[["E"]].mode(), - pd.DataFrame(pd.Series([1, 3, 8], dtype="int64"), - columns=["E"])) + expected = pd.Series([], dtype='int64', name='D').to_frame() + assert_frame_equal(df[["D"]].mode(), expected) + expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame() + assert_frame_equal(df[["E"]].mode(), expected) assert_frame_equal(df[["A", "B"]].mode(), pd.DataFrame({"A": [12], "B": [10.]})) assert_frame_equal(df.mode(), diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index e3331b7423f2c..0e64d27649d80 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1728,6 +1728,8 @@ def 
test_groupby_head_tail(self): assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) empty_not_as = DataFrame(columns=df.columns) + empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype) + empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype) assert_frame_equal(empty_not_as, g_not_as.head(0)) assert_frame_equal(empty_not_as, g_not_as.tail(0)) assert_frame_equal(empty_not_as, g_not_as.head(-1)) @@ -1743,6 +1745,8 @@ def test_groupby_head_tail(self): assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1)) empty_as = DataFrame(index=df_as.index[:0], columns=df.columns) + empty_as['A'] = empty_as['A'].astype(df.A.dtype) + empty_as['B'] = empty_as['B'].astype(df.B.dtype) assert_frame_equal(empty_as, g_as.head(0)) assert_frame_equal(empty_as, g_as.tail(0)) assert_frame_equal(empty_as, g_as.head(-1)) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 19ed799853ed4..ece690ad21bcc 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1063,6 +1063,7 @@ def test_loc_setitem_consistency(self): # empty (essentially noops) expected = DataFrame(columns=['x', 'y']) + expected['x'] = expected['x'].astype(np.int64) df = DataFrame(columns=['x', 'y']) df.loc[:, 'x'] = 1 assert_frame_equal(df,expected) @@ -3376,7 +3377,7 @@ def f(): expected = DataFrame(columns=['foo']) def f(): df = DataFrame() - df['foo'] = Series([]) + df['foo'] = Series([], dtype='object') return df assert_frame_equal(f(), expected) def f(): @@ -3386,9 +3387,12 @@ def f(): assert_frame_equal(f(), expected) def f(): df = DataFrame() - df['foo'] = Series(range(len(df))) + df['foo'] = df.index return df assert_frame_equal(f(), expected) + + expected = DataFrame(columns=['foo']) + expected['foo'] = expected['foo'].astype('float64') def f(): df = DataFrame() df['foo'] = [] @@ -3396,7 +3400,7 @@ def f(): assert_frame_equal(f(), expected) def f(): df = DataFrame() - df['foo'] = df.index + df['foo'] = Series(range(len(df))) return df assert_frame_equal(f(),
expected) def f(): @@ -3429,14 +3433,20 @@ def f(): # GH5720, GH5744 # don't create rows when empty + expected = DataFrame(columns=['A','B','New']) + expected['A'] = expected['A'].astype('int64') + expected['B'] = expected['B'].astype('float64') + expected['New'] = expected['New'].astype('float64') df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] y['New'] = np.nan - assert_frame_equal(y,DataFrame(columns=['A','B','New'])) + assert_frame_equal(y,expected) + expected = DataFrame(columns=['a','b','c c','d']) + expected['d'] = expected['d'].astype('int64') df = DataFrame(columns=['a', 'b', 'c c']) df['d'] = 3 - assert_frame_equal(df,DataFrame(columns=['a','b','c c','d'])) + assert_frame_equal(df,expected) assert_series_equal(df['c c'],Series(name='c c',dtype=object)) # reindex columns is ok @@ -3444,6 +3454,9 @@ def f(): y = df[df.A > 5] result = y.reindex(columns=['A','B','C']) expected = DataFrame(columns=['A','B','C']) + expected['A'] = expected['A'].astype('int64') + expected['B'] = expected['B'].astype('float64') + expected['C'] = expected['C'].astype('float64') assert_frame_equal(result,expected) # GH 5756 @@ -4429,6 +4442,15 @@ def test_indexing_assignment_dict_already_exists(self): expected.loc[5] = [9, 99] tm.assert_frame_equal(df, expected) + def test_indexing_dtypes_on_empty(self): + # Check that .loc, .iloc and .ix return consistent dtypes on an empty frame GH9983 + df = DataFrame({'a':[1,2,3],'b':['b','b2','b3']}) + df2 = df.ix[[],:] + + self.assertEqual(df2.loc[:,'a'].dtype, np.int64) + assert_series_equal(df2.loc[:,'a'], df2.iloc[:,0]) + assert_series_equal(df2.loc[:,'a'], df2.ix[:,0]) + class TestCategoricalIndex(tm.TestCase): diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 642e50c37874d..cc0a0ea5662db 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -215,6 +215,14 @@ def test_multiindex_dtype(self): {'a':[1.0,2.0],'b':[2.1,1.5],'c':['l1','l2']}, index=['a','b'])
self._assert_not_equal(df1, df2, check_index_type=True) + def test_empty_dtypes(self): + df1=pd.DataFrame(columns=["col1","col2"]) + df1["col1"] = df1["col1"].astype('int64') + df2=pd.DataFrame(columns=["col1","col2"]) + self._assert_equal(df1, df2, check_dtype=False) + self._assert_not_equal(df1, df2, check_dtype=True) + + class TestRNGContext(unittest.TestCase): def test_RNGContext(self): diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 23785598783ea..70c706fc66398 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -2118,6 +2118,7 @@ def test_range_slice_outofbounds(self): for idx in [didx, pidx]: df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx) empty = DataFrame(index=idx.__class__([], freq='D'), columns=['units']) + empty['units'] = empty['units'].astype('int64') tm.assert_frame_equal(df['2013/09/01':'2013/09/30'], empty) tm.assert_frame_equal(df['2013/09/30':'2013/10/02'], df.iloc[:2])