From 1a1baa224c23bd3143ed5a839f19ea6fb34e1c45 Mon Sep 17 00:00:00 2001 From: Sumanau Sareen Date: Mon, 20 May 2019 19:37:58 +0530 Subject: [PATCH 1/9] 1. Fix bug where list like object not returning empty Index. 2. Edit v0.25.0.rst to update changelog. --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bb3275c27a4ac..57ca18cb69bca 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3402,7 +3402,7 @@ def _ensure_valid_index(self, value): passed value. """ # GH5632, make sure that we are a Series convertible - if not len(self.index) and is_list_like(value): + if not len(self.index) and len(value) and is_list_like(value): try: value = Series(value) except (ValueError, NotImplementedError, TypeError): From 47fb3444e8c4eb1521221b19281e1afabb8ab9b6 Mon Sep 17 00:00:00 2001 From: Sumanau Sareen Date: Mon, 20 May 2019 20:12:47 +0530 Subject: [PATCH 2/9] Added test for ensuring empty df has an empty index. --- pandas/core/frame.py | 2 +- pandas/tests/indexing/test_indexing.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 57ca18cb69bca..35ced3230cc98 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3402,7 +3402,7 @@ def _ensure_valid_index(self, value): passed value. """ # GH5632, make sure that we are a Series convertible - if not len(self.index) and len(value) and is_list_like(value): + if not len(self.index) and is_list_like(value) and len(value): try: value = Series(value) except (ValueError, NotImplementedError, TypeError): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index a0e3df182b129..e05a6e0d092c8 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -717,6 +717,11 @@ def test_index_type_coercion(self): idxr(s2)['0'] = 0 assert s2.index.is_object() + def test_empty_index_for_empty_dataframe(self): + empty_df = pd.DataFrame(columns=['A']) + df_with_empty_value = pd.DataFrame(columns=['A'], data=[]) + assert df_with_empty_value.index.dtype == empty_df.index.dtype + class TestMisc(Base): From 3029d86499f05dc7e3ba5970fe464e6e75e62471 Mon Sep 17 00:00:00 2001 From: Sumanau Sareen Date: Tue, 21 May 2019 00:49:48 +0530 Subject: [PATCH 3/9] 1. Add documentation under right subsection. 2. Use proper test utils for test func. --- pandas/tests/indexing/test_indexing.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index e05a6e0d092c8..ddc1c2d870b5b 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -718,9 +718,11 @@ def test_index_type_coercion(self): assert s2.index.is_object() def test_empty_index_for_empty_dataframe(self): - empty_df = pd.DataFrame(columns=['A']) - df_with_empty_value = pd.DataFrame(columns=['A'], data=[]) - assert df_with_empty_value.index.dtype == empty_df.index.dtype + index=pd.Index([], name='idx') + df = pd.DataFrame(columns=['A'], index=index) + df2 = df.copy() + df['A'] = [] + tm.assert_index_equal(df.index, df2.index) class TestMisc(Base): From 830ca39a00f97b5ae2cebfde74c3e68f8b0acfe3 Mon Sep 17 00:00:00 2001 From: Sumanau Sareen Date: Tue, 21 May 2019 10:12:14 +0530 Subject: [PATCH 4/9] Fix test as empty df should return an empty index. --- pandas/tests/indexing/test_partial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index f2696f282c2c4..355e463a54e8e 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -451,7 +451,7 @@ def test_partial_set_empty_frame(self): # these work as they don't really change # anything but the index # GH5632 - expected = DataFrame(columns=['foo'], index=Index([], dtype='int64')) + expected = DataFrame(columns=['foo'], index=Index([])) def f(): df = DataFrame() From f2557a33ae6ed14a22464967213c8d61f54f9fad Mon Sep 17 00:00:00 2001 From: Sumanau Sareen Date: Mon, 10 Jun 2019 16:01:32 +0530 Subject: [PATCH 5/9] 1. Use proper variable names in tests. 2. Moved changelog to correct subsection. 3. Update another failing test --- pandas/tests/indexing/test_indexing.py | 10 +++++----- pandas/tests/indexing/test_partial.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index ddc1c2d870b5b..fd0912d0373da 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -718,11 +718,11 @@ def test_index_type_coercion(self): assert s2.index.is_object() def test_empty_index_for_empty_dataframe(self): - index=pd.Index([], name='idx') - df = pd.DataFrame(columns=['A'], index=index) - df2 = df.copy() - df['A'] = [] - tm.assert_index_equal(df.index, df2.index) + index = pd.Index([], name='idx') + result = pd.DataFrame(columns=['A'], index=index) + result['A'] = [] + expected = pd.DataFrame(columns=['A'], index=index) + tm.assert_index_equal(result.index, expected.index) class TestMisc(Base): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 355e463a54e8e..cac3b7ed8bcfd 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -451,7 +451,7 @@ def test_partial_set_empty_frame(self): # these work as they don't really change # anything but the index # GH5632 - expected = DataFrame(columns=['foo'], index=Index([])) + expected = DataFrame(columns=['foo'], index=Index([], dtype='object')) def f(): df = DataFrame() From 4a3822f4052d75a66405ac3c1758702fce46d16d Mon Sep 17 00:00:00 2001 From: Sumanau Sareen Date: Mon, 10 Jun 2019 17:52:12 +0530 Subject: [PATCH 6/9] Fix failing tests, due to change in behaviour --- pandas/tests/indexing/test_partial.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index cac3b7ed8bcfd..a6fd6a81b736b 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -454,7 +454,7 @@ def test_partial_set_empty_frame(self): expected = DataFrame(columns=['foo'], index=Index([], dtype='object')) def f(): - df = DataFrame() + df = DataFrame(index=Index([], dtype='object')) df['foo'] = Series([], dtype='object') return df @@ -478,22 +478,21 @@ def f(): expected['foo'] = expected['foo'].astype('float64') def f(): - df = DataFrame() + df = DataFrame(index=Index([], dtype='int64')) df['foo'] = [] return df tm.assert_frame_equal(f(), expected) def f(): - df = DataFrame() + df = DataFrame(index=Index([], dtype='int64')) df['foo'] = Series(np.arange(len(df)), dtype='float64') return df tm.assert_frame_equal(f(), expected) def f(): - df = DataFrame() - tm.assert_index_equal(df.index, Index([], dtype='object')) + df = DataFrame(index=Index([], dtype='int64')) df['foo'] = range(len(df)) return df From 5b7f68d4e6d56572954ef0473828698d6b4a693d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 20 Aug 2019 14:10:25 -0500 Subject: [PATCH 7/9] restore master --- pandas/core/frame.py | 232 ++++++++++++++++++++++--------------------- 1 file changed, 118 insertions(+), 114 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9e843bef18d87..1be7e0736f9fe 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2968,6 +2968,90 @@ def _get_value(self, index, col, takeable: bool = False): index = self.index.get_loc(index) return self._get_value(index, col, takeable=True) + def __setitem__(self, key, value): + key = com.apply_if_callable(key, self) + + # see if we can slice the rows + indexer = convert_to_index_sliceable(self, key) + if indexer is not None: + # either we have a slice or we have a string that can be converted + # to a slice for partial-string date indexing + return self._setitem_slice(indexer, value) + + if isinstance(key, DataFrame) or getattr(key, "ndim", None) == 2: + self._setitem_frame(key, value) + elif isinstance(key, (Series, np.ndarray, list, Index)): + self._setitem_array(key, value) + else: + # set column + self._set_item(key, value) + + def _setitem_slice(self, key, value): + self._check_setitem_copy() + self.loc[key] = value + + def _setitem_array(self, key, value): + # also raises Exception if object array with NA values + if com.is_bool_indexer(key): + if len(key) != len(self.index): + raise ValueError( + "Item wrong length %d instead of %d!" % (len(key), len(self.index)) + ) + key = check_bool_indexer(self.index, key) + indexer = key.nonzero()[0] + self._check_setitem_copy() + self.loc._setitem_with_indexer(indexer, value) + else: + if isinstance(value, DataFrame): + if len(value.columns) != len(key): + raise ValueError("Columns must be same length as key") + for k1, k2 in zip(key, value.columns): + self[k1] = value[k2] + else: + indexer = self.loc._get_listlike_indexer( + key, axis=1, raise_missing=False + )[1] + self._check_setitem_copy() + self.loc._setitem_with_indexer((slice(None), indexer), value) + + def _setitem_frame(self, key, value): + # support boolean setting with DataFrame input, e.g. + # df[df > df2] = 0 + if isinstance(key, np.ndarray): + if key.shape != self.shape: + raise ValueError("Array conditional must be same shape as self") + key = self._constructor(key, **self._construct_axes_dict()) + + if key.values.size and not is_bool_dtype(key.values): + raise TypeError( + "Must pass DataFrame or 2-d ndarray with boolean values only" + ) + + self._check_inplace_setting(value) + self._check_setitem_copy() + self._where(-key, value, inplace=True) + + def _set_item(self, key, value): + """ + Add series to DataFrame in specified column. + + If series is a numpy-array (not a Series/TimeSeries), it must be the + same length as the DataFrames index or an error will be thrown. + + Series/TimeSeries will be conformed to the DataFrames index to + ensure homogeneity. + """ + + self._ensure_valid_index(value) + value = self._sanitize_column(key, value) + NDFrame._set_item(self, key, value) + + # check if we are modifying a copy + # try to set first as we want an invalid + # value exception to occur first + if len(self): + self._check_setitem_copy() + def _set_value(self, index, col, value, takeable: bool = False): """ Put single value at passed column and index. @@ -3005,6 +3089,40 @@ def _set_value(self, index, col, value, takeable: bool = False): return self + def _ensure_valid_index(self, value): + """ + Ensure that if we don't have an index, that we can create one from the + passed value. + """ + # GH5632, make sure that we are a Series convertible + if not len(self.index) and is_list_like(value): + try: + value = Series(value) + except (ValueError, NotImplementedError, TypeError): + raise ValueError( + "Cannot set a frame with no defined index " + "and a value that cannot be converted to a " + "Series" + ) + + self._data = self._data.reindex_axis( + value.index.copy(), axis=1, fill_value=np.nan + ) + + def _box_item_values(self, key, values): + items = self.columns[self.columns.get_loc(key)] + if values.ndim == 2: + return self._constructor(values.T, columns=items, index=self.index) + else: + return self._box_col_values(values, items) + + def _box_col_values(self, values, items): + """ + Provide boxed values for a column. + """ + klass = self._constructor_sliced + return klass(values, index=self.index, name=items, fastpath=True) + # ---------------------------------------------------------------------- # Unsorted @@ -3378,120 +3496,6 @@ def is_dtype_instance_mapper(idx, dtype): dtype_indexer = include_these & exclude_these return self.loc[_get_info_slice(self, dtype_indexer)] - def _box_item_values(self, key, values): - items = self.columns[self.columns.get_loc(key)] - if values.ndim == 2: - return self._constructor(values.T, columns=items, index=self.index) - else: - return self._box_col_values(values, items) - - def _box_col_values(self, values, items): - """ - Provide boxed values for a column. - """ - klass = self._constructor_sliced - return klass(values, index=self.index, name=items, fastpath=True) - - def __setitem__(self, key, value): - key = com.apply_if_callable(key, self) - - # see if we can slice the rows - indexer = convert_to_index_sliceable(self, key) - if indexer is not None: - return self._setitem_slice(indexer, value) - - if isinstance(key, DataFrame) or getattr(key, "ndim", None) == 2: - self._setitem_frame(key, value) - elif isinstance(key, (Series, np.ndarray, list, Index)): - self._setitem_array(key, value) - else: - # set column - self._set_item(key, value) - - def _setitem_slice(self, key, value): - self._check_setitem_copy() - self.loc._setitem_with_indexer(key, value) - - def _setitem_array(self, key, value): - # also raises Exception if object array with NA values - if com.is_bool_indexer(key): - if len(key) != len(self.index): - raise ValueError( - "Item wrong length %d instead of %d!" % (len(key), len(self.index)) - ) - key = check_bool_indexer(self.index, key) - indexer = key.nonzero()[0] - self._check_setitem_copy() - self.loc._setitem_with_indexer(indexer, value) - else: - if isinstance(value, DataFrame): - if len(value.columns) != len(key): - raise ValueError("Columns must be same length as key") - for k1, k2 in zip(key, value.columns): - self[k1] = value[k2] - else: - indexer = self.loc._convert_to_indexer(key, axis=1) - self._check_setitem_copy() - self.loc._setitem_with_indexer((slice(None), indexer), value) - - def _setitem_frame(self, key, value): - # support boolean setting with DataFrame input, e.g. - # df[df > df2] = 0 - if isinstance(key, np.ndarray): - if key.shape != self.shape: - raise ValueError("Array conditional must be same shape as self") - key = self._constructor(key, **self._construct_axes_dict()) - - if key.values.size and not is_bool_dtype(key.values): - raise TypeError( - "Must pass DataFrame or 2-d ndarray with boolean values only" - ) - - self._check_inplace_setting(value) - self._check_setitem_copy() - self._where(-key, value, inplace=True) - - def _ensure_valid_index(self, value): - """ - Ensure that if we don't have an index, that we can create one from the - passed value. - """ - # GH5632, make sure that we are a Series convertible - if not len(self.index) and is_list_like(value) and len(value): - try: - value = Series(value) - except (ValueError, NotImplementedError, TypeError): - raise ValueError( - "Cannot set a frame with no defined index " - "and a value that cannot be converted to a " - "Series" - ) - - self._data = self._data.reindex_axis( - value.index.copy(), axis=1, fill_value=np.nan - ) - - def _set_item(self, key, value): - """ - Add series to DataFrame in specified column. - - If series is a numpy-array (not a Series/TimeSeries), it must be the - same length as the DataFrames index or an error will be thrown. - - Series/TimeSeries will be conformed to the DataFrames index to - ensure homogeneity. - """ - - self._ensure_valid_index(value) - value = self._sanitize_column(key, value) - NDFrame._set_item(self, key, value) - - # check if we are modifying a copy - # try to set first as we want an invalid - # value exception to occur first - if len(self): - self._check_setitem_copy() - def insert(self, loc, column, value, allow_duplicates=False): """ Insert column into DataFrame at specified location. From e9c5553b605e3226584a28865f6a10aeb90a4f8a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 20 Aug 2019 14:10:47 -0500 Subject: [PATCH 8/9] readd fix --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1be7e0736f9fe..7d71cb22f49ed 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3095,7 +3095,7 @@ def _ensure_valid_index(self, value): passed value. """ # GH5632, make sure that we are a Series convertible - if not len(self.index) and is_list_like(value): + if not len(self.index) and is_list_like(value) and len(value): try: value = Series(value) except (ValueError, NotImplementedError, TypeError): From 25a269b3c9ccd214d53e40486ced1503cc6f06a1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 20 Aug 2019 14:14:10 -0500 Subject: [PATCH 9/9] review comments --- pandas/tests/frame/test_indexing.py | 8 ++++++++ pandas/tests/indexing/test_indexing.py | 7 ------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index ae14563e5952a..a78b2ab7d1c4c 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -821,6 +821,14 @@ def test_setitem_empty_frame_with_boolean(self, dtype, kwargs): df[df > df2] = 47 assert_frame_equal(df, df2) + def test_setitem_with_empty_listlike(self): + # GH #17101 + index = pd.Index([], name="idx") + result = pd.DataFrame(columns=["A"], index=index) + result["A"] = [] + expected = pd.DataFrame(columns=["A"], index=index) + tm.assert_index_equal(result.index, expected.index) + def test_setitem_scalars_no_index(self): # GH16823 / 17894 df = DataFrame() diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 5ec33ffe79986..e375bd459e66f 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -799,13 +799,6 @@ def test_index_type_coercion(self): idxr(s2)["0"] = 0 assert s2.index.is_object() - def test_empty_index_for_empty_dataframe(self): - index = pd.Index([], name="idx") - result = pd.DataFrame(columns=["A"], index=index) - result["A"] = [] - expected = pd.DataFrame(columns=["A"], index=index) - tm.assert_index_equal(result.index, expected.index) - class TestMisc(Base): def test_float_index_to_mixed(self):