From 5ab83381a914e15c1b95f750fd54fb29fe9c6500 Mon Sep 17 00:00:00 2001 From: weikhor Date: Wed, 1 Jun 2022 23:33:50 +0800 Subject: [PATCH 1/8] add test --- pandas/core/internals/construction.py | 4 ++-- pandas/tests/frame/test_constructors.py | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 7a5db56cb48fe..9b3fb4ec3c3ba 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -345,8 +345,8 @@ def ndarray_to_mgr( index, columns = _get_axes( values.shape[0], values.shape[1], index=index, columns=columns ) - - _check_values_indices_shape_match(values, index, columns) + if len(values) != 0: + _check_values_indices_shape_match(values, index, columns) if typ == "array": diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index e62c050fbf812..d5adad85ca18d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3086,3 +3086,10 @@ def test_tzaware_data_tznaive_dtype(self, constructor): assert np.all(result.dtypes == "M8[ns]") assert np.all(result == ts_naive) + + @pytest.mark.parametrize("data", [[], np.array([])]) + def test_construction_empty_dataframe(self, data): + # GH#46822 + result = DataFrame(data=data, columns=["a", "b"]) + assert list(result.values) == [] + tm.assert_index_equal(result.columns, Index(["a", "b"])) From 0eaf831a3cce80d2eca0e446e5b41d391ecbac08 Mon Sep 17 00:00:00 2001 From: weikhor Date: Thu, 2 Jun 2022 01:11:36 +0800 Subject: [PATCH 2/8] test --- pandas/core/internals/construction.py | 14 +++++++++----- pandas/tests/frame/test_constructors.py | 5 ----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 9b3fb4ec3c3ba..49e650aba4a3a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -326,7 +326,8 @@ def ndarray_to_mgr( else: # by definition an array here # the dtypes will be coerced to a single dtype - values = _prep_ndarray(values, copy=copy_on_sanitize) + values = _prep_ndarray(values, columns, copy=copy_on_sanitize) + # values = values if dtype is not None and not is_dtype_equal(values.dtype, dtype): # GH#40110 see similar check inside sanitize_array @@ -345,8 +346,8 @@ def ndarray_to_mgr( index, columns = _get_axes( values.shape[0], values.shape[1], index=index, columns=columns ) - if len(values) != 0: - _check_values_indices_shape_match(values, index, columns) + + _check_values_indices_shape_match(values, index, columns) if typ == "array": @@ -537,7 +538,7 @@ def treat_as_nested(data) -> bool: # --------------------------------------------------------------------- -def _prep_ndarray(values, copy: bool = True) -> np.ndarray: +def _prep_ndarray(values, columns, copy: bool = True) -> np.ndarray: if isinstance(values, TimedeltaArray) or ( isinstance(values, DatetimeArray) and values.tz is None ): @@ -577,7 +578,10 @@ def convert(v): values = np.array(values, copy=copy) if values.ndim == 1: - values = values.reshape((values.shape[0], 1)) + if len(values) == 0: + values = values.reshape((0, len(columns))) + else: + values = values.reshape((values.shape[0], 1)) elif values.ndim != 2: raise ValueError(f"Must pass 2-d input. shape={values.shape}") diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d5adad85ca18d..6c7827bd471fe 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -621,11 +621,6 @@ def test_constructor_2d_index(self): tm.assert_frame_equal(df, expected) def test_constructor_error_msgs(self): - msg = "Empty data passed with indices specified." - # passing an empty array with columns specified. - with pytest.raises(ValueError, match=msg): - DataFrame(np.empty(0), columns=list("abc")) - msg = "Mixing dicts with non-Series may lead to ambiguous ordering." # mix dict and array, wrong size with pytest.raises(ValueError, match=msg): From a5b36db6ae4e0b6320d644e4cc60085c3d2a279b Mon Sep 17 00:00:00 2001 From: weikhor Date: Thu, 2 Jun 2022 20:18:03 +0800 Subject: [PATCH 3/8] test --- pandas/core/internals/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 49e650aba4a3a..cc30757670154 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -578,7 +578,7 @@ def convert(v): values = np.array(values, copy=copy) if values.ndim == 1: - if len(values) == 0: + if len(values) == 0 and columns is not None: values = values.reshape((0, len(columns))) else: values = values.reshape((values.shape[0], 1)) From 2ca726819b89b9a028979e60dfc87f743e79a5b0 Mon Sep 17 00:00:00 2001 From: weikhor Date: Mon, 6 Jun 2022 16:20:06 +0800 Subject: [PATCH 4/8] add revert code --- pandas/core/internals/construction.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index cc30757670154..7a5db56cb48fe 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -326,8 +326,7 @@ def ndarray_to_mgr( else: # by definition an array here # the dtypes will be coerced to a single dtype - values = _prep_ndarray(values, columns, copy=copy_on_sanitize) - # values = values + values = _prep_ndarray(values, copy=copy_on_sanitize) if dtype is not None and not is_dtype_equal(values.dtype, dtype): # GH#40110 see similar check inside sanitize_array @@ -538,7 +537,7 @@ def treat_as_nested(data) -> bool: # --------------------------------------------------------------------- -def _prep_ndarray(values, columns, copy: bool = True) -> np.ndarray: +def _prep_ndarray(values, copy: bool = True) -> np.ndarray: if isinstance(values, TimedeltaArray) or ( isinstance(values, DatetimeArray) and values.tz is None ): @@ -578,10 +577,7 @@ def convert(v): values = np.array(values, copy=copy) if values.ndim == 1: - if len(values) == 0 and columns is not None: - values = values.reshape((0, len(columns))) - else: - values = values.reshape((values.shape[0], 1)) + values = values.reshape((values.shape[0], 1)) elif values.ndim != 2: raise ValueError(f"Must pass 2-d input. shape={values.shape}") From 7eda6ae056d38a960386001eccc0da01dcba64b1 Mon Sep 17 00:00:00 2001 From: weikhor Date: Mon, 6 Jun 2022 16:21:10 +0800 Subject: [PATCH 5/8] add revert code --- pandas/tests/frame/test_constructors.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6c7827bd471fe..d5adad85ca18d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -621,6 +621,11 @@ def test_constructor_2d_index(self): tm.assert_frame_equal(df, expected) def test_constructor_error_msgs(self): + msg = "Empty data passed with indices specified." + # passing an empty array with columns specified. + with pytest.raises(ValueError, match=msg): + DataFrame(np.empty(0), columns=list("abc")) + msg = "Mixing dicts with non-Series may lead to ambiguous ordering." # mix dict and array, wrong size with pytest.raises(ValueError, match=msg): From fce3021824e968f4651cdb572dcc5908c5a43fd9 Mon Sep 17 00:00:00 2001 From: weikhor Date: Mon, 6 Jun 2022 16:24:05 +0800 Subject: [PATCH 6/8] add test --- pandas/tests/frame/test_constructors.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d5adad85ca18d..73d5ea60fbe1a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3087,9 +3087,8 @@ def test_tzaware_data_tznaive_dtype(self, constructor): assert np.all(result.dtypes == "M8[ns]") assert np.all(result == ts_naive) - @pytest.mark.parametrize("data", [[], np.array([])]) def test_construction_empty_dataframe(self, data): # GH#46822 - result = DataFrame(data=data, columns=["a", "b"]) - assert list(result.values) == [] - tm.assert_index_equal(result.columns, Index(["a", "b"])) + msg = "Empty data passed with indices specified." + with pytest.raises(ValueError, match=msg): + DataFrame(data=np.array([]), columns=["a", "b"]) From 1d2310fcd4fd3637e4d53522960b3052e26dcb46 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Tue, 7 Jun 2022 00:55:38 +0800 Subject: [PATCH 7/8] Update test_constructors.py --- pandas/tests/frame/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ea584a6394bbe..d70f126a7dd35 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3085,7 +3085,7 @@ def test_tzaware_data_tznaive_dtype(self, constructor): assert np.all(result.dtypes == "M8[ns]") assert np.all(result == ts_naive) - def test_construction_empty_dataframe(self, data): + def test_construction_empty_dataframe(self): # GH#46822 msg = "Empty data passed with indices specified." with pytest.raises(ValueError, match=msg): From 7821130f8d6ebc9b94920e63da11c25717ad14d4 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Tue, 7 Jun 2022 21:02:26 +0800 Subject: [PATCH 8/8] Update test_constructors.py --- pandas/tests/frame/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d70f126a7dd35..cbff704ac11b3 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3085,7 +3085,7 @@ def test_tzaware_data_tznaive_dtype(self, constructor): assert np.all(result.dtypes == "M8[ns]") assert np.all(result == ts_naive) - def test_construction_empty_dataframe(self): + def test_construction_empty_array_multi_column_raises(self): # GH#46822 msg = "Empty data passed with indices specified." with pytest.raises(ValueError, match=msg):