From 3d1cb464461a58fa000bc012d0cc089cb48f1215 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Mon, 17 Feb 2025 19:03:54 -0500 Subject: [PATCH 01/13] modified the files according to bug#60237 --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/dtypes/cast.py | 2 +- pandas/tests/extension/test_arrow.py | 24 ++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4d9a45abe17cd..d2efde6c013c8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -668,6 +668,7 @@ Conversion - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) +- Bug in :meth:`convert_dtypes` not preserving timezone details for ArrowDtype in Series and DataFrame (:issue:`60237`) Strings ^^^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 94531c2ac87e8..f11aefeeaaa00 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1113,7 +1113,7 @@ def convert_dtypes( else: inferred_dtype = input_array.dtype - if dtype_backend == "pyarrow": + if dtype_backend == "pyarrow" and not isinstance(inferred_dtype, ArrowDtype): from pandas.core.arrays.arrow.array import to_pyarrow_type from pandas.core.arrays.string_ import StringDtype diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index d6f428f4938a6..0561ae00871cc 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3511,3 +3511,27 @@ def test_map_numeric_na_action(): result = ser.map(lambda x: 42, na_action="ignore") expected = pd.Series([42.0, 42.0, np.nan], dtype="float64") tm.assert_series_equal(result, expected) + + +def test_convert_dtype_pyarrow_timezone_preserve(): + # GH 60237 + pytest.importorskip("pyarrow") + ser = pd.Series( + pd.to_datetime(range(5), utc=True, unit="h"), + dtype="timestamp[ns, tz=UTC][pyarrow]", + ) + result = ser.convert_dtypes(dtype_backend="pyarrow") + expected = ser.copy() + tm.assert_series_equal(result, expected) + + df = pd.DataFrame( + { + "timestamps": pd.Series( + pd.to_datetime(range(5), utc=True, unit="h"), + dtype="timestamp[ns, tz=UTC][pyarrow]", + ) + } + ) + result = df.convert_dtypes(dtype_backend="pyarrow") + expected = df.copy() + tm.assert_frame_equal(result, expected) From 3d40bcaf3b7694ca9b9361ad51bbc10c577441db Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Tue, 18 Feb 2025 12:33:34 -0500 Subject: [PATCH 02/13] Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ecab5632f59a1..9d6c00ef1dc6f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -671,7 +671,7 @@ Conversion - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) -- Bug in :meth:`convert_dtypes` not preserving timezone details for ArrowDtype in Series and DataFrame (:issue:`60237`) +- Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`) Strings ^^^^^^^ From 43e97c5ca3f2048f98bec739e27ac50d69519370 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Tue, 18 Feb 2025 13:02:27 -0500 Subject: [PATCH 03/13] moved test case to frame and serier folders --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/extension/test_arrow.py | 24 ------------------- .../frame/methods/test_convert_dtypes.py | 14 +++++++++++ .../series/methods/test_convert_dtypes.py | 11 +++++++++ 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9d6c00ef1dc6f..7cd5759af4989 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -670,8 +670,8 @@ Conversion - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) -- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) - Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`) +- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) Strings ^^^^^^^ diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 0561ae00871cc..d6f428f4938a6 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3511,27 +3511,3 @@ def test_map_numeric_na_action(): result = ser.map(lambda x: 42, na_action="ignore") expected = pd.Series([42.0, 42.0, np.nan], dtype="float64") tm.assert_series_equal(result, expected) - - -def test_convert_dtype_pyarrow_timezone_preserve(): - # GH 60237 - pytest.importorskip("pyarrow") - ser = pd.Series( - pd.to_datetime(range(5), utc=True, unit="h"), - dtype="timestamp[ns, tz=UTC][pyarrow]", - ) - result = ser.convert_dtypes(dtype_backend="pyarrow") - expected = ser.copy() - tm.assert_series_equal(result, expected) - - df = pd.DataFrame( - { - "timestamps": pd.Series( - pd.to_datetime(range(5), utc=True, unit="h"), - dtype="timestamp[ns, tz=UTC][pyarrow]", - ) - } - ) - result = df.convert_dtypes(dtype_backend="pyarrow") - expected = df.copy() - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index e7f6e5d625d3e..4e99444eced42 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -196,3 +196,17 @@ def test_convert_dtypes_from_arrow(self): result = df.convert_dtypes() expected = df.astype({"a": "string[python]"}) tm.assert_frame_equal(result, expected) + + def test_convert_dtype_pyarrow_timezone_preserve(self): + # GH 60237 + df = pd.DataFrame( + { + "timestamps": pd.Series( + pd.to_datetime(range(5), utc=True, unit="h"), + dtype="timestamp[ns, tz=UTC][pyarrow]", + ) + } + ) + result = df.convert_dtypes(dtype_backend="pyarrow") + expected = df.copy() + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 90c4056a39e84..d373386108ff6 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -297,3 +297,14 @@ def test_convert_dtypes_pyarrow_null(self): result = ser.convert_dtypes(dtype_backend="pyarrow") expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null())) tm.assert_series_equal(result, expected) + + def test_convert_dtype_pyarrow_timezone_preserve(self): + # GH 60237 + pytest.importorskip("pyarrow") + ser = pd.Series( + pd.to_datetime(range(5), utc=True, unit="h"), + dtype="timestamp[ns, tz=UTC][pyarrow]", + ) + result = ser.convert_dtypes(dtype_backend="pyarrow") + expected = ser.copy() + tm.assert_series_equal(result, expected) From 0b47d24ef03af73c264c332851e9370bfbd5ab98 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Tue, 18 Feb 2025 13:50:35 -0500 Subject: [PATCH 04/13] fix pyarrow import error --- pandas/tests/frame/methods/test_convert_dtypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index 4e99444eced42..d0f30204758d3 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -199,6 +199,7 @@ def test_convert_dtypes_from_arrow(self): def test_convert_dtype_pyarrow_timezone_preserve(self): # GH 60237 + pytest.importorskip("pyarrow") df = pd.DataFrame( { "timestamps": pd.Series( From f4922c22f25821d3f669bee73274f923a2651610 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Fri, 21 Feb 2025 20:55:24 -0500 Subject: [PATCH 05/13] inconsistent issue fix --- pandas/core/reshape/concat.py | 7 +++++-- pandas/tests/reshape/concat/test_concat.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index e7cb7069bbc26..4449e6aa096ce 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -477,18 +477,21 @@ def _sanitize_mixed_ndim( else: name = getattr(obj, "name", None) + name_none_flag = False if ignore_index or name is None: if axis == 1: # doing a row-wise concatenation so need everything # to line up - name = 0 + if name is None: + name_none_flag = True + name = 0 else: # doing a column-wise concatenation so need series # to have unique names name = current_column current_column += 1 obj = sample._constructor(obj, copy=False) - if isinstance(obj, ABCDataFrame): + if isinstance(obj, ABCDataFrame) and name_none_flag: obj.columns = range(name, name + 1, 1) else: obj = sample._constructor({name: obj}, copy=False) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index d3edee17366f7..cd7cb0326801f 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -943,3 +943,15 @@ def test_concat_with_moot_ignore_index_and_keys(): msg = f"Cannot set {ignore_index=} and specify keys. Either should be used." with pytest.raises(ValueError, match=msg): concat([df1, df2], keys=keys, ignore_index=ignore_index) + + +def test_concat_of_series_and_frame_with_names_for_ignore_index(): + # GH #60723 and #56257 + ser = Series([4, 5], name="c") + df = DataFrame({"a": [0, 1], "b": [2, 3]}) + + result = concat([df, ser], ignore_index=True) + expected = DataFrame( + {"a": [0, 1, None, None], "b": [2, 3, None, None], "c": [None, None, 4, 5]} + ) + tm.assert_frame_equal(result, expected) From 252e5b238c6898b304f1d8bfff1d45cc831ceba1 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sat, 22 Feb 2025 01:42:47 -0500 Subject: [PATCH 06/13] added test cases and fixed old pr test cases --- pandas/tests/reshape/concat/test_concat.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index cd7cb0326801f..51586397dc512 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -280,7 +280,7 @@ def test_concat_mixed_objs_columns(self): tm.assert_frame_equal(result, expected) expected = DataFrame( - np.repeat(arr, 5).reshape(-1, 5), index=index, columns=[0, 0, 1, 2, 3] + np.repeat(arr, 5).reshape(-1, 5), index=index, columns=[0, 0, 0, 0, 0] ) result = concat([s1, df, s2, s2, s1], axis=1) tm.assert_frame_equal(result, expected) @@ -325,7 +325,8 @@ def test_concat_mixed_objs_index(self): def test_concat_mixed_objs_index_names(self): # Test row-wise concat for mixed series/frames with distinct names - # GH2385, GH15047 + # GH2385, GH15047 () + # GH #60723 Updated the test case, as the previous ones were incorrect index = date_range("01-Jan-2013", periods=10, freq="h") arr = np.arange(10, dtype="int64") @@ -333,16 +334,11 @@ def test_concat_mixed_objs_index_names(self): s2 = Series(arr, index=index, name="bar") df = DataFrame(arr.reshape(-1, 1), index=index) - expected = DataFrame( - np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T, - index=index.tolist() * 3, - columns=["foo", 0, "bar"], - ) + expected = concat([s1.to_frame(), df, s2.to_frame()]) result = concat([s1, df, s2]) tm.assert_frame_equal(result, expected) - # Rename all series to 0 when ignore_index=True - expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0]) + expected = concat([s1.to_frame(), df, s2.to_frame()], ignore_index=True) result = concat([s1, df, s2], ignore_index=True) tm.assert_frame_equal(result, expected) From f98a81459aca8678f3e061545166d1f0524651fe Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sat, 22 Feb 2025 01:54:44 -0500 Subject: [PATCH 07/13] added rst and small changes in tests file --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/tests/reshape/concat/test_concat.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7cd5759af4989..6f67275a211e7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -777,6 +777,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`) - Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`) - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) +- Bug in :meth:`concat` where concatenating dataframe and series with ignore_index = True drops the series name (:issue:`60723`, :issue:`56257`) Sparse ^^^^^^ diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 51586397dc512..8574dfd9a8b14 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -326,7 +326,8 @@ def test_concat_mixed_objs_index(self): def test_concat_mixed_objs_index_names(self): # Test row-wise concat for mixed series/frames with distinct names # GH2385, GH15047 () - # GH #60723 Updated the test case, as the previous ones were incorrect + # GH #60723 & GH #56257 (Updated the test case, + # as the above GH PR ones were incorrect) index = date_range("01-Jan-2013", periods=10, freq="h") arr = np.arange(10, dtype="int64") From d9de374827fc9cfc374035bf85b58a437429e1fb Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sat, 22 Feb 2025 19:05:28 -0500 Subject: [PATCH 08/13] fixed column name issue for column wise concat --- pandas/core/reshape/concat.py | 8 ++++---- pandas/tests/reshape/concat/test_concat.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 4449e6aa096ce..1d46532713e4a 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -482,13 +482,13 @@ def _sanitize_mixed_ndim( if axis == 1: # doing a row-wise concatenation so need everything # to line up - if name is None: - name_none_flag = True - name = 0 + name = 0 else: # doing a column-wise concatenation so need series # to have unique names - name = current_column + if name is None: + name_none_flag = True + name = current_column current_column += 1 obj = sample._constructor(obj, copy=False) if isinstance(obj, ABCDataFrame) and name_none_flag: diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 8574dfd9a8b14..2e52d24c1629a 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -280,7 +280,7 @@ def test_concat_mixed_objs_columns(self): tm.assert_frame_equal(result, expected) expected = DataFrame( - np.repeat(arr, 5).reshape(-1, 5), index=index, columns=[0, 0, 0, 0, 0] + np.repeat(arr, 5).reshape(-1, 5), index=index, columns=[0, 0, 1, 2, 3] ) result = concat([s1, df, s2, s2, s1], axis=1) tm.assert_frame_equal(result, expected) From e19e820b66391b7d00a8676030c44942fd95f86d Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sat, 22 Feb 2025 19:32:00 -0500 Subject: [PATCH 09/13] fixed text case for concat --- pandas/tests/reshape/concat/test_concat.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 2e52d24c1629a..40bf882a37a8c 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -335,11 +335,19 @@ def test_concat_mixed_objs_index_names(self): s2 = Series(arr, index=index, name="bar") df = DataFrame(arr.reshape(-1, 1), index=index) - expected = concat([s1.to_frame(), df, s2.to_frame()]) + expected = DataFrame( + np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T, + index=index.tolist() * 3, + columns=["foo", 0, "bar"], + ) result = concat([s1, df, s2]) tm.assert_frame_equal(result, expected) - expected = concat([s1.to_frame(), df, s2.to_frame()], ignore_index=True) + expected = DataFrame( + np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T, + index=np.arange(30, dtype=np.int64), + columns=["foo", 0, "bar"], + ) result = concat([s1, df, s2], ignore_index=True) tm.assert_frame_equal(result, expected) From 42c51ec8e4efc3c8a221625c92b22e2730b5fe7e Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sat, 22 Feb 2025 20:51:59 -0500 Subject: [PATCH 10/13] fix test cases issue --- pandas/tests/reshape/concat/test_concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 40bf882a37a8c..f1e4f3480b201 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -325,7 +325,7 @@ def test_concat_mixed_objs_index(self): def test_concat_mixed_objs_index_names(self): # Test row-wise concat for mixed series/frames with distinct names - # GH2385, GH15047 () + # GH2385, GH15047 # GH #60723 & GH #56257 (Updated the test case, # as the above GH PR ones were incorrect) From a6ef45a74c0303307428bd56b2d0d01fa88a038b Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Sun, 23 Feb 2025 02:07:37 -0500 Subject: [PATCH 11/13] Trigger redeployment From dba9778f887b3d628d0544cfb643fb540a3ba802 Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Tue, 4 Mar 2025 01:04:36 -0500 Subject: [PATCH 12/13] fixed reviewed changes and added extra test cases --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/reshape/concat.py | 12 ++-- pandas/tests/reshape/concat/test_concat.py | 80 +++++++++++++++++++++- 3 files changed, 87 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index fa7dbaa0febed..6c8fb37279246 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -779,7 +779,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`) - Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`) - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) -- Bug in :meth:`concat` where concatenating dataframe and series with ignore_index = True drops the series name (:issue:`60723`, :issue:`56257`) +- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 1d46532713e4a..5efaf0dc051bd 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -477,21 +477,23 @@ def _sanitize_mixed_ndim( else: name = getattr(obj, "name", None) - name_none_flag = False + rename_columns = False if ignore_index or name is None: if axis == 1: # doing a row-wise concatenation so need everything # to line up - name = 0 + if name is None: + name = 0 + rename_columns = True else: # doing a column-wise concatenation so need series # to have unique names if name is None: - name_none_flag = True + rename_columns = True name = current_column - current_column += 1 + current_column += 1 obj = sample._constructor(obj, copy=False) - if isinstance(obj, ABCDataFrame) and name_none_flag: + if isinstance(obj, ABCDataFrame) and rename_columns: obj.columns = range(name, name + 1, 1) else: obj = sample._constructor({name: obj}, copy=False) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index f1e4f3480b201..9caf173164f8c 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -955,8 +955,86 @@ def test_concat_of_series_and_frame_with_names_for_ignore_index(): ser = Series([4, 5], name="c") df = DataFrame({"a": [0, 1], "b": [2, 3]}) + result = concat([df, ser]) + expected = DataFrame( + {"a": [0, 1, None, None], "b": [2, 3, None, None], "c": [None, None, 4, 5]}, + index=[0, 1, 0, 1], + ) + tm.assert_frame_equal(result, expected) + + ser = Series([4, 5], name="c") + df = DataFrame({"a": [0, 1], "b": [2, 3]}) + result = concat([df, ser], ignore_index=True) expected = DataFrame( - {"a": [0, 1, None, None], "b": [2, 3, None, None], "c": [None, None, 4, 5]} + {"a": [0, 1, None, None], "b": [2, 3, None, None], "c": [None, None, 4, 5]}, + index=[0, 1, 2, 3], ) tm.assert_frame_equal(result, expected) + + ser = Series([4, 5]) + df = DataFrame({"a": [0, 1], "b": [2, 3]}) + + result = concat([df, ser, ser], axis=1) + expected = DataFrame({"a": [0, 1], "b": [2, 3], 0: [4, 5], 1: [4, 5]}, index=[0, 1]) + tm.assert_frame_equal(result, expected) + + ser = Series([4, 5]) + df = DataFrame({"a": [0, 1], "b": [2, 3]}) + + result = concat([df, ser, ser], axis=1, ignore_index=True) + expected = DataFrame({0: [0, 1], 1: [2, 3], 2: [4, 5], 3: [4, 5]}, index=[0, 1]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "inputs, ignore_index, axis, expected", + [ + # Concatenating DataFrame and named Series without ignore_index + ( + [DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5], name="c")], + False, + 0, + DataFrame( + { + "a": [0, 1, None, None], + "b": [2, 3, None, None], + "c": [None, None, 4, 5], + }, + index=[0, 1, 0, 1], + ), + ), + # Concatenating DataFrame and named Series with ignore_index + ( + [DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5], name="c")], + True, + 0, + DataFrame( + { + "a": [0, 1, None, None], + "b": [2, 3, None, None], + "c": [None, None, 4, 5], + }, + index=[0, 1, 2, 3], + ), + ), + # Concatenating DataFrame and unnamed Series along columns + ( + [DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5]), Series([4, 5])], + False, + 1, + DataFrame({"a": [0, 1], "b": [2, 3], 0: [4, 5], 1: [4, 5]}, index=[0, 1]), + ), + # Concatenating DataFrame and unnamed Series along columns with ignore_index + ( + [DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5]), Series([4, 5])], + True, + 1, + DataFrame({0: [0, 1], 1: [2, 3], 2: [4, 5], 3: [4, 5]}, index=[0, 1]), + ), + ], +) +def test_concat_of_series_and_frame(inputs, ignore_index, axis, expected): + # GH #60723 and #56257 + result = concat(inputs, ignore_index=ignore_index, axis=axis) + tm.assert_frame_equal(result, expected) From d53dc0a3a065fa449eedae57b9429e0e51a6260a Mon Sep 17 00:00:00 2001 From: Anurag Varma Date: Tue, 4 Mar 2025 01:14:07 -0500 Subject: [PATCH 13/13] removed duplicate test case --- pandas/tests/reshape/concat/test_concat.py | 37 ---------------------- 1 file changed, 37 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 9caf173164f8c..2d0eb5d14a1d9 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -950,43 +950,6 @@ def test_concat_with_moot_ignore_index_and_keys(): concat([df1, df2], keys=keys, ignore_index=ignore_index) -def test_concat_of_series_and_frame_with_names_for_ignore_index(): - # GH #60723 and #56257 - ser = Series([4, 5], name="c") - df = DataFrame({"a": [0, 1], "b": [2, 3]}) - - result = concat([df, ser]) - expected = DataFrame( - {"a": [0, 1, None, None], "b": [2, 3, None, None], "c": [None, None, 4, 5]}, - index=[0, 1, 0, 1], - ) - tm.assert_frame_equal(result, expected) - - ser = Series([4, 5], name="c") - df = DataFrame({"a": [0, 1], "b": [2, 3]}) - - result = concat([df, ser], ignore_index=True) - expected = DataFrame( - {"a": [0, 1, None, None], "b": [2, 3, None, None], "c": [None, None, 4, 5]}, - index=[0, 1, 2, 3], - ) - tm.assert_frame_equal(result, expected) - - ser = Series([4, 5]) - df = DataFrame({"a": [0, 1], "b": [2, 3]}) - - result = concat([df, ser, ser], axis=1) - expected = DataFrame({"a": [0, 1], "b": [2, 3], 0: [4, 5], 1: [4, 5]}, index=[0, 1]) - tm.assert_frame_equal(result, expected) - - ser = Series([4, 5]) - df = DataFrame({"a": [0, 1], "b": [2, 3]}) - - result = concat([df, ser, ser], axis=1, ignore_index=True) - expected = DataFrame({0: [0, 1], 1: [2, 3], 2: [4, 5], 3: [4, 5]}, index=[0, 1]) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( "inputs, ignore_index, axis, expected", [