From 774b4aa851f21b22c4a7a6172215d36aa03db10a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 19 Mar 2024 10:37:02 -0700 Subject: [PATCH 1/2] REF/PERF: Use concat(..., ignore_index=True) when index doesn't matter --- pandas/core/groupby/generic.py | 8 +++++--- pandas/core/reshape/melt.py | 2 +- pandas/core/reshape/reshape.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3b20b854b344e..361e9e87fadb8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -574,7 +574,7 @@ def _transform_general( if results: from pandas.core.reshape.concat import concat - concatenated = concat(results) + concatenated = concat(results, ignore_index=True) result = self._set_result_index_ordered(concatenated) else: result = self.obj._constructor(dtype=np.float64) @@ -1803,7 +1803,9 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): applied.append(res) concat_index = obj.columns - concatenated = concat(applied, axis=0, verify_integrity=False) + concatenated = concat( + applied, axis=0, verify_integrity=False, ignore_index=True + ) concatenated = concatenated.reindex(concat_index, axis=1) return self._set_result_index_ordered(concatenated) @@ -2797,7 +2799,7 @@ def _wrap_transform_general_frame( # other dimension; this will preserve dtypes # GH14457 if res.index.is_(obj.index): - res_frame = concat([res] * len(group.columns), axis=1) + res_frame = concat([res] * len(group.columns), axis=1, ignore_index=True) res_frame.columns = group.columns res_frame.index = group.index else: diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 24a070a536150..f51a833e5f906 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -243,7 +243,7 @@ def melt( not isinstance(dt, np.dtype) and dt._supports_2d for dt in frame.dtypes ): mdata[value_name] = concat( - [frame.iloc[:, i] for i in range(frame.shape[1])] + [frame.iloc[:, i] for i in range(frame.shape[1])], ignore_index=True ).values else: mdata[value_name] = frame._values.ravel("F") diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b28010c13d6dd..bb3c608d559cc 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -952,7 +952,7 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame: result: Series | DataFrame if len(buf) > 0 and not frame.empty: - result = concat(buf) + result = concat(buf, ignore_index=True) ratio = len(result) // len(frame) else: # input is empty From ae63a5f11b4649567ae0e9c9fda6eb3383a5ec2c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 21 Mar 2024 10:12:35 -0700 Subject: [PATCH 2/2] Add more ignore_index --- pandas/core/arrays/categorical.py | 2 +- pandas/core/methods/describe.py | 1 + pandas/core/reshape/pivot.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 60529c1c2251b..429dc9236cf45 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2622,7 +2622,7 @@ def describe(self) -> DataFrame: from pandas import Index from pandas.core.reshape.concat import concat - result = concat([counts, freqs], axis=1) + result = concat([counts, freqs], ignore_index=True, axis=1) result.columns = Index(["counts", "freqs"]) result.index.name = "categories" diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 380bf9ce55659..ef20d4c509732 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -175,6 +175,7 @@ def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame: d = concat( [x.reindex(col_names) for x in ldesc], axis=1, + ignore_index=True, sort=False, ) d.columns = data.columns.copy() diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 7b2fbb54f7d35..b62f550662f5d 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -835,7 +835,7 @@ def _normalize( elif normalize == "index": index_margin = index_margin / index_margin.sum() - table = table._append(index_margin) + table = table._append(index_margin, ignore_index=True) table = table.fillna(0) table.index = table_index @@ -844,7 +844,7 @@ def _normalize( index_margin = index_margin / index_margin.sum() index_margin.loc[margins_name] = 1 table = concat([table, column_margin], axis=1) - table = table._append(index_margin) + table = table._append(index_margin, ignore_index=True) table = table.fillna(0) table.index = table_index