[SPARK-43241][PS] MultiIndex.append not checking names for equality

itholic · zhengruifeng · commit d7e827e52c45 · 2023-09-05T11:45:29.000+08:00
### What changes were proposed in this pull request? This PR proposes to fix the behavior of `MultiIndex.append` so that the names of the calling index are no longer carried over to the result. ### Why are the changes needed? To match the behavior of pandas, per pandas-dev/pandas#48288. ### Does this PR introduce _any_ user-facing change? Yes, the behavior is changed to match pandas: **Testing data** ```python >>> psmidx1 MultiIndex([('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3)], names=['x', 'y', 'z']) >>> psmidx2 MultiIndex([('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3)], names=['p', 'q', 'r']) ``` **Before** ```python >>> psmidx1.append(psmidx2) MultiIndex([('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3), ('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3)], names=['x', 'y', 'z']) ``` **After** ```python >>> psmidx1.append(psmidx2) MultiIndex([('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3), ('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3)], ) ``` ### How was this patch tested? Updated the existing unit tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42787 from itholic/SPARK-43241. Authored-by: Haejoon Lee <haejoon.lee@databricks.com> Signed-off-by: Ruifeng Zheng <ruifengz@apache.org>
diff --git a/python/pyspark/pandas/indexes/base.py b/python/pyspark/pandas/indexes/base.py
@@ -1917,18 +1917,12 @@ def append(self, other: "Index") -> "Index":
         sdf_other = other._internal.spark_frame.select(other._internal.index_spark_columns)
         sdf_appended = sdf_self.union(sdf_other)
 
-        # names should be kept when MultiIndex, but Index wouldn't keep its name.
-        if isinstance(self, MultiIndex):
-            index_names = self._internal.index_names
-        else:
-            index_names = None
-
         internal = InternalFrame(
             spark_frame=sdf_appended,
             index_spark_columns=[
                 scol_for(sdf_appended, col) for col in self._internal.index_spark_column_names
             ],
-            index_names=index_names,
+            index_names=None,
             index_fields=index_fields,
         )
 
diff --git a/python/pyspark/pandas/tests/indexes/test_base_slow.py b/python/pyspark/pandas/tests/indexes/test_base_slow.py
@@ -107,29 +107,9 @@ def test_append(self):
         psmidx1 = ps.from_pandas(pmidx1)
         psmidx2 = ps.from_pandas(pmidx2)
 
-        # TODO(SPARK-43241): MultiIndex.append not checking names for equality.
-        # Also refer to https://github.com/pandas-dev/pandas/pull/48288.
-        if LooseVersion(pd.__version__) >= LooseVersion("2.0.0"):
-            self.assert_eq(
-                pmidx1.append(pmidx2), psmidx1.append(psmidx2).rename([None, None, None])
-            )
-        else:
-            self.assert_eq(pmidx1.append(pmidx2), psmidx1.append(psmidx2))
-
-        if LooseVersion(pd.__version__) >= LooseVersion("2.0.0"):
-            self.assert_eq(
-                pmidx2.append(pmidx1), psmidx2.append(psmidx1).rename([None, None, None])
-            )
-        else:
-            self.assert_eq(pmidx2.append(pmidx1), psmidx2.append(psmidx1))
-
-        if LooseVersion(pd.__version__) >= LooseVersion("2.0.0"):
-            self.assert_eq(
-                pmidx1.append(pmidx2).names,
-                psmidx1.append(psmidx2).rename([None, None, None]).names,
-            )
-        else:
-            self.assert_eq(pmidx1.append(pmidx2).names, psmidx1.append(psmidx2).names)
+        self.assert_eq(pmidx1.append(pmidx2), psmidx1.append(psmidx2))
+        self.assert_eq(pmidx2.append(pmidx1), psmidx2.append(psmidx1))
+        self.assert_eq(pmidx1.append(pmidx2).names, psmidx1.append(psmidx2).names)
 
         # Index & MultiIndex is currently not supported
         expected_error_message = r"append\(\) between Index & MultiIndex is currently not supported"