Skip to content

Commit 94ab453

Browse files
itholicSandishKumarHN
authored and committed
[SPARK-40827][PS][TESTS] Re-enable the DataFrame.corrwith test after fixing in future pandas
### What changes were proposed in this pull request? This PR proposes to restore the formal test approach for `DataFrame.corrwith` in place of the manual tests, unless the pandas version is 1.5.0. ### Why are the changes needed? There was a regression introduced by pandas 1.5.0 (pandas-dev/pandas#48826), and it seems to be resolved now. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The fixed test should pass the CI. Closes apache#38455 from itholic/SPARK-40827. Authored-by: itholic <[email protected]> Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent ceade83 commit 94ab453

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

python/pyspark/pandas/tests/test_dataframe.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -6091,10 +6091,12 @@ def test_corrwith(self):
60916091
def _test_corrwith(self, psdf, psobj):
60926092
pdf = psdf._to_pandas()
60936093
pobj = psobj._to_pandas()
6094-
# Regression in pandas 1.5.0 when other is Series and method is "pearson" or "spearman"
6094+
# There was a regression in pandas 1.5.0
6095+
# when other is Series and method is "pearson" or "spearman", which was fixed in pandas 1.5.1.
6096+
# Therefore, we only test pandas 1.5.0 in a different way.
60956097
# See https://github.com/pandas-dev/pandas/issues/48826 for the reported issue,
60966098
# and https://github.com/pandas-dev/pandas/pull/46174 for the initial PR that caused it.
6097-
if LooseVersion(pd.__version__) >= LooseVersion("1.5.0") and isinstance(pobj, pd.Series):
6099+
if LooseVersion(pd.__version__) == LooseVersion("1.5.0") and isinstance(pobj, pd.Series):
60986100
methods = ["kendall"]
60996101
else:
61006102
methods = ["pearson", "spearman", "kendall"]

python/pyspark/pandas/tests/test_ops_on_diff_frames.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -1866,12 +1866,13 @@ def test_corrwith(self):
18661866
self._test_corrwith((df1 + 1), df2.B)
18671867
self._test_corrwith((df1 + 1), (df2.B + 2))
18681868

1869-
# Regression in pandas 1.5.0
1869+
# There was a regression in pandas 1.5.0, which was fixed in pandas 1.5.1.
1870+
# Therefore, we only test pandas 1.5.0 in a different way.
18701871
# See https://github.com/pandas-dev/pandas/issues/49141 for the reported issue,
18711872
# and https://github.com/pandas-dev/pandas/pull/46174 for the initial PR that caused it.
18721873
df_bool = ps.DataFrame({"A": [True, True, False, False], "B": [True, False, False, True]})
18731874
ser_bool = ps.Series([True, True, False, True])
1874-
if LooseVersion(pd.__version__) >= LooseVersion("1.5.0"):
1875+
if LooseVersion(pd.__version__) == LooseVersion("1.5.0"):
18751876
expected = ps.Series([0.5773502691896257, 0.5773502691896257], index=["B", "A"])
18761877
self.assert_eq(df_bool.corrwith(ser_bool), expected, almost=True)
18771878
else:
@@ -1883,10 +1884,11 @@ def test_corrwith(self):
18831884
self._test_corrwith(self.psdf3, self.psdf4)
18841885

18851886
self._test_corrwith(self.psdf1, self.psdf1.a)
1886-
# Regression in pandas 1.5.0
1887+
# There was a regression in pandas 1.5.0, which was fixed in pandas 1.5.1.
1888+
# Therefore, we only test pandas 1.5.0 in a different way.
18871889
# See https://github.com/pandas-dev/pandas/issues/49141 for the reported issue,
18881890
# and https://github.com/pandas-dev/pandas/pull/46174 for the initial PR that caused it.
1889-
if LooseVersion(pd.__version__) >= LooseVersion("1.5.0"):
1891+
if LooseVersion(pd.__version__) == LooseVersion("1.5.0"):
18901892
expected = ps.Series([-0.08827348295047496, 0.4413674147523748], index=["b", "a"])
18911893
self.assert_eq(self.psdf1.corrwith(self.psdf2.b), expected, almost=True)
18921894
else:

0 commit comments

Comments
 (0)