Commit bc20e85

zhengruifeng authored and HyukjinKwon committed
[SPARK-43710][PS][CONNECT] Support functions.date_part for Spark Connect
### What changes were proposed in this pull request?

Switch to the newly added `date_part` function (8dc0286).

### Why are the changes needed?

To support Spark Connect.

### Does this PR introduce _any_ user-facing change?

Yes.

### How was this patch tested?

Existing unit tests.

Closes #41691 from zhengruifeng/ps_date_part.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
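For illustration, a minimal sketch (not part of the patch) of the builtin `pyspark.sql.functions.date_part` that this change switches to. It assumes PySpark 3.5+, where the function was added; the DataFrame and column names are hypothetical, and the literal-`Column` field follows the form shown in the PySpark API docs:

```python
import pyspark.sql.functions as F
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical data: one timestamp column named "ts".
df = spark.createDataFrame([("2023-06-20 10:30:45",)], ["ts_str"]).select(
    F.to_timestamp("ts_str").alias("ts")
)

# The field is passed as a literal Column per the API docs; the call goes
# through the regular Column API, so it works on classic Spark sessions
# and Spark Connect sessions alike.
df.select(F.date_part(F.lit("HOUR"), "ts").alias("hour")).show()  # hour = 10
```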
1 parent 10751dc · commit bc20e85

File tree

2 files changed: +5 −33 lines changed


python/pyspark/pandas/indexes/timedelta.py (+3 −3)
```diff
@@ -150,9 +150,9 @@ def seconds(self) -> Index:
 
         @no_type_check
         def get_seconds(scol):
-            hour_scol = SF.date_part("HOUR", scol)
-            minute_scol = SF.date_part("MINUTE", scol)
-            second_scol = SF.date_part("SECOND", scol)
+            hour_scol = F.date_part("HOUR", scol)
+            minute_scol = F.date_part("MINUTE", scol)
+            second_scol = F.date_part("SECOND", scol)
             return (
                 F.when(
                     hour_scol < 0,
```
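For context, a brief usage sketch (not from the patch) of the pandas-on-Spark property that `get_seconds` backs; the input timedelta is illustrative:

```python
import pandas as pd
import pyspark.pandas as ps

# Illustrative input: 1 day, 3 hours, 4 minutes, 5 seconds.
psidx = ps.TimedeltaIndex(pd.to_timedelta(["1 days 03:04:05"]))

# TimedeltaIndex.seconds yields the seconds component (>= 0, < 1 day),
# computed by the patched get_seconds: 3 * 3600 + 4 * 60 + 5 = 11045.
print(psidx.seconds)
```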

python/pyspark/pandas/spark/functions.py (+2 −30)
```diff
@@ -17,15 +17,11 @@
 """
 Additional Spark functions used in pandas-on-Spark.
 """
-from typing import Union, no_type_check
+from typing import Union
 
 from pyspark import SparkContext
 import pyspark.sql.functions as F
-from pyspark.sql.column import (
-    Column,
-    _to_java_column,
-    _create_column_from_literal,
-)
+from pyspark.sql.column import Column
 
 # For supporting Spark Connect
 from pyspark.sql.utils import is_remote
@@ -145,27 +141,3 @@ def repeat(col: Column, n: Union[int, Column]) -> Column:
     """
     _n = F.lit(n) if isinstance(n, int) else n
     return F.call_udf("repeat", col, _n)
-
-
-def date_part(field: Union[str, Column], source: Column) -> Column:
-    """
-    Extracts a part of the date/timestamp or interval source.
-    """
-    sc = SparkContext._active_spark_context
-    field = (
-        _to_java_column(field) if isinstance(field, Column) else _create_column_from_literal(field)
-    )
-    return _call_udf(sc, "date_part", field, _to_java_column(source))
-
-
-@no_type_check
-def _call_udf(sc, name, *cols):
-    return Column(sc._jvm.functions.callUDF(name, _make_arguments(sc, *cols)))
-
-
-@no_type_check
-def _make_arguments(sc, *cols):
-    java_arr = sc._gateway.new_array(sc._jvm.Column, len(cols))
-    for i, col in enumerate(cols):
-        java_arr[i] = col
-    return java_arr
```
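The deleted helpers depended on the driver-side Py4J gateway (`SparkContext._active_spark_context`, `sc._jvm`), which a Spark Connect client does not have; the builtin `F.date_part` avoids that path entirely. A minimal sketch of how such a dependency can be guarded with the `is_remote` helper this module already imports (the guard function itself is hypothetical, not part of the patch):

```python
from pyspark.sql.utils import is_remote


def jvm_gateway_available() -> bool:
    # Hypothetical guard: Spark Connect clients talk to the server over gRPC
    # and have no Py4J gateway, so sc._jvm-based helpers (like the removed
    # _call_udf) can only run on a classic, non-remote session.
    return not is_remote()
```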
