Commit bc20e85

zhengruifeng authored and HyukjinKwon committed
[SPARK-43710][PS][CONNECT] Support functions.date_part for Spark Connect
### What changes were proposed in this pull request?

Switch to the newly added `date_part` function (8dc0286).

### Why are the changes needed?

To support Spark Connect.

### Does this PR introduce _any_ user-facing change?

Yes.

### How was this patch tested?

Existing unit tests.

Closes #41691 from zhengruifeng/ps_date_part.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
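For illustration, a minimal sketch (not part of the patch) of the builtin `pyspark.sql.functions.date_part` that this change switches to. It assumes PySpark 3.5+, where the function was added; the DataFrame and column names are hypothetical, and the literal-`Column` field follows the form shown in the PySpark API docs:

```python
import pyspark.sql.functions as F
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical data: one timestamp column named "ts".
df = spark.createDataFrame([("2023-06-20 10:30:45",)], ["ts_str"]).select(
    F.to_timestamp("ts_str").alias("ts")
)

# The field is passed as a literal Column per the API docs; the call goes
# through the regular Column API, so it works on classic Spark sessions
# and Spark Connect sessions alike.
df.select(F.date_part(F.lit("HOUR"), "ts").alias("hour")).show()  # hour = 10
```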
1 parent 10751dc · commit bc20e85

File tree

2 files changed: +5 −33 lines changed


python/pyspark/pandas/indexes/timedelta.py (+3 −3)
```diff
@@ -150,9 +150,9 @@ def seconds(self) -> Index:
 
         @no_type_check
         def get_seconds(scol):
-            hour_scol = SF.date_part("HOUR", scol)
-            minute_scol = SF.date_part("MINUTE", scol)
-            second_scol = SF.date_part("SECOND", scol)
+            hour_scol = F.date_part("HOUR", scol)
+            minute_scol = F.date_part("MINUTE", scol)
+            second_scol = F.date_part("SECOND", scol)
             return (
                 F.when(
                     hour_scol < 0,
```
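For context, a brief usage sketch (not from the patch) of the pandas-on-Spark property that `get_seconds` backs; the input timedelta is illustrative:

```python
import pandas as pd
import pyspark.pandas as ps

# Illustrative input: 1 day, 3 hours, 4 minutes, 5 seconds.
psidx = ps.TimedeltaIndex(pd.to_timedelta(["1 days 03:04:05"]))

# TimedeltaIndex.seconds yields the seconds component (>= 0, < 1 day),
# computed by the patched get_seconds: 3 * 3600 + 4 * 60 + 5 = 11045.
print(psidx.seconds)
```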

python/pyspark/pandas/spark/functions.py (+2 −30)
```diff
@@ -17,15 +17,11 @@
 """
 Additional Spark functions used in pandas-on-Spark.
 """
-from typing import Union, no_type_check
+from typing import Union
 
 from pyspark import SparkContext
 import pyspark.sql.functions as F
-from pyspark.sql.column import (
-    Column,
-    _to_java_column,
-    _create_column_from_literal,
-)
+from pyspark.sql.column import Column
 
 # For supporting Spark Connect
 from pyspark.sql.utils import is_remote
@@ -145,27 +141,3 @@ def repeat(col: Column, n: Union[int, Column]) -> Column:
     """
     _n = F.lit(n) if isinstance(n, int) else n
     return F.call_udf("repeat", col, _n)
-
-
-def date_part(field: Union[str, Column], source: Column) -> Column:
-    """
-    Extracts a part of the date/timestamp or interval source.
-    """
-    sc = SparkContext._active_spark_context
-    field = (
-        _to_java_column(field) if isinstance(field, Column) else _create_column_from_literal(field)
-    )
-    return _call_udf(sc, "date_part", field, _to_java_column(source))
-
-
-@no_type_check
-def _call_udf(sc, name, *cols):
-    return Column(sc._jvm.functions.callUDF(name, _make_arguments(sc, *cols)))
-
-
-@no_type_check
-def _make_arguments(sc, *cols):
-    java_arr = sc._gateway.new_array(sc._jvm.Column, len(cols))
-    for i, col in enumerate(cols):
-        java_arr[i] = col
-    return java_arr
```
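The deleted helpers depended on the driver-side Py4J gateway (`SparkContext._active_spark_context`, `sc._jvm`), which a Spark Connect client does not have; the builtin `F.date_part` avoids that path entirely. A minimal sketch of how such a dependency can be guarded with the `is_remote` helper this module already imports (the guard function itself is hypothetical, not part of the patch):

```python
from pyspark.sql.utils import is_remote


def jvm_gateway_available() -> bool:
    # Hypothetical guard: Spark Connect clients talk to the server over gRPC
    # and have no Py4J gateway, so sc._jvm-based helpers (like the removed
    # _call_udf) can only run on a classic, non-remote session.
    return not is_remote()
```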
