Skip to content

Commit 8371010

Browse files
itholicvpolet
authored and
vpolet
committed
[SPARK-43709][PS] Remove closed parameter from ps.date_range & enable test
### What changes were proposed in this pull request? This PR proposes to remove the `closed` parameter from `ps.date_range` & enable the test. See pandas-dev/pandas#40245 for more detail. ### Why are the changes needed? To support pandas 2.0.0 and above. ### Does this PR introduce _any_ user-facing change? The `closed` parameter will no longer be available from the `ps.date_range` API. ### How was this patch tested? Enabling the existing UT. Closes apache#42389 from itholic/closed_removing. Authored-by: itholic <[email protected]> Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent cbc1181 commit 8371010

File tree

3 files changed

+2
-51
lines changed

3 files changed

+2
-51
lines changed

python/docs/source/migration_guide/pyspark_upgrade.rst

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Upgrading from PySpark 3.5 to 4.0
3131
* In Spark 4.0, ``Series.mad`` has been removed from pandas API on Spark.
3232
* In Spark 4.0, ``na_sentinel`` parameter from ``Index.factorize`` and ``Series.factorize`` has been removed from pandas API on Spark, use ``use_na_sentinel`` instead.
3333
* In Spark 4.0, ``inplace`` parameter from ``Categorical.add_categories``, ``Categorical.remove_categories``, ``Categorical.set_categories``, ``Categorical.rename_categories``, ``Categorical.reorder_categories``, ``Categorical.as_ordered``, ``Categorical.as_unordered`` has been removed from pandas API on Spark.
34+
* In Spark 4.0, ``closed`` parameter from ``ps.date_range`` has been removed from pandas API on Spark.
3435

3536

3637
Upgrading from PySpark 3.3 to 3.4

python/pyspark/pandas/namespace.py

+1-37
Original file line numberDiff line numberDiff line change
@@ -1751,7 +1751,7 @@ def pandas_to_datetime(
17511751
)
17521752

17531753

1754-
# TODO(SPARK-42621): Add `inclusive` parameter and replace `closed`.
1754+
# TODO(SPARK-42621): Add `inclusive` parameter.
17551755
# See https://github.com/pandas-dev/pandas/issues/40245
17561756
def date_range(
17571757
start: Union[str, Any] = None,
@@ -1761,7 +1761,6 @@ def date_range(
17611761
tz: Optional[Union[str, tzinfo]] = None,
17621762
normalize: bool = False,
17631763
name: Optional[str] = None,
1764-
closed: Optional[str] = None,
17651764
**kwargs: Any,
17661765
) -> DatetimeIndex:
17671766
"""
@@ -1785,12 +1784,6 @@ def date_range(
17851784
Normalize start/end dates to midnight before generating date range.
17861785
name : str, default None
17871786
Name of the resulting DatetimeIndex.
1788-
closed : {None, 'left', 'right'}, optional
1789-
Make the interval closed with respect to the given frequency to
1790-
the 'left', 'right', or both sides (None, the default).
1791-
1792-
.. deprecated:: 3.4.0
1793-
17941787
**kwargs
17951788
For compatibility. Has no effect on the result.
17961789
@@ -1874,37 +1867,9 @@ def date_range(
18741867
DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
18751868
'2019-01-31'],
18761869
dtype='datetime64[ns]', freq=None)
1877-
1878-
`closed` controls whether to include `start` and `end` that are on the
1879-
boundary. The default includes boundary points on either end.
1880-
1881-
>>> ps.date_range(
1882-
... start='2017-01-01', end='2017-01-04', closed=None
1883-
... ) # doctest: +SKIP
1884-
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'],
1885-
dtype='datetime64[ns]', freq=None)
1886-
1887-
Use ``closed='left'`` to exclude `end` if it falls on the boundary.
1888-
1889-
>>> ps.date_range(
1890-
... start='2017-01-01', end='2017-01-04', closed='left'
1891-
... ) # doctest: +SKIP
1892-
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq=None)
1893-
1894-
Use ``closed='right'`` to exclude `start` if it falls on the boundary.
1895-
1896-
>>> ps.date_range(
1897-
... start='2017-01-01', end='2017-01-04', closed='right'
1898-
... ) # doctest: +SKIP
1899-
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq=None)
19001870
"""
19011871
assert freq not in ["N", "ns"], "nanoseconds is not supported"
19021872
assert tz is None, "Localized DatetimeIndex is not supported"
1903-
if closed is not None:
1904-
warnings.warn(
1905-
"Argument `closed` is deprecated in 3.4.0 and will be removed in 4.0.0.",
1906-
FutureWarning,
1907-
)
19081873

19091874
return cast(
19101875
DatetimeIndex,
@@ -1917,7 +1882,6 @@ def date_range(
19171882
tz=tz,
19181883
normalize=normalize,
19191884
name=name,
1920-
closed=closed,
19211885
**kwargs,
19221886
)
19231887
),

python/pyspark/pandas/tests/test_namespace.py

-14
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,6 @@ def test_to_datetime(self):
190190
self.assert_eq(pd.to_datetime(pdf), ps.to_datetime(psdf))
191191
self.assert_eq(pd.to_datetime(dict_from_pdf), ps.to_datetime(dict_from_pdf))
192192

193-
@unittest.skipIf(
194-
LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
195-
"TODO(SPARK-43709): Enable NamespaceTests.test_date_range for pandas 2.0.0.",
196-
)
197193
def test_date_range(self):
198194
self.assert_eq(
199195
ps.date_range(start="1/1/2018", end="1/08/2018"),
@@ -225,16 +221,6 @@ def test_date_range(self):
225221
pd.date_range(start="1/1/2018", periods=5, freq=pd.offsets.MonthEnd(3)),
226222
)
227223

228-
self.assert_eq(
229-
ps.date_range(start="2017-01-01", end="2017-01-04", closed="left"),
230-
pd.date_range(start="2017-01-01", end="2017-01-04", closed="left"),
231-
)
232-
233-
self.assert_eq(
234-
ps.date_range(start="2017-01-01", end="2017-01-04", closed="right"),
235-
pd.date_range(start="2017-01-01", end="2017-01-04", closed="right"),
236-
)
237-
238224
self.assertRaises(
239225
AssertionError, lambda: ps.date_range(start="1/1/2018", periods=5, tz="Asia/Tokyo")
240226
)

0 commit comments

Comments
 (0)