Skip to content

Commit ec37e6e

Browse files
jbrockmendel authored and proost committed
DEPR: box arg in to_datetime (pandas-dev#30111)
1 parent d5528f9 commit ec37e6e

File tree

3 files changed

+44
-134
lines changed

3 files changed

+44
-134
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
538538
- Removed the previously deprecated :meth:`Series.compound` and :meth:`DataFrame.compound` (:issue:`26405`)
539539
- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to ``False`` (:issue:`27600`)
540540
- Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`)
541+
- :func:`to_datetime` no longer accepts the "box" argument and always returns a :class:`DatetimeIndex`, :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`)
541542
- Removed the previously deprecated ``time_rule`` keyword from (non-public) :func:`offsets.generate_range`, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`)
542543
- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`)
543544
- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`)

pandas/core/tools/datetimes.py

+43-98
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
parse_time_string,
1515
)
1616
from pandas._libs.tslibs.strptime import array_strptime
17-
from pandas.util._decorators import deprecate_kwarg
1817

1918
from pandas.core.dtypes.common import (
2019
ensure_object,
@@ -45,12 +44,6 @@
4544
# types used in annotations
4645

4746
ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries]
48-
49-
# ---------------------------------------------------------------------
50-
51-
# ---------------------------------------------------------------------
52-
# types used in annotations
53-
5447
Scalar = Union[int, float, str]
5548
DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
5649
DatetimeScalarOrArrayConvertible = Union[
@@ -154,7 +147,7 @@ def _maybe_cache(arg, format, cache, convert_listlike):
154147

155148
unique_dates = unique(arg)
156149
if len(unique_dates) < len(arg):
157-
cache_dates = convert_listlike(unique_dates, True, format)
150+
cache_dates = convert_listlike(unique_dates, format)
158151
cache_array = Series(cache_dates, index=unique_dates)
159152
return cache_array
160153

@@ -169,7 +162,7 @@ def _box_as_indexlike(
169162
Parameters
170163
----------
171164
dt_array: 1-d array
172-
array of datetimes to be boxed
165+
Array of datetimes to be wrapped in an Index.
173166
tz : object
174167
None or 'utc'
175168
name : string, default None
@@ -192,37 +185,30 @@ def _box_as_indexlike(
192185
def _convert_and_box_cache(
193186
arg: DatetimeScalarOrArrayConvertible,
194187
cache_array: ABCSeries,
195-
box: bool,
196188
name: Optional[str] = None,
197-
) -> Union[ABCIndex, np.ndarray]:
189+
) -> ABCIndexClass:
198190
"""
199-
Convert array of dates with a cache and box the result
191+
Convert array of dates with a cache and wrap the result in an Index.
200192
201193
Parameters
202194
----------
203195
arg : integer, float, string, datetime, list, tuple, 1-d array, Series
204196
cache_array : Series
205197
Cache of converted, unique dates
206-
box : boolean
207-
True boxes result as an Index-like, False returns an ndarray
208198
name : string, default None
209199
Name for a DatetimeIndex
210200
211201
Returns
212202
-------
213-
result : datetime of converted dates
214-
- Index-like if box=True
215-
- ndarray if box=False
203+
result : Index-like of converted dates
216204
"""
217205
from pandas import Series
218206

219207
result = Series(arg).map(cache_array)
220-
if box:
221-
return _box_as_indexlike(result, utc=None, name=name)
222-
return result.values
208+
return _box_as_indexlike(result, utc=None, name=name)
223209

224210

225-
def _return_parsed_timezone_results(result, timezones, box, tz, name):
211+
def _return_parsed_timezone_results(result, timezones, tz, name):
226212
"""
227213
Return results from array_strptime if a %z or %Z directive was passed.
228214
@@ -232,20 +218,14 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name):
232218
int64 date representations of the dates
233219
timezones : ndarray
234220
pytz timezone objects
235-
box : boolean
236-
True boxes result as an Index-like, False returns an ndarray
237221
tz : object
238222
None or pytz timezone object
239223
name : string, default None
240224
Name for a DatetimeIndex
241225
242226
Returns
243227
-------
244-
tz_result : ndarray of parsed dates with timezone
245-
Returns:
246-
247-
- Index-like if box=True
248-
- ndarray of Timestamps if box=False
228+
tz_result : Index-like of parsed dates with timezone
249229
"""
250230
if tz is not None:
251231
raise ValueError(
@@ -256,16 +236,13 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name):
256236
tz_results = np.array(
257237
[Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)]
258238
)
259-
if box:
260-
from pandas import Index
239+
from pandas import Index
261240

262-
return Index(tz_results, name=name)
263-
return tz_results
241+
return Index(tz_results, name=name)
264242

265243

266244
def _convert_listlike_datetimes(
267245
arg,
268-
box,
269246
format,
270247
name=None,
271248
tz=None,
@@ -284,8 +261,6 @@ def _convert_listlike_datetimes(
284261
----------
285262
arg : list, tuple, ndarray, Series, Index
286263
date to be parsed
287-
box : boolean
288-
True boxes result as an Index-like, False returns an ndarray
289264
name : object
290265
None or string for the Index name
291266
tz : object
@@ -305,11 +280,7 @@ def _convert_listlike_datetimes(
305280
306281
Returns
307282
-------
308-
ndarray of parsed dates
309-
Returns:
310-
311-
- Index-like if box=True
312-
- ndarray of Timestamps if box=False
283+
Index-like of parsed dates
313284
"""
314285
from pandas import DatetimeIndex
315286
from pandas.core.arrays import DatetimeArray
@@ -330,7 +301,7 @@ def _convert_listlike_datetimes(
330301
return arg
331302

332303
elif is_datetime64_ns_dtype(arg):
333-
if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
304+
if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
334305
try:
335306
return DatetimeIndex(arg, tz=tz, name=name)
336307
except ValueError:
@@ -346,26 +317,25 @@ def _convert_listlike_datetimes(
346317
raise ValueError("cannot specify both format and unit")
347318
arg = getattr(arg, "values", arg)
348319
result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
349-
if box:
350-
if errors == "ignore":
351-
from pandas import Index
320+
if errors == "ignore":
321+
from pandas import Index
352322

353-
result = Index(result, name=name)
323+
result = Index(result, name=name)
324+
else:
325+
result = DatetimeIndex(result, name=name)
326+
# GH 23758: We may still need to localize the result with tz
327+
# GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
328+
# result will be naive but in UTC
329+
try:
330+
result = result.tz_localize("UTC").tz_convert(tz_parsed)
331+
except AttributeError:
332+
# Regular Index from 'ignore' path
333+
return result
334+
if tz is not None:
335+
if result.tz is None:
336+
result = result.tz_localize(tz)
354337
else:
355-
result = DatetimeIndex(result, name=name)
356-
# GH 23758: We may still need to localize the result with tz
357-
# GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
358-
# result will be naive but in UTC
359-
try:
360-
result = result.tz_localize("UTC").tz_convert(tz_parsed)
361-
except AttributeError:
362-
# Regular Index from 'ignore' path
363-
return result
364-
if tz is not None:
365-
if result.tz is None:
366-
result = result.tz_localize(tz)
367-
else:
368-
result = result.tz_convert(tz)
338+
result = result.tz_convert(tz)
369339
return result
370340
elif getattr(arg, "ndim", 1) > 1:
371341
raise TypeError(
@@ -416,7 +386,7 @@ def _convert_listlike_datetimes(
416386
)
417387
if "%Z" in format or "%z" in format:
418388
return _return_parsed_timezone_results(
419-
result, timezones, box, tz, name
389+
result, timezones, tz, name
420390
)
421391
except tslibs.OutOfBoundsDatetime:
422392
if errors == "raise":
@@ -463,20 +433,12 @@ def _convert_listlike_datetimes(
463433
)
464434

465435
if tz_parsed is not None:
466-
if box:
467-
# We can take a shortcut since the datetime64 numpy array
468-
# is in UTC
469-
return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)
470-
else:
471-
# Convert the datetime64 numpy array to an numpy array
472-
# of datetime objects
473-
result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result]
474-
return np.array(result, dtype=object)
436+
# We can take a shortcut since the datetime64 numpy array
437+
# is in UTC
438+
return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)
475439

476-
if box:
477-
utc = tz == "utc"
478-
return _box_as_indexlike(result, utc=utc, name=name)
479-
return result
440+
utc = tz == "utc"
441+
return _box_as_indexlike(result, utc=utc, name=name)
480442

481443

482444
def _adjust_to_origin(arg, origin, unit):
@@ -558,14 +520,12 @@ def _adjust_to_origin(arg, origin, unit):
558520
return arg
559521

560522

561-
@deprecate_kwarg(old_arg_name="box", new_arg_name=None)
562523
def to_datetime(
563524
arg,
564525
errors="raise",
565526
dayfirst=False,
566527
yearfirst=False,
567528
utc=None,
568-
box=True,
569529
format=None,
570530
exact=True,
571531
unit=None,
@@ -603,15 +563,6 @@ def to_datetime(
603563
utc : bool, default None
604564
Return UTC DatetimeIndex if True (converting any tz-aware
605565
datetime.datetime objects as well).
606-
box : bool, default True
607-
- If True returns a DatetimeIndex or Index-like object
608-
- If False returns ndarray of values.
609-
610-
.. deprecated:: 0.25.0
611-
Use :meth:`Series.to_numpy` or :meth:`Timestamp.to_datetime64`
612-
instead to get an ndarray of values or numpy.datetime64,
613-
respectively.
614-
615566
format : str, default None
616567
The strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
617568
all the way up to nanoseconds.
@@ -764,25 +715,25 @@ def to_datetime(
764715
if not cache_array.empty:
765716
result = arg.map(cache_array)
766717
else:
767-
values = convert_listlike(arg._values, True, format)
718+
values = convert_listlike(arg._values, format)
768719
result = arg._constructor(values, index=arg.index, name=arg.name)
769720
elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
770-
result = _assemble_from_unit_mappings(arg, errors, box, tz)
721+
result = _assemble_from_unit_mappings(arg, errors, tz)
771722
elif isinstance(arg, ABCIndexClass):
772723
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
773724
if not cache_array.empty:
774-
result = _convert_and_box_cache(arg, cache_array, box, name=arg.name)
725+
result = _convert_and_box_cache(arg, cache_array, name=arg.name)
775726
else:
776727
convert_listlike = partial(convert_listlike, name=arg.name)
777-
result = convert_listlike(arg, box, format)
728+
result = convert_listlike(arg, format)
778729
elif is_list_like(arg):
779730
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
780731
if not cache_array.empty:
781-
result = _convert_and_box_cache(arg, cache_array, box)
732+
result = _convert_and_box_cache(arg, cache_array)
782733
else:
783-
result = convert_listlike(arg, box, format)
734+
result = convert_listlike(arg, format)
784735
else:
785-
result = convert_listlike(np.array([arg]), box, format)[0]
736+
result = convert_listlike(np.array([arg]), format)[0]
786737

787738
return result
788739

@@ -813,7 +764,7 @@ def to_datetime(
813764
}
814765

815766

816-
def _assemble_from_unit_mappings(arg, errors, box, tz):
767+
def _assemble_from_unit_mappings(arg, errors, tz):
817768
"""
818769
assemble the unit specified fields from the arg (DataFrame)
819770
Return a Series for actual parsing
@@ -826,10 +777,6 @@ def _assemble_from_unit_mappings(arg, errors, box, tz):
826777
- If 'raise', then invalid parsing will raise an exception
827778
- If 'coerce', then invalid parsing will be set as NaT
828779
- If 'ignore', then invalid parsing will return the input
829-
box : boolean
830-
831-
- If True, return a DatetimeIndex
832-
- If False, return an array
833780
tz : None or 'utc'
834781
835782
Returns
@@ -904,8 +851,6 @@ def coerce(values):
904851
"cannot assemble the datetimes [{value}]: "
905852
"{error}".format(value=value, error=e)
906853
)
907-
if not box:
908-
return values.values
909854
return values
910855

911856

pandas/tests/indexes/datetimes/test_tools.py

-36
Original file line numberDiff line numberDiff line change
@@ -921,22 +921,6 @@ def test_iso_8601_strings_with_same_offset(self):
921921
result = DatetimeIndex([ts_str] * 2)
922922
tm.assert_index_equal(result, expected)
923923

924-
def test_iso_8601_strings_same_offset_no_box(self):
925-
# GH 22446
926-
data = ["2018-01-04 09:01:00+09:00", "2018-01-04 09:02:00+09:00"]
927-
928-
with tm.assert_produces_warning(FutureWarning):
929-
result = pd.to_datetime(data, box=False)
930-
931-
expected = np.array(
932-
[
933-
datetime(2018, 1, 4, 9, 1, tzinfo=pytz.FixedOffset(540)),
934-
datetime(2018, 1, 4, 9, 2, tzinfo=pytz.FixedOffset(540)),
935-
],
936-
dtype=object,
937-
)
938-
tm.assert_numpy_array_equal(result, expected)
939-
940924
def test_iso_8601_strings_with_different_offsets(self):
941925
# GH 17697, 11736
942926
ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT]
@@ -1024,16 +1008,6 @@ def test_timestamp_utc_true(self, ts, expected):
10241008
result = to_datetime(ts, utc=True)
10251009
assert result == expected
10261010

1027-
def test_to_datetime_box_deprecated(self):
1028-
expected = np.datetime64("2018-09-09")
1029-
1030-
# Deprecated - see GH24416
1031-
with tm.assert_produces_warning(FutureWarning):
1032-
pd.to_datetime(expected, box=False)
1033-
1034-
result = pd.to_datetime(expected).to_datetime64()
1035-
assert result == expected
1036-
10371011
@pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"])
10381012
def test_to_datetime_with_format_out_of_bounds(self, dt_str):
10391013
# GH 9107
@@ -1345,16 +1319,6 @@ def test_dataframe_dtypes(self, cache):
13451319
with pytest.raises(ValueError):
13461320
to_datetime(df, cache=cache)
13471321

1348-
def test_dataframe_box_false(self):
1349-
# GH 23760
1350-
df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
1351-
1352-
with tm.assert_produces_warning(FutureWarning):
1353-
result = pd.to_datetime(df, box=False)
1354-
1355-
expected = np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[ns]")
1356-
tm.assert_numpy_array_equal(result, expected)
1357-
13581322
def test_dataframe_utc_true(self):
13591323
# GH 23760
13601324
df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})

0 commit comments

Comments
 (0)