forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtest_array_to_datetime.py
296 lines (236 loc) · 10 KB
/
test_array_to_datetime.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
from datetime import (
date,
datetime,
timedelta,
timezone,
)
from dateutil.tz.tz import tzoffset
import numpy as np
import pytest
from pandas._libs import (
NaT,
iNaT,
tslib,
)
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas import Timestamp
import pandas._testing as tm
# Integer value of ``NpyDatetimeUnit.NPY_FR_GENERIC``. The resolution-inference
# tests in this module pass it as the ``creso`` argument of the tslib converters.
# NOTE(review): presumably this asks the converter to infer the unit from the
# data rather than forcing one -- confirm against the tslib implementation.
creso_infer = NpyDatetimeUnit.NPY_FR_GENERIC.value
class TestArrayToDatetimeResolutionInference:
    """Unit inference by ``tslib.array_to_datetime`` on object arrays.

    Every test passes ``creso=creso_infer``, checks that no timezone is
    returned, and compares the result with an array built using the expected
    ``datetime64`` unit.
    """

    # TODO: tests that include tzs, ints

    def test_infer_homogeoneous_datetimes(self):
        """Identical ``datetime`` objects are expected back as ``M8[us]``."""
        stamp = datetime(2023, 10, 27, 18, 3, 5, 678000)
        values = np.array([stamp] * 3, dtype=object)

        converted, inferred_tz = tslib.array_to_datetime(values, creso=creso_infer)

        assert inferred_tz is None
        wanted = np.array([stamp] * 3, dtype="M8[us]")
        tm.assert_numpy_array_equal(converted, wanted)

    def test_infer_homogeoneous_date_objects(self):
        """``None`` plus identical ``date`` objects are expected as ``M8[s]``."""
        day = datetime(2023, 10, 27, 18, 3, 5, 678000).date()
        values = np.array([None] + [day] * 3, dtype=object)

        converted, inferred_tz = tslib.array_to_datetime(values, creso=creso_infer)

        assert inferred_tz is None
        wanted = np.array([np.datetime64("NaT")] + [day] * 3, dtype="M8[s]")
        tm.assert_numpy_array_equal(converted, wanted)

    def test_infer_homogeoneous_dt64(self):
        """``None`` plus identical ms ``datetime64`` values stay ``M8[ms]``."""
        stamp = datetime(2023, 10, 27, 18, 3, 5, 678000)
        stamp64 = np.datetime64(stamp, "ms")
        values = np.array([None] + [stamp64] * 3, dtype=object)

        converted, inferred_tz = tslib.array_to_datetime(values, creso=creso_infer)

        assert inferred_tz is None
        wanted = np.array([np.datetime64("NaT")] + [stamp64] * 3, dtype="M8[ms]")
        tm.assert_numpy_array_equal(converted, wanted)

    def test_infer_homogeoneous_timestamps(self):
        """``None`` plus identical ns ``Timestamp`` objects give ``M8[ns]``."""
        stamp = datetime(2023, 10, 27, 18, 3, 5, 678000)
        ts_ns = Timestamp(stamp).as_unit("ns")
        values = np.array([None] + [ts_ns] * 3, dtype=object)

        converted, inferred_tz = tslib.array_to_datetime(values, creso=creso_infer)

        assert inferred_tz is None
        wanted = np.array([np.datetime64("NaT")] + [ts_ns.asm8] * 3, dtype="M8[ns]")
        tm.assert_numpy_array_equal(converted, wanted)

    def test_infer_homogeoneous_datetimes_strings(self):
        """``None`` plus identical microsecond strings give ``M8[us]``."""
        text = "2023-10-27 18:03:05.678000"
        values = np.array([None] + [text] * 3, dtype=object)

        converted, inferred_tz = tslib.array_to_datetime(values, creso=creso_infer)

        assert inferred_tz is None
        wanted = np.array([np.datetime64("NaT")] + [text] * 3, dtype="M8[us]")
        tm.assert_numpy_array_equal(converted, wanted)

    def test_infer_heterogeneous(self):
        """Strings of mixed precision plus ``None`` are expected as ``M8[us]``.

        The same expectation is checked with the input order reversed.
        """
        full = "2023-10-27 18:03:05.678000"
        # Drop the last three digits, then the whole fractional part.
        shorter = full[:-3]
        shortest = full[:-7]
        values = np.array([full, shorter, shortest, None], dtype=object)
        wanted = np.array(values, dtype="M8[us]")

        for candidate, target in ((values, wanted), (values[::-1], wanted[::-1])):
            converted, inferred_tz = tslib.array_to_datetime(
                candidate, creso=creso_infer
            )
            assert inferred_tz is None
            tm.assert_numpy_array_equal(converted, target)
class TestArrayToDatetimeWithTZResolutionInference:
    """Unit inference by ``tslib.array_to_datetime_with_tz``.

    Only the dtype of each returned array is asserted.
    """

    def test_array_to_datetime_with_tz_resolution(self):
        """Expected unit per input: ms string, us ``datetime``, s ``datetime64``."""
        offset = tzoffset("custom", 3600)
        cases = (
            (["2016-01-01 02:03:04.567", NaT], "M8[ms]"),
            ([datetime(2016, 1, 1, 2, 3, 4), NaT], "M8[us]"),
            ([NaT, np.datetime64(12345, "s")], "M8[s]"),
        )

        for items, wanted_dtype in cases:
            values = np.array(items, dtype=object)
            converted = tslib.array_to_datetime_with_tz(
                values, offset, False, False, creso_infer
            )
            assert converted.dtype == wanted_dtype

    def test_array_to_datetime_with_tz_resolution_all_nat(self):
        """All-NaT input, as a string or as ``NaT`` objects, gives ``M8[ns]``."""
        offset = tzoffset("custom", 3600)

        for items in (["NaT"], [NaT, NaT]):
            values = np.array(items, dtype=object)
            converted = tslib.array_to_datetime_with_tz(
                values, offset, False, False, creso_infer
            )
            assert converted.dtype == "M8[ns]"
@pytest.mark.parametrize(
    "data,expected",
    [
        (
            ["01-01-2013", "01-02-2013"],
            ["2013-01-01T00:00:00.000000000", "2013-01-02T00:00:00.000000000"],
        ),
        (
            ["Mon Sep 16 2013", "Tue Sep 17 2013"],
            ["2013-09-16T00:00:00.000000000", "2013-09-17T00:00:00.000000000"],
        ),
    ],
)
def test_parsing_valid_dates(data, expected):
    """Valid date strings are converted to the expected ``M8[ns]`` values."""
    values = np.array(data, dtype=object)
    wanted = np.array(expected, dtype="M8[ns]")

    parsed, _ = tslib.array_to_datetime(values)

    tm.assert_numpy_array_equal(parsed, wanted)
@pytest.mark.parametrize(
    "dt_string, expected_tz",
    [
        ("01-01-2013 08:00:00+08:00", 480),
        ("2013-01-01T08:00:00.000000000+0800", 480),
        ("2012-12-31T16:00:00.000000000-0800", -480),
        ("12-31-2012 23:00:00-01:00", -60),
    ],
)
def test_parsing_timezone_offsets(dt_string, expected_tz):
    """Offset-bearing strings give the same values as the naive reference.

    ``expected_tz`` is the offset in minutes that the returned timezone must
    equal.
    """
    # All of these datetime strings with offsets are equivalent
    # to the same datetime after the timezone offset is added.
    reference = np.array(["01-01-2013 00:00:00"], dtype=object)
    wanted, _ = tslib.array_to_datetime(reference)

    with_offset = np.array([dt_string], dtype=object)
    parsed, parsed_tz = tslib.array_to_datetime(with_offset)

    tm.assert_numpy_array_equal(parsed, wanted)
    assert parsed_tz == timezone(timedelta(minutes=expected_tz))
def test_parsing_non_iso_timezone_offset():
    """A non-ISO string with a ``+0000`` offset parses to UTC without warnings."""
    values = np.array(["01-01-2013T00:00:00.000000000+0000"], dtype=object)
    wanted = np.array([np.datetime64("2013-01-01 00:00:00.000000000")])

    # GH#50949 should not get tzlocal-deprecation warning here
    with tm.assert_produces_warning(None):
        parsed, parsed_tz = tslib.array_to_datetime(values)

    tm.assert_numpy_array_equal(parsed, wanted)
    assert parsed_tz is timezone.utc
def test_parsing_different_timezone_offsets():
    """Mixed offsets warn and come back as tz-aware ``datetime`` objects.

    The returned timezone is expected to be ``None``.
    """
    # see gh-17697
    values = np.array(
        ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"], dtype=object
    )
    # Offsets are given in seconds: 19800 == +05:30, 23400 == +06:30.
    wanted = np.array(
        [
            datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, seconds))
            for seconds in (19800, 23400)
        ],
        dtype=object,
    )

    with tm.assert_produces_warning(
        FutureWarning,
        match="parsing datetimes with mixed time zones will raise an error",
    ):
        parsed, parsed_tz = tslib.array_to_datetime(values)

    tm.assert_numpy_array_equal(parsed, wanted)
    assert parsed_tz is None
@pytest.mark.parametrize(
    "data",
    [
        ["-352.737091", "183.575577"],
        ["1", "2", "3", "4", "5"],
    ],
)
def test_number_looking_strings_not_into_datetime(data):
    """Numeric-looking strings come back unchanged with ``errors="ignore"``."""
    # see gh-4601
    #
    # These strings don't look like datetimes, so
    # they shouldn't be attempted to be converted.
    values = np.array(data, dtype=object)

    returned, _ = tslib.array_to_datetime(values, errors="ignore")

    tm.assert_numpy_array_equal(returned, values)
@pytest.mark.parametrize(
    "invalid_date",
    [
        date(1000, 1, 1),
        datetime(1000, 1, 1),
        "1000-01-01",
        "Jan 1, 1000",
        np.datetime64("1000-01-01"),
    ],
)
@pytest.mark.parametrize("errors", ["coerce", "raise"])
def test_coerce_outside_ns_bounds(invalid_date, errors):
    """A year-1000 value raises under ``"raise"``, otherwise becomes NaT."""
    values = np.array([invalid_date], dtype="object")

    if errors == "raise":
        pattern = "^Out of bounds nanosecond timestamp: .*, at position 0$"
        with pytest.raises(OutOfBoundsDatetime, match=pattern):
            tslib.array_to_datetime(values=values, errors=errors)
        return

    # coerce.
    converted, _ = tslib.array_to_datetime(values=values, errors=errors)
    wanted = np.array([iNaT], dtype="M8[ns]")
    tm.assert_numpy_array_equal(converted, wanted)
def test_coerce_outside_ns_bounds_one_valid():
    """With ``errors="coerce"`` only the year-1000 entry becomes NaT."""
    values = np.array(["1/1/1000", "1/1/2000"], dtype=object)
    wanted = np.array([iNaT, "2000-01-01T00:00:00.000000000"], dtype="M8[ns]")

    converted, _ = tslib.array_to_datetime(values, errors="coerce")

    tm.assert_numpy_array_equal(converted, wanted)
@pytest.mark.parametrize("errors", ["ignore", "coerce"])
def test_coerce_of_invalid_datetimes(errors):
    """Invalid entries leave the input untouched or are coerced to NaT."""
    values = np.array(["01-01-2013", "not_a_date", "1"], dtype=object)

    if errors != "ignore":
        # coerce.
        # With coercing, the invalid dates becomes iNaT
        converted, _ = tslib.array_to_datetime(values, errors="coerce")
        wanted = np.array(
            ["2013-01-01T00:00:00.000000000", iNaT, iNaT], dtype="M8[ns]"
        )
    else:
        # Without coercing, the presence of any invalid
        # dates prevents any values from being converted.
        converted, _ = tslib.array_to_datetime(values=values, errors=errors)
        wanted = values

    tm.assert_numpy_array_equal(converted, wanted)
def test_to_datetime_barely_out_of_bounds():
    """A timestamp just past the nanosecond bounds raises OutOfBoundsDatetime.

    The expected message reports the value without its fractional seconds and
    names position 0.
    """
    # see gh-19382, gh-19529
    #
    # Close enough to bounds that dropping nanos
    # would result in an in-bounds datetime.
    arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)
    msg = "^Out of bounds nanosecond timestamp: 2262-04-11 23:47:16, at position 0$"

    # Use the exception as imported at module level (same class the other
    # out-of-bounds test in this file uses) instead of reaching for it through
    # the ``tslib`` module namespace.
    with pytest.raises(OutOfBoundsDatetime, match=msg):
        tslib.array_to_datetime(arr)
class SubDatetime(datetime):
    """Trivial ``datetime`` subclass that adds no behavior of its own."""
@pytest.mark.parametrize(
    "data,expected",
    [
        ([SubDatetime(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]),
        ([datetime(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]),
        ([Timestamp(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]),
    ],
)
def test_datetime_subclass(data, expected):
    """``datetime``, a subclass of it, and ``Timestamp`` convert alike."""
    # GH 25851
    # ensure that subclassed datetime works with
    # array_to_datetime
    values = np.array(data, dtype=object)
    wanted = np.array(expected, dtype="M8[ns]")

    converted, _ = tslib.array_to_datetime(values)

    tm.assert_numpy_array_equal(converted, wanted)