forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtest_to_datetime.py
2445 lines (2107 loc) · 90 KB
/
test_to_datetime.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
""" test to_datetime """
import calendar
from collections import deque
from datetime import datetime, timedelta
import locale
from dateutil.parser import parse
from dateutil.tz.tz import tzoffset
import numpy as np
import pytest
import pytz
from pandas._libs import tslib
from pandas._libs.tslibs import iNaT, parsing
from pandas.errors import OutOfBoundsDatetime
import pandas.util._test_decorators as td
from pandas.core.dtypes.common import is_datetime64_ns_dtype
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
NaT,
Series,
Timestamp,
date_range,
isna,
to_datetime,
)
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray
from pandas.core.tools import datetimes as tools
class TestTimeConversionFormats:
    """Tests for ``to_datetime`` driven by an explicit ``format=`` string."""
    @pytest.mark.parametrize("readonly", [True, False])
    def test_to_datetime_readonly(self, readonly):
        """An empty, possibly read-only object ndarray converts cleanly."""
        # GH#34857
        arr = np.array([], dtype=object)
        if readonly:
            # Mark the buffer non-writeable; conversion must not try to mutate it.
            arr.setflags(write=False)
        result = to_datetime(arr)
        expected = to_datetime([])
        tm.assert_index_equal(result, expected)
    @pytest.mark.parametrize("cache", [True, False])
    def test_to_datetime_format(self, cache):
        """The same strings parse differently under %d/%m vs %m/%d formats."""
        values = ["1/1/2000", "1/2/2000", "1/3/2000"]
        # results1: day-first interpretation; results2: month-first.
        results1 = [Timestamp("20000101"), Timestamp("20000201"), Timestamp("20000301")]
        results2 = [Timestamp("20000101"), Timestamp("20000102"), Timestamp("20000103")]
        for vals, expecteds in [
            (values, (Index(results1), Index(results2))),
            (Series(values), (Series(results1), Series(results2))),
            (values[0], (results1[0], results2[0])),
            (values[1], (results1[1], results2[1])),
            (values[2], (results1[2], results2[2])),
        ]:
            for i, fmt in enumerate(["%d/%m/%Y", "%m/%d/%Y"]):
                result = to_datetime(vals, format=fmt, cache=cache)
                expected = expecteds[i]
                if isinstance(expected, Series):
                    tm.assert_series_equal(result, Series(expected))
                elif isinstance(expected, Timestamp):
                    assert result == expected
                else:
                    tm.assert_index_equal(result, expected)
    @pytest.mark.parametrize("cache", [True, False])
    def test_to_datetime_format_YYYYMMDD(self, cache):
        """%Y%m%d parsing of int and string Series, with NaT and error modes."""
        s = Series([19801222, 19801222] + [19810105] * 5)
        expected = Series([Timestamp(x) for x in s.apply(str)])
        result = to_datetime(s, format="%Y%m%d", cache=cache)
        tm.assert_series_equal(result, expected)
        result = to_datetime(s.apply(str), format="%Y%m%d", cache=cache)
        tm.assert_series_equal(result, expected)
        # with NaT
        expected = Series(
            [Timestamp("19801222"), Timestamp("19801222")] + [Timestamp("19810105")] * 5
        )
        expected[2] = np.nan
        s[2] = np.nan
        result = to_datetime(s, format="%Y%m%d", cache=cache)
        tm.assert_series_equal(result, expected)
        # string with NaT
        s = s.apply(str)
        s[2] = "nat"
        result = to_datetime(s, format="%Y%m%d", cache=cache)
        tm.assert_series_equal(result, expected)
        # coercion
        # GH 7930
        # 99991231 is out of nanosecond Timestamp bounds: "ignore" keeps python
        # datetimes in an object Series, "coerce" turns the overflow into NaT.
        s = Series([20121231, 20141231, 99991231])
        result = pd.to_datetime(s, format="%Y%m%d", errors="ignore", cache=cache)
        expected = Series(
            [datetime(2012, 12, 31), datetime(2014, 12, 31), datetime(9999, 12, 31)],
            dtype=object,
        )
        tm.assert_series_equal(result, expected)
        result = pd.to_datetime(s, format="%Y%m%d", errors="coerce", cache=cache)
        expected = Series(["20121231", "20141231", "NaT"], dtype="M8[ns]")
        tm.assert_series_equal(result, expected)
    @pytest.mark.parametrize(
        "input_s",
        [
            # Null values with Strings
            ["19801222", "20010112", None],
            ["19801222", "20010112", np.nan],
            ["19801222", "20010112", pd.NaT],
            ["19801222", "20010112", "NaT"],
            # Null values with Integers
            [19801222, 20010112, None],
            [19801222, 20010112, np.nan],
            [19801222, 20010112, pd.NaT],
            [19801222, 20010112, "NaT"],
        ],
    )
    def test_to_datetime_format_YYYYMMDD_with_none(self, input_s):
        """Every null representation becomes NaT under %Y%m%d parsing."""
        # GH 30011
        # format='%Y%m%d'
        # with None
        expected = Series([Timestamp("19801222"), Timestamp("20010112"), pd.NaT])
        result = Series(pd.to_datetime(input_s, format="%Y%m%d"))
        tm.assert_series_equal(result, expected)
    @pytest.mark.parametrize(
        "input_s, expected",
        [
            # NaN before strings with invalid date values
            [
                Series(["19801222", np.nan, "20010012", "10019999"]),
                Series([Timestamp("19801222"), np.nan, np.nan, np.nan]),
            ],
            # NaN after strings with invalid date values
            [
                Series(["19801222", "20010012", "10019999", np.nan]),
                Series([Timestamp("19801222"), np.nan, np.nan, np.nan]),
            ],
            # NaN before integers with invalid date values
            [
                Series([20190813, np.nan, 20010012, 20019999]),
                Series([Timestamp("20190813"), np.nan, np.nan, np.nan]),
            ],
            # NaN after integers with invalid date values
            [
                Series([20190813, 20010012, np.nan, 20019999]),
                Series([Timestamp("20190813"), np.nan, np.nan, np.nan]),
            ],
        ],
    )
    def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
        """Invalid month/day values coerce to NaT regardless of NaN position."""
        # GH 25512
        # format='%Y%m%d', errors='coerce'
        result = pd.to_datetime(input_s, format="%Y%m%d", errors="coerce")
        tm.assert_series_equal(result, expected)
    @pytest.mark.parametrize("cache", [True, False])
    def test_to_datetime_format_integer(self, cache):
        """Integer Series parse under year-only and year-month formats."""
        # GH 10178
        s = Series([2000, 2001, 2002])
        expected = Series([Timestamp(x) for x in s.apply(str)])
        result = to_datetime(s, format="%Y", cache=cache)
        tm.assert_series_equal(result, expected)
        s = Series([200001, 200105, 200206])
        expected = Series([Timestamp(x[:4] + "-" + x[4:]) for x in s.apply(str)])
        result = to_datetime(s, format="%Y%m", cache=cache)
        tm.assert_series_equal(result, expected)
    @pytest.mark.parametrize(
        "int_date, expected",
        [
            # valid date, length == 8
            [20121030, datetime(2012, 10, 30)],
            # short valid date, length == 6
            [199934, datetime(1999, 3, 4)],
            # long integer date partially parsed to datetime(2012,1,1), length > 8
            [2012010101, 2012010101],
            # invalid date partially parsed to datetime(2012,9,9), length == 8
            [20129930, 20129930],
            # short integer date partially parsed to datetime(2012,9,9), length < 8
            [2012993, 2012993],
            # short invalid date, length == 4
            [2121, 2121],
        ],
    )
    def test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, expected):
        """errors='ignore' returns unparseable integer dates unchanged."""
        # GH 26583
        result = to_datetime(int_date, format="%Y%m%d", errors="ignore")
        assert result == expected
    @pytest.mark.parametrize("cache", [True, False])
    def test_to_datetime_format_microsecond(self, cache):
        """%f fractional seconds agree with datetime.strptime."""
        # these are locale dependent
        # NOTE(review): ``lang`` is never used; getlocale() appears to be called
        # only to document the locale dependence — confirm before removing.
        lang, _ = locale.getlocale()
        month_abbr = calendar.month_abbr[4]
        val = f"01-{month_abbr}-2011 00:00:01.978"
        format = "%d-%b-%Y %H:%M:%S.%f"
        result = to_datetime(val, format=format, cache=cache)
        exp = datetime.strptime(val, format)
        assert result == exp
    @pytest.mark.parametrize("cache", [True, False])
    def test_to_datetime_format_time(self, cache):
        """Time-of-day directives (%H, %I, %M, %S) parse as expected."""
        data = [
            ["01/10/2010 15:20", "%m/%d/%Y %H:%M", Timestamp("2010-01-10 15:20")],
            ["01/10/2010 05:43", "%m/%d/%Y %I:%M", Timestamp("2010-01-10 05:43")],
            [
                "01/10/2010 13:56:01",
                "%m/%d/%Y %H:%M:%S",
                Timestamp("2010-01-10 13:56:01"),
            ]  # ,
            # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p',
            #  Timestamp('2010-01-10 20:14')],
            # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p',
            #  Timestamp('2010-01-10 07:40')],
            # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p',
            #  Timestamp('2010-01-10 09:12:56')]
        ]
        for s, format, dt in data:
            assert to_datetime(s, format=format, cache=cache) == dt
    @td.skip_if_has_locale
    @pytest.mark.parametrize("cache", [True, False])
    def test_to_datetime_with_non_exact(self, cache):
        """exact=False matches the format anywhere inside the string."""
        # GH 10834
        # 8904
        # exact kw
        s = Series(
            ["19MAY11", "foobar19MAY11", "19MAY11:00:00:00", "19MAY11 00:00:00Z"]
        )
        result = to_datetime(s, format="%d%b%y", exact=False, cache=cache)
        # The regex extracts the embedded date token, so exact parsing of the
        # extraction should equal non-exact parsing of the raw string.
        expected = to_datetime(
            s.str.extract(r"(\d+\w+\d+)", expand=False), format="%d%b%y", cache=cache
        )
        tm.assert_series_equal(result, expected)
    @pytest.mark.parametrize("cache", [True, False])
    def test_parse_nanoseconds_with_formula(self, cache):
        """%f parsing keeps sub-second precision identical to format-free parsing."""
        # GH8989
        # truncating the nanoseconds when a format was provided
        for v in [
            "2012-01-01 09:00:00.000000001",
            "2012-01-01 09:00:00.000001",
            "2012-01-01 09:00:00.001",
            "2012-01-01 09:00:00.001000",
            "2012-01-01 09:00:00.001000000",
        ]:
            expected = pd.to_datetime(v, cache=cache)
            result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f", cache=cache)
            assert result == expected
    @pytest.mark.parametrize("cache", [True, False])
    def test_to_datetime_format_weeks(self, cache):
        """Week-number directives %W and %U resolve to concrete dates."""
        data = [
            ["2009324", "%Y%W%w", Timestamp("2009-08-13")],
            ["2013020", "%Y%U%w", Timestamp("2013-01-13")],
        ]
        for s, format, dt in data:
            assert to_datetime(s, format=format, cache=cache) == dt
    @pytest.mark.parametrize(
        "fmt,dates,expected_dates",
        [
            [
                "%Y-%m-%d %H:%M:%S %Z",
                ["2010-01-01 12:00:00 UTC"] * 2,
                [Timestamp("2010-01-01 12:00:00", tz="UTC")] * 2,
            ],
            [
                "%Y-%m-%d %H:%M:%S %Z",
                [
                    "2010-01-01 12:00:00 UTC",
                    "2010-01-01 12:00:00 GMT",
                    "2010-01-01 12:00:00 US/Pacific",
                ],
                [
                    Timestamp("2010-01-01 12:00:00", tz="UTC"),
                    Timestamp("2010-01-01 12:00:00", tz="GMT"),
                    Timestamp("2010-01-01 12:00:00", tz="US/Pacific"),
                ],
            ],
            [
                "%Y-%m-%d %H:%M:%S%z",
                ["2010-01-01 12:00:00+0100"] * 2,
                [Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60))] * 2,
            ],
            [
                "%Y-%m-%d %H:%M:%S %z",
                ["2010-01-01 12:00:00 +0100"] * 2,
                [Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60))] * 2,
            ],
            [
                "%Y-%m-%d %H:%M:%S %z",
                ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"],
                [
                    Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60)),
                    Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(-60)),
                ],
            ],
            [
                "%Y-%m-%d %H:%M:%S %z",
                ["2010-01-01 12:00:00 Z", "2010-01-01 12:00:00 Z"],
                [
                    Timestamp(
                        "2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0)
                    ),  # pytz coerces to UTC
                    Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0)),
                ],
            ],
        ],
    )
    def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates):
        """%Z names and %z offsets both produce tz-aware results."""
        # GH 13486
        result = pd.to_datetime(dates, format=fmt)
        expected = Index(expected_dates)
        tm.assert_equal(result, expected)
    def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self):
        """Heterogeneous %z offsets collapse to a single UTC index with utc=True."""
        # GH 32792
        dates = [
            "2010-01-01 12:00:00 +0100",
            "2010-01-01 12:00:00 -0100",
            "2010-01-01 12:00:00 +0300",
            "2010-01-01 12:00:00 +0400",
        ]
        expected_dates = [
            "2010-01-01 11:00:00+00:00",
            "2010-01-01 13:00:00+00:00",
            "2010-01-01 09:00:00+00:00",
            "2010-01-01 08:00:00+00:00",
        ]
        fmt = "%Y-%m-%d %H:%M:%S %z"
        result = pd.to_datetime(dates, format=fmt, utc=True)
        expected = DatetimeIndex(expected_dates)
        tm.assert_index_equal(result, expected)
    @pytest.mark.parametrize(
        "offset", ["+0", "-1foo", "UTCbar", ":10", "+01:000:01", ""]
    )
    def test_to_datetime_parse_timezone_malformed(self, offset):
        """Malformed %z offsets raise ValueError rather than parse silently."""
        fmt = "%Y-%m-%d %H:%M:%S %z"
        date = "2010-01-01 12:00:00 " + offset
        msg = "does not match format|unconverted data remains"
        with pytest.raises(ValueError, match=msg):
            pd.to_datetime([date], format=fmt)
    def test_to_datetime_parse_timezone_keeps_name(self):
        """The input Index name survives tz-aware parsing."""
        # GH 21697
        fmt = "%Y-%m-%d %H:%M:%S %z"
        arg = Index(["2010-01-01 12:00:00 Z"], name="foo")
        result = pd.to_datetime(arg, format=fmt)
        expected = DatetimeIndex(["2010-01-01 12:00:00"], tz="UTC", name="foo")
        tm.assert_index_equal(result, expected)
class TestToDatetime:
    @pytest.mark.parametrize(
        "s, _format, dt",
        [
            ["2015-1-1", "%G-%V-%u", datetime(2014, 12, 29, 0, 0)],
            ["2015-1-4", "%G-%V-%u", datetime(2015, 1, 1, 0, 0)],
            ["2015-1-7", "%G-%V-%u", datetime(2015, 1, 4, 0, 0)],
        ],
    )
    def test_to_datetime_iso_week_year_format(self, s, _format, dt):
        """ISO year/week/weekday directives (%G-%V-%u) parse correctly."""
        # See GH#16607
        assert to_datetime(s, format=_format) == dt
    @pytest.mark.parametrize(
        "msg, s, _format",
        [
            [
                "ISO week directive '%V' must be used with the ISO year directive "
                "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
                "1999 50",
                "%Y %V",
            ],
            [
                "ISO year directive '%G' must be used with the ISO week directive "
                "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
                "1999 51",
                "%G %V",
            ],
            [
                "ISO year directive '%G' must be used with the ISO week directive "
                "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
                "1999 Monday",
                "%G %A",
            ],
            [
                "ISO year directive '%G' must be used with the ISO week directive "
                "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
                "1999 Mon",
                "%G %a",
            ],
            [
                "ISO year directive '%G' must be used with the ISO week directive "
                "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
                "1999 6",
                "%G %w",
            ],
            [
                "ISO year directive '%G' must be used with the ISO week directive "
                "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
                "1999 6",
                "%G %u",
            ],
            [
                "ISO year directive '%G' must be used with the ISO week directive "
                "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.",
                "2051",
                "%G",
            ],
            [
                "Day of the year directive '%j' is not compatible with ISO year "
                "directive '%G'. Use '%Y' instead.",
                "1999 51 6 256",
                "%G %V %u %j",
            ],
            [
                "ISO week directive '%V' is incompatible with the year directive "
                "'%Y'. Use the ISO year '%G' instead.",
                "1999 51 Sunday",
                "%Y %V %A",
            ],
            [
                "ISO week directive '%V' is incompatible with the year directive "
                "'%Y'. Use the ISO year '%G' instead.",
                "1999 51 Sun",
                "%Y %V %a",
            ],
            [
                "ISO week directive '%V' is incompatible with the year directive "
                "'%Y'. Use the ISO year '%G' instead.",
                "1999 51 1",
                "%Y %V %w",
            ],
            [
                "ISO week directive '%V' is incompatible with the year directive "
                "'%Y'. Use the ISO year '%G' instead.",
                "1999 51 1",
                "%Y %V %u",
            ],
            [
                "ISO week directive '%V' must be used with the ISO year directive "
                "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.",
                "20",
                "%V",
            ],
        ],
    )
    def test_error_iso_week_year(self, msg, s, _format):
        """Invalid combinations of ISO-week directives raise ValueError."""
        # See GH#16607
        # This test checks for errors thrown when giving the wrong format
        # However, as discussed on PR#25541, overriding the locale
        # causes a different error to be thrown due to the format being
        # locale specific, but the test data is in english.
        # Therefore, the tests only run when locale is not overwritten,
        # as a sort of solution to this problem.
        if locale.getlocale() != ("zh_CN", "UTF-8") and locale.getlocale() != (
            "it_IT",
            "UTF-8",
        ):
            with pytest.raises(ValueError, match=msg):
                to_datetime(s, format=_format)
@pytest.mark.parametrize("tz", [None, "US/Central"])
def test_to_datetime_dtarr(self, tz):
# DatetimeArray
dti = date_range("1965-04-03", periods=19, freq="2W", tz=tz)
arr = DatetimeArray(dti)
result = to_datetime(arr)
assert result is arr
result = to_datetime(arr)
assert result is arr
def test_to_datetime_pydatetime(self):
actual = pd.to_datetime(datetime(2008, 1, 15))
assert actual == datetime(2008, 1, 15)
def test_to_datetime_YYYYMMDD(self):
actual = pd.to_datetime("20080115")
assert actual == datetime(2008, 1, 15)
def test_to_datetime_unparseable_ignore(self):
# unparseable
s = "Month 1, 1999"
assert pd.to_datetime(s, errors="ignore") == s
    @td.skip_if_windows  # `tm.set_timezone` does not work in windows
    def test_to_datetime_now(self):
        """'now' parses to a naive wall-clock timestamp close to np.datetime64('now')."""
        # See GH#18666
        with tm.set_timezone("US/Eastern"):
            npnow = np.datetime64("now").astype("datetime64[ns]")
            pdnow = pd.to_datetime("now")
            pdnow2 = pd.to_datetime(["now"])[0]
            # These should all be equal with infinite perf; this gives
            # a generous margin of 10 seconds
            assert abs(pdnow.value - npnow.astype(np.int64)) < 1e10
            assert abs(pdnow2.value - npnow.astype(np.int64)) < 1e10
            assert pdnow.tzinfo is None
            assert pdnow2.tzinfo is None
    @td.skip_if_windows  # `tm.set_timezone` does not work in windows
    def test_to_datetime_today(self):
        """'today' honors the local timezone, both far ahead of and behind UTC."""
        # See GH#18666
        # Test with one timezone far ahead of UTC and another far behind, so
        # one of these will _almost_ always be in a different day from UTC.
        # Unfortunately, when this test runs between 12 and 1 AM Samoa time,
        # both of these timezones _and_ UTC will all be in the same day,
        # so this test will not detect the regression introduced in #18666.
        with tm.set_timezone("Pacific/Auckland"):  # 12-13 hours ahead of UTC
            nptoday = np.datetime64("today").astype("datetime64[ns]").astype(np.int64)
            pdtoday = pd.to_datetime("today")
            pdtoday2 = pd.to_datetime(["today"])[0]
            tstoday = Timestamp("today")
            tstoday2 = Timestamp.today()
            # These should all be equal with infinite perf; this gives
            # a generous margin of 10 seconds
            assert abs(pdtoday.normalize().value - nptoday) < 1e10
            assert abs(pdtoday2.normalize().value - nptoday) < 1e10
            assert abs(pdtoday.value - tstoday.value) < 1e10
            assert abs(pdtoday.value - tstoday2.value) < 1e10
            assert pdtoday.tzinfo is None
            assert pdtoday2.tzinfo is None
        with tm.set_timezone("US/Samoa"):  # 11 hours behind UTC
            nptoday = np.datetime64("today").astype("datetime64[ns]").astype(np.int64)
            pdtoday = pd.to_datetime("today")
            pdtoday2 = pd.to_datetime(["today"])[0]
            # These should all be equal with infinite perf; this gives
            # a generous margin of 10 seconds
            assert abs(pdtoday.normalize().value - nptoday) < 1e10
            assert abs(pdtoday2.normalize().value - nptoday) < 1e10
            assert pdtoday.tzinfo is None
            assert pdtoday2.tzinfo is None
def test_to_datetime_today_now_unicode_bytes(self):
to_datetime(["now"])
to_datetime(["today"])
@pytest.mark.parametrize("cache", [True, False])
def test_to_datetime_dt64s(self, cache):
in_bound_dts = [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")]
for dt in in_bound_dts:
assert pd.to_datetime(dt, cache=cache) == Timestamp(dt)
    @pytest.mark.parametrize(
        "dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")]
    )
    @pytest.mark.parametrize("cache", [True, False])
    def test_to_datetime_dt64s_out_of_bounds(self, cache, dt):
        """datetime64 values beyond ns-Timestamp bounds raise or coerce to NaT."""
        msg = f"Out of bounds nanosecond timestamp: {dt}"
        with pytest.raises(OutOfBoundsDatetime, match=msg):
            pd.to_datetime(dt, errors="raise")
        # Timestamp itself enforces the same bound.
        with pytest.raises(OutOfBoundsDatetime, match=msg):
            Timestamp(dt)
        assert pd.to_datetime(dt, errors="coerce", cache=cache) is NaT
    @pytest.mark.parametrize("cache", [True, False])
    @pytest.mark.parametrize("unit", ["s", "D"])
    def test_to_datetime_array_of_dt64s(self, cache, unit):
        """Arrays of datetime64 handle in-bounds, raising, coerce and ignore paths."""
        # https://github.com/pandas-dev/pandas/issues/31491
        # Need at least 50 to ensure cache is used.
        dts = [
            np.datetime64("2000-01-01", unit),
            np.datetime64("2000-01-02", unit),
        ] * 30
        # Assuming all datetimes are in bounds, to_datetime() returns
        # an array that is equal to Timestamp() parsing
        tm.assert_index_equal(
            pd.to_datetime(dts, cache=cache),
            DatetimeIndex([Timestamp(x).asm8 for x in dts]),
        )
        # A list of datetimes where the last one is out of bounds
        dts_with_oob = dts + [np.datetime64("9999-01-01")]
        msg = "Out of bounds nanosecond timestamp: 9999-01-01 00:00:00"
        with pytest.raises(OutOfBoundsDatetime, match=msg):
            pd.to_datetime(dts_with_oob, errors="raise")
        # errors='coerce': only the out-of-bounds entry becomes NaT.
        tm.assert_index_equal(
            pd.to_datetime(dts_with_oob, errors="coerce", cache=cache),
            DatetimeIndex(
                [Timestamp(dts_with_oob[0]).asm8, Timestamp(dts_with_oob[1]).asm8] * 30
                + [pd.NaT],
            ),
        )
        # With errors='ignore', out of bounds datetime64s
        # are converted to their .item(), which depending on the version of
        # numpy is either a python datetime.datetime or datetime.date
        tm.assert_index_equal(
            pd.to_datetime(dts_with_oob, errors="ignore", cache=cache),
            Index([dt.item() for dt in dts_with_oob]),
        )
    @pytest.mark.parametrize("cache", [True, False])
    def test_to_datetime_tz(self, cache):
        """Uniform tz-aware Timestamps build a tz-aware index; mixed tzs raise."""
        # xref 8260
        # uniform returns a DatetimeIndex
        arr = [
            Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"),
            Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"),
        ]
        result = pd.to_datetime(arr, cache=cache)
        expected = DatetimeIndex(
            ["2013-01-01 13:00:00", "2013-01-02 14:00:00"], tz="US/Pacific"
        )
        tm.assert_index_equal(result, expected)
        # mixed tzs will raise
        arr = [
            Timestamp("2013-01-01 13:00:00", tz="US/Pacific"),
            Timestamp("2013-01-02 14:00:00", tz="US/Eastern"),
        ]
        msg = (
            "Tz-aware datetime.datetime cannot be "
            "converted to datetime64 unless utc=True"
        )
        with pytest.raises(ValueError, match=msg):
            pd.to_datetime(arr, cache=cache)
@pytest.mark.parametrize("cache", [True, False])
def test_to_datetime_different_offsets(self, cache):
# inspired by asv timeseries.ToDatetimeNONISO8601 benchmark
# see GH-26097 for more
ts_string_1 = "March 1, 2018 12:00:00+0400"
ts_string_2 = "March 1, 2018 12:00:00+0500"
arr = [ts_string_1] * 5 + [ts_string_2] * 5
expected = Index([parse(x) for x in arr])
result = pd.to_datetime(arr, cache=cache)
tm.assert_index_equal(result, expected)
    @pytest.mark.parametrize("cache", [True, False])
    def test_to_datetime_tz_pytz(self, cache):
        """pytz-localized datetimes convert to a UTC DatetimeIndex with utc=True."""
        # see gh-8260
        us_eastern = pytz.timezone("US/Eastern")
        arr = np.array(
            [
                us_eastern.localize(
                    datetime(year=2000, month=1, day=1, hour=3, minute=0)
                ),
                us_eastern.localize(
                    datetime(year=2000, month=6, day=1, hour=3, minute=0)
                ),
            ],
            dtype=object,
        )
        result = pd.to_datetime(arr, utc=True, cache=cache)
        # Offsets differ (EST vs EDT) because the second date is in DST.
        expected = DatetimeIndex(
            ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"],
            dtype="datetime64[ns, UTC]",
            freq=None,
        )
        tm.assert_index_equal(result, expected)
    @pytest.mark.parametrize("cache", [True, False])
    @pytest.mark.parametrize(
        "init_constructor, end_constructor, test_method",
        [
            (Index, DatetimeIndex, tm.assert_index_equal),
            (list, DatetimeIndex, tm.assert_index_equal),
            (np.array, DatetimeIndex, tm.assert_index_equal),
            (Series, Series, tm.assert_series_equal),
        ],
    )
    def test_to_datetime_utc_true(
        self, cache, init_constructor, end_constructor, test_method
    ):
        """utc=True localizes naive inputs to UTC across container types."""
        # See gh-11934 & gh-6415
        data = ["20100102 121314", "20100102 121315"]
        expected_data = [
            Timestamp("2010-01-02 12:13:14", tz="utc"),
            Timestamp("2010-01-02 12:13:15", tz="utc"),
        ]
        result = pd.to_datetime(
            init_constructor(data), format="%Y%m%d %H%M%S", utc=True, cache=cache
        )
        expected = end_constructor(expected_data)
        test_method(result, expected)
        # Test scalar case as well
        for scalar, expected in zip(data, expected_data):
            result = pd.to_datetime(
                scalar, format="%Y%m%d %H%M%S", utc=True, cache=cache
            )
            assert result == expected
@pytest.mark.parametrize("cache", [True, False])
def test_to_datetime_utc_true_with_series_single_value(self, cache):
# GH 15760 UTC=True with Series
ts = 1.5e18
result = pd.to_datetime(Series([ts]), utc=True, cache=cache)
expected = Series([Timestamp(ts, tz="utc")])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("cache", [True, False])
def test_to_datetime_utc_true_with_series_tzaware_string(self, cache):
ts = "2013-01-01 00:00:00-01:00"
expected_ts = "2013-01-01 01:00:00"
data = Series([ts] * 3)
result = pd.to_datetime(data, utc=True, cache=cache)
expected = Series([Timestamp(expected_ts, tz="utc")] * 3)
tm.assert_series_equal(result, expected)
    @pytest.mark.parametrize("cache", [True, False])
    @pytest.mark.parametrize(
        "date, dtype",
        [
            ("2013-01-01 01:00:00", "datetime64[ns]"),
            ("2013-01-01 01:00:00", "datetime64[ns, UTC]"),
        ],
    )
    def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date, dtype):
        """utc=True yields the same UTC Series for naive and UTC-aware dtypes."""
        expected = Series([Timestamp("2013-01-01 01:00:00", tz="UTC")])
        result = pd.to_datetime(Series([date], dtype=dtype), utc=True, cache=cache)
        tm.assert_series_equal(result, expected)
    @pytest.mark.parametrize("cache", [True, False])
    @td.skip_if_no("psycopg2")
    def test_to_datetime_tz_psycopg2(self, cache):
        """Third-party (psycopg2) fixed-offset tzinfos convert like pytz ones."""
        # xref 8260
        import psycopg2
        # misc cases
        tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)
        tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None)
        arr = np.array(
            [
                datetime(2000, 1, 1, 3, 0, tzinfo=tz1),
                datetime(2000, 6, 1, 3, 0, tzinfo=tz2),
            ],
            dtype=object,
        )
        result = pd.to_datetime(arr, errors="coerce", utc=True, cache=cache)
        expected = DatetimeIndex(
            ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"],
            dtype="datetime64[ns, UTC]",
            freq=None,
        )
        tm.assert_index_equal(result, expected)
        # dtype coercion
        i = DatetimeIndex(
            ["2000-01-01 08:00:00"],
            tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None),
        )
        assert is_datetime64_ns_dtype(i)
        # tz coercion
        result = pd.to_datetime(i, errors="coerce", cache=cache)
        tm.assert_index_equal(result, i)
        result = pd.to_datetime(i, errors="coerce", utc=True, cache=cache)
        expected = DatetimeIndex(["2000-01-01 13:00:00"], dtype="datetime64[ns, UTC]")
        tm.assert_index_equal(result, expected)
    @pytest.mark.parametrize("cache", [True, False])
    def test_datetime_bool(self, cache):
        """Booleans are rejected by to_datetime under every errors mode."""
        # GH13176
        msg = r"dtype bool cannot be converted to datetime64\[ns\]"
        with pytest.raises(TypeError, match=msg):
            to_datetime(False)
        assert to_datetime(False, errors="coerce", cache=cache) is NaT
        # errors='ignore' returns the original scalar unchanged.
        assert to_datetime(False, errors="ignore", cache=cache) is False
        with pytest.raises(TypeError, match=msg):
            to_datetime(True)
        assert to_datetime(True, errors="coerce", cache=cache) is NaT
        assert to_datetime(True, errors="ignore", cache=cache) is True
        # ``cache`` is a bool, so this renders the bool-type message.
        msg = f"{type(cache)} is not convertible to datetime"
        with pytest.raises(TypeError, match=msg):
            to_datetime([False, datetime.today()], cache=cache)
        with pytest.raises(TypeError, match=msg):
            to_datetime(["20130101", True], cache=cache)
        # In coerce mode bools become NaT while 0/0.0 parse as the epoch.
        tm.assert_index_equal(
            to_datetime([0, False, NaT, 0.0], errors="coerce", cache=cache),
            DatetimeIndex(
                [to_datetime(0, cache=cache), NaT, NaT, to_datetime(0, cache=cache)]
            ),
        )
def test_datetime_invalid_datatype(self):
# GH13176
msg = "is not convertible to datetime"
with pytest.raises(TypeError, match=msg):
pd.to_datetime(bool)
with pytest.raises(TypeError, match=msg):
pd.to_datetime(pd.to_datetime)
    @pytest.mark.parametrize("value", ["a", "00:01:99"])
    @pytest.mark.parametrize("infer", [True, False])
    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
    def test_datetime_invalid_scalar(self, value, format, infer):
        """Invalid scalar strings: ignore echoes, coerce yields NaT, raise raises."""
        # GH24763
        res = pd.to_datetime(
            value, errors="ignore", format=format, infer_datetime_format=infer
        )
        assert res == value
        res = pd.to_datetime(
            value, errors="coerce", format=format, infer_datetime_format=infer
        )
        assert res is pd.NaT
        # The exact error text depends on whether a (bad) format was supplied.
        msg = (
            "is a bad directive in format|"
            "second must be in 0..59|"
            "Given date string not likely a datetime"
        )
        with pytest.raises(ValueError, match=msg):
            pd.to_datetime(
                value, errors="raise", format=format, infer_datetime_format=infer
            )
    @pytest.mark.parametrize("value", ["3000/12/11 00:00:00"])
    @pytest.mark.parametrize("infer", [True, False])
    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
    def test_datetime_outofbounds_scalar(self, value, format, infer):
        """Out-of-bounds scalars: ignore echoes, coerce yields NaT, raise varies."""
        # GH24763
        res = pd.to_datetime(
            value, errors="ignore", format=format, infer_datetime_format=infer
        )
        assert res == value
        res = pd.to_datetime(
            value, errors="coerce", format=format, infer_datetime_format=infer
        )
        assert res is pd.NaT
        if format is not None:
            # The bad format is reported as a ValueError before bounds matter.
            msg = "is a bad directive in format|Out of bounds nanosecond timestamp"
            with pytest.raises(ValueError, match=msg):
                pd.to_datetime(
                    value, errors="raise", format=format, infer_datetime_format=infer
                )
        else:
            # Without a format the value parses but exceeds the ns bounds.
            msg = "Out of bounds nanosecond timestamp"
            with pytest.raises(OutOfBoundsDatetime, match=msg):
                pd.to_datetime(
                    value, errors="raise", format=format, infer_datetime_format=infer
                )
    @pytest.mark.parametrize("values", [["a"], ["00:01:99"], ["a", "b", "99:00:00"]])
    @pytest.mark.parametrize("infer", [True, False])
    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
    def test_datetime_invalid_index(self, values, format, infer):
        """Invalid list-likes: ignore echoes the Index, coerce gives all-NaT."""
        # GH24763
        res = pd.to_datetime(
            values, errors="ignore", format=format, infer_datetime_format=infer
        )
        tm.assert_index_equal(res, Index(values))
        res = pd.to_datetime(
            values, errors="coerce", format=format, infer_datetime_format=infer
        )
        tm.assert_index_equal(res, DatetimeIndex([pd.NaT] * len(values)))
        msg = (
            "is a bad directive in format|"
            "Given date string not likely a datetime|"
            "second must be in 0..59"
        )
        with pytest.raises(ValueError, match=msg):
            pd.to_datetime(
                values, errors="raise", format=format, infer_datetime_format=infer
            )
@pytest.mark.parametrize("utc", [True, None])
@pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None])
@pytest.mark.parametrize("constructor", [list, tuple, np.array, Index, deque])
def test_to_datetime_cache(self, utc, format, constructor):
date = "20130101 00:00:00"
test_dates = [date] * 10 ** 5
data = constructor(test_dates)
result = pd.to_datetime(data, utc=utc, format=format, cache=True)
expected = pd.to_datetime(data, utc=utc, format=format, cache=False)
tm.assert_index_equal(result, expected)
    @pytest.mark.parametrize(
        "listlike",
        [
            (deque([Timestamp("2010-06-02 09:30:00")] * 51)),
            ([Timestamp("2010-06-02 09:30:00")] * 51),
            (tuple([Timestamp("2010-06-02 09:30:00")] * 51)),
        ],
    )
    def test_no_slicing_errors_in_should_cache(self, listlike):
        """should_cache handles non-sliceable containers like deque (GH 29403)."""
        assert tools.should_cache(listlike) is True
def test_to_datetime_from_deque(self):
# GH 29403
result = pd.to_datetime(deque([Timestamp("2010-06-02 09:30:00")] * 51))
expected = pd.to_datetime([Timestamp("2010-06-02 09:30:00")] * 51)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("utc", [True, None])
@pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None])
def test_to_datetime_cache_series(self, utc, format):
date = "20130101 00:00:00"
test_dates = [date] * 10 ** 5
data = Series(test_dates)
result = pd.to_datetime(data, utc=utc, format=format, cache=True)
expected = pd.to_datetime(data, utc=utc, format=format, cache=False)
tm.assert_series_equal(result, expected)
def test_to_datetime_cache_scalar(self):
date = "20130101 00:00:00"
result = pd.to_datetime(date, cache=True)
expected = Timestamp("20130101 00:00:00")
assert result == expected
    @pytest.mark.parametrize(
        "date, format",
        [
            ("2017-20", "%Y-%W"),
            ("20 Sunday", "%W %A"),
            ("20 Sun", "%W %a"),
            ("2017-21", "%Y-%U"),
            ("20 Sunday", "%U %A"),
            ("20 Sun", "%U %a"),
        ],
    )
    def test_week_without_day_and_calendar_year(self, date, format):
        """%W/%U without both a weekday and a year raise ValueError."""
        # GH16774
        msg = "Cannot use '%W' or '%U' without day and year"
        with pytest.raises(ValueError, match=msg):
            pd.to_datetime(date, format=format)
def test_to_datetime_coerce(self):
# GH 26122
ts_strings = [
"March 1, 2018 12:00:00+0400",
"March 1, 2018 12:00:00+0500",
"20100240",
]
result = to_datetime(ts_strings, errors="coerce")
expected = Index(
[
datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 14400)),
datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 18000)),
NaT,
]
)
tm.assert_index_equal(result, expected)
def test_to_datetime_coerce_malformed(self):
# GH 28299
ts_strings = ["200622-12-31", "111111-24-11"]
result = to_datetime(ts_strings, errors="coerce")
expected = Index([NaT, NaT])
tm.assert_index_equal(result, expected)
def test_iso_8601_strings_with_same_offset(self):
# GH 17697, 11736
ts_str = "2015-11-18 15:30:00+05:30"
result = to_datetime(ts_str)
expected = Timestamp(ts_str)
assert result == expected
expected = DatetimeIndex([Timestamp(ts_str)] * 2)