 import os
 from pathlib import Path

-import dateutil.parser
 import numpy as np
 import pytest

+from pandas.compat import IS64
 from pandas.errors import EmptyDataError
 import pandas.util._test_decorators as td

@@ -27,9 +27,9 @@ def data_test_ix(request, dirpath):
     df = pd.read_csv(fname)
     epoch = datetime(1960, 1, 1)
     t1 = pd.to_timedelta(df["Column4"], unit="d")
-    df["Column4"] = epoch + t1
+    df["Column4"] = (epoch + t1).astype("M8[s]")
     t2 = pd.to_timedelta(df["Column12"], unit="d")
-    df["Column12"] = epoch + t2
+    df["Column12"] = (epoch + t2).astype("M8[s]")
     for k in range(df.shape[1]):
         col = df.iloc[:, k]
         if col.dtype == np.int64:
@@ -59,7 +59,7 @@ def test_from_buffer(self, dirpath, data_test_ix):
                 buf, format="sas7bdat", iterator=True, encoding="utf-8"
             ) as rdr:
                 df = rdr.read()
-            tm.assert_frame_equal(df, df0, check_exact=False)
+            tm.assert_frame_equal(df, df0)

     @pytest.mark.slow
     def test_from_iterator(self, dirpath, data_test_ix):
@@ -157,6 +157,8 @@ def test_productsales(datapath):
     df0 = pd.read_csv(fname, parse_dates=["MONTH"])
     vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
     df0[vn] = df0[vn].astype(np.float64)
+
+    df0["MONTH"] = df0["MONTH"].astype("M8[s]")
     tm.assert_frame_equal(df, df0)


@@ -175,7 +177,7 @@ def test_airline(datapath):
     fname = datapath("io", "sas", "data", "airline.csv")
     df0 = pd.read_csv(fname)
     df0 = df0.astype(np.float64)
-    tm.assert_frame_equal(df, df0, check_exact=False)
+    tm.assert_frame_equal(df, df0)


 def test_date_time(datapath):
@@ -191,14 +193,20 @@ def test_date_time(datapath):
     # access to SAS to read the sas7bdat file. We are really just testing
     # that we are "close". This only seems to be an issue near the
     # implementation bounds.
-    res = df.iloc[:, 3].dt.round("us").copy()

-    # the first and last elements are near the implementation bounds, where we
-    # would expect floating point error to occur.
-    res.iloc[0] -= pd.Timedelta(microseconds=1)
-    res.iloc[-1] += pd.Timedelta(microseconds=1)
+    df[df.columns[3]] = df.iloc[:, 3].dt.round("us")
+    df0["Date1"] = df0["Date1"].astype("M8[s]")
+    df0["Date2"] = df0["Date2"].astype("M8[s]")
+    df0["DateTime"] = df0["DateTime"].astype("M8[ms]")
+    df0["Taiw"] = df0["Taiw"].astype("M8[s]")

-    df["DateTimeHi"] = res
+    res = df0["DateTimeHi"].astype("M8[us]").dt.round("ms")
+    df0["DateTimeHi"] = res.astype("M8[ms]")
+
+    if not IS64:
+        # No good reason for this, just what we get on the CI
+        df0.loc[0, "DateTimeHi"] += np.timedelta64(1, "ms")
+        df0.loc[[2, 3], "DateTimeHi"] -= np.timedelta64(1, "ms")

     tm.assert_frame_equal(df, df0)


@@ -258,16 +266,6 @@ def test_corrupt_read(datapath):
         pd.read_sas(fname)


-def round_datetime_to_ms(ts):
-    if isinstance(ts, datetime):
-        return ts.replace(microsecond=int(round(ts.microsecond, -3) / 1000) * 1000)
-    elif isinstance(ts, str):
-        _ts = dateutil.parser.parse(timestr=ts)
-        return _ts.replace(microsecond=int(round(_ts.microsecond, -3) / 1000) * 1000)
-    else:
-        return ts
-
-
 def test_max_sas_date(datapath):
     # GH 20927
     # NB. max datetime in SAS dataset is 31DEC9999:23:59:59.999
@@ -276,30 +274,33 @@ def test_max_sas_date(datapath):
     fname = datapath("io", "sas", "data", "max_sas_date.sas7bdat")
     df = pd.read_sas(fname, encoding="iso-8859-1")

-    # SAS likes to left pad strings with spaces - lstrip before comparing
-    df = df.map(lambda x: x.lstrip() if isinstance(x, str) else x)
-    # GH 19732: Timestamps imported from sas will incur floating point errors
-    try:
-        df["dt_as_dt"] = df["dt_as_dt"].dt.round("us")
-    except pd._libs.tslibs.np_datetime.OutOfBoundsDatetime:
-        df = df.map(round_datetime_to_ms)
-    except AttributeError:
-        df["dt_as_dt"] = df["dt_as_dt"].apply(round_datetime_to_ms)
-    # if there are any date/times > pandas.Timestamp.max then ALL in that chunk
-    # are returned as datetime.datetime
     expected = pd.DataFrame(
         {
             "text": ["max", "normal"],
             "dt_as_float": [253717747199.999, 1880323199.999],
-            "dt_as_dt": [
-                datetime(9999, 12, 29, 23, 59, 59, 999000),
-                datetime(2019, 8, 1, 23, 59, 59, 999000),
-            ],
+            "dt_as_dt": np.array(
+                [
+                    datetime(9999, 12, 29, 23, 59, 59, 999000),
+                    datetime(2019, 8, 1, 23, 59, 59, 999000),
+                ],
+                dtype="M8[ms]",
+            ),
             "date_as_float": [2936547.0, 21762.0],
-            "date_as_date": [datetime(9999, 12, 29), datetime(2019, 8, 1)],
+            "date_as_date": np.array(
+                [
+                    datetime(9999, 12, 29),
+                    datetime(2019, 8, 1),
+                ],
+                dtype="M8[s]",
+            ),
         },
         columns=["text", "dt_as_float", "dt_as_dt", "date_as_float", "date_as_date"],
     )
+
+    if not IS64:
+        # No good reason for this, just what we get on the CI
+        expected.loc[:, "dt_as_dt"] -= np.timedelta64(1, "ms")
+
     tm.assert_frame_equal(df, expected)


@@ -312,41 +313,40 @@ def test_max_sas_date_iterator(datapath):
     fname = datapath("io", "sas", "data", "max_sas_date.sas7bdat")
     results = []
     for df in pd.read_sas(fname, encoding="iso-8859-1", chunksize=1):
-        # SAS likes to left pad strings with spaces - lstrip before comparing
-        df = df.map(lambda x: x.lstrip() if isinstance(x, str) else x)
         # GH 19732: Timestamps imported from sas will incur floating point errors
-        try:
-            df["dt_as_dt"] = df["dt_as_dt"].dt.round("us")
-        except pd._libs.tslibs.np_datetime.OutOfBoundsDatetime:
-            df = df.map(round_datetime_to_ms)
-        except AttributeError:
-            df["dt_as_dt"] = df["dt_as_dt"].apply(round_datetime_to_ms)
         df.reset_index(inplace=True, drop=True)
         results.append(df)
     expected = [
         pd.DataFrame(
             {
                 "text": ["max"],
                 "dt_as_float": [253717747199.999],
-                "dt_as_dt": [datetime(9999, 12, 29, 23, 59, 59, 999000)],
+                "dt_as_dt": np.array(
+                    [datetime(9999, 12, 29, 23, 59, 59, 999000)], dtype="M8[ms]"
+                ),
                 "date_as_float": [2936547.0],
-                "date_as_date": [datetime(9999, 12, 29)],
+                "date_as_date": np.array([datetime(9999, 12, 29)], dtype="M8[s]"),
             },
             columns=col_order,
         ),
         pd.DataFrame(
             {
                 "text": ["normal"],
                 "dt_as_float": [1880323199.999],
-                "dt_as_dt": [np.datetime64("2019-08-01 23:59:59.999")],
+                "dt_as_dt": np.array(["2019-08-01 23:59:59.999"], dtype="M8[ms]"),
                 "date_as_float": [21762.0],
-                "date_as_date": [np.datetime64("2019-08-01")],
+                "date_as_date": np.array(["2019-08-01"], dtype="M8[s]"),
             },
             columns=col_order,
         ),
     ]
-    for result, expected in zip(results, expected):
-        tm.assert_frame_equal(result, expected)
+    if not IS64:
+        # No good reason for this, just what we get on the CI
+        expected[0].loc[0, "dt_as_dt"] -= np.timedelta64(1, "ms")
+        expected[1].loc[0, "dt_as_dt"] -= np.timedelta64(1, "ms")
+
+    tm.assert_frame_equal(results[0], expected[0])
+    tm.assert_frame_equal(results[1], expected[1])


 def test_null_date(datapath):
@@ -355,16 +355,25 @@ def test_null_date(datapath):

     expected = pd.DataFrame(
         {
-            "datecol": [
-                datetime(9999, 12, 29),
-                pd.NaT,
-            ],
-            "datetimecol": [
-                datetime(9999, 12, 29, 23, 59, 59, 998993),
-                pd.NaT,
-            ],
+            "datecol": np.array(
+                [
+                    datetime(9999, 12, 29),
+                    np.datetime64("NaT"),
+                ],
+                dtype="M8[s]",
+            ),
+            "datetimecol": np.array(
+                [
+                    datetime(9999, 12, 29, 23, 59, 59, 999000),
+                    np.datetime64("NaT"),
+                ],
+                dtype="M8[ms]",
+            ),
         },
     )
+    if not IS64:
+        # No good reason for this, just what we get on the CI
+        expected.loc[0, "datetimecol"] -= np.timedelta64(1, "ms")
     tm.assert_frame_equal(df, expected)

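
For context, a minimal sketch (not part of the diff above; the column values are made up for illustration) of what the astype("M8[s]") / astype("M8[ms]") casts in these expected frames do: they convert a nanosecond-resolution datetime column to a coarser datetime64 unit so it compares equal to the second- or millisecond-resolution values the SAS reader now produces. This assumes a pandas version that supports non-nanosecond datetime64 resolutions.

from datetime import datetime

import pandas as pd

# SAS stores dates as float day counts since the 1960-01-01 epoch; mirror the
# fixture above with illustrative values.
epoch = datetime(1960, 1, 1)
days = pd.Series([0.0, 12784.0, 20000.0])
col = epoch + pd.to_timedelta(days, unit="d")  # datetime64[ns] Series

# Cast the expected column to second resolution so it matches the
# datetime64[s] data returned by the reader.
col_s = col.astype("M8[s]")

print(col.dtype)    # datetime64[ns]
print(col_s.dtype)  # datetime64[s]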