@@ -36,10 +36,9 @@ class TestDataFrameToCSV(tm.TestCase, TestData):
36
36
37
37
_multiprocess_can_split_ = True
38
38
39
- def test_to_csv_from_csv (self ):
39
+ def test_to_csv_from_csv1 (self ):
40
40
41
- pname = '__tmp_to_csv_from_csv__'
42
- with ensure_clean (pname ) as path :
41
+ with ensure_clean ('__tmp_to_csv_from_csv1__' ) as path :
43
42
self .frame ['A' ][:5 ] = nan
44
43
45
44
self .frame .to_csv (path )
@@ -69,7 +68,9 @@ def test_to_csv_from_csv(self):
69
68
recons = DataFrame .from_csv (path )
70
69
assert_frame_equal (dm , recons )
71
70
72
- with ensure_clean (pname ) as path :
71
+ def test_to_csv_from_csv2 (self ):
72
+
73
+ with ensure_clean ('__tmp_to_csv_from_csv2__' ) as path :
73
74
74
75
# duplicate index
75
76
df = DataFrame (np .random .randn (3 , 3 ), index = ['a' , 'a' , 'b' ],
@@ -101,7 +102,9 @@ def test_to_csv_from_csv(self):
101
102
self .assertRaises (ValueError , self .frame2 .to_csv , path ,
102
103
header = ['AA' , 'X' ])
103
104
104
- with ensure_clean (pname ) as path :
105
+ def test_to_csv_from_csv3 (self ):
106
+
107
+ with ensure_clean ('__tmp_to_csv_from_csv3__' ) as path :
105
108
df1 = DataFrame (np .random .randn (3 , 1 ))
106
109
df2 = DataFrame (np .random .randn (3 , 1 ))
107
110
@@ -113,7 +116,9 @@ def test_to_csv_from_csv(self):
113
116
xp .columns = lmap (int , xp .columns )
114
117
assert_frame_equal (xp , rs )
115
118
116
- with ensure_clean () as path :
119
+ def test_to_csv_from_csv4 (self ):
120
+
121
+ with ensure_clean ('__tmp_to_csv_from_csv4__' ) as path :
117
122
# GH 10833 (TimedeltaIndex formatting)
118
123
dt = pd .Timedelta (seconds = 1 )
119
124
df = pd .DataFrame ({'dt_data' : [i * dt for i in range (3 )]},
@@ -129,8 +134,10 @@ def test_to_csv_from_csv(self):
129
134
130
135
assert_frame_equal (df , result , check_index_type = True )
131
136
137
+ def test_to_csv_from_csv5 (self ):
138
+
132
139
# tz, 8260
133
- with ensure_clean (pname ) as path :
140
+ with ensure_clean ('__tmp_to_csv_from_csv5__' ) as path :
134
141
135
142
self .tzframe .to_csv (path )
136
143
result = pd .read_csv (path , index_col = 0 , parse_dates = ['A' ])
@@ -212,26 +219,56 @@ def _check_df(df, cols=None):
212
219
cols = ['b' , 'a' ]
213
220
_check_df (df , cols )
214
221
222
@slow
def test_to_csv_dtnat(self):
    """Round-trip datetime columns holding NaT through a chunked to_csv.

    Builds two datetime columns (one with NaT entries scattered at random
    positions, one without), writes them with ``chunksize`` smaller than
    the row count, reads the file back and checks the frames match.
    """
    # GH3437
    from pandas import NaT

    def make_dtnat_arr(n, nnat=None):
        # Return a list of ``n`` timestamps spaced 5 minutes apart.  Unless
        # ``nnat`` is 0, ``nnat`` randomly drawn positions (default: 10% of
        # ``n``) are overwritten with NaT.
        count = int(n * 0.1) if nnat is None else nnat
        stamps = list(date_range('2000', freq='5min', periods=n))
        if not count:
            return stamps
        for pos in np.random.randint(0, len(stamps), count):
            stamps[pos] = NaT
        # also blank one random offset, counted from both ends of the list
        offset = np.random.randint(100)
        stamps[-offset] = NaT
        stamps[offset] = NaT
        return stamps

    chunksize = 1000
    # five rows more than one chunk, so the write needs more than one chunk
    with_nat = make_dtnat_arr(chunksize + 5)
    without_nat = make_dtnat_arr(chunksize + 5, 0)

    with ensure_clean('1.csv') as csv_path:
        df = DataFrame({'a': with_nat, 'b': without_nat})
        df.to_csv(csv_path, chunksize=chunksize)
        loaded = DataFrame.from_csv(csv_path)
        recons = loaded._convert(datetime=True, coerce=True)
        assert_frame_equal(df, recons,
                           check_names=False,
                           check_less_precise=True)
252
+
215
253
@slow
216
254
def test_to_csv_moar (self ):
217
- path = '__tmp_to_csv_moar__'
218
255
219
- def _do_test (df , path , r_dtype = None , c_dtype = None ,
256
+ def _do_test (df , r_dtype = None , c_dtype = None ,
220
257
rnlvl = None , cnlvl = None , dupe_col = False ):
221
258
222
259
kwargs = dict (parse_dates = False )
223
260
if cnlvl :
224
261
if rnlvl is not None :
225
262
kwargs ['index_col' ] = lrange (rnlvl )
226
263
kwargs ['header' ] = lrange (cnlvl )
227
- with ensure_clean (path ) as path :
264
+ with ensure_clean ('__tmp_to_csv_moar__' ) as path :
228
265
df .to_csv (path , encoding = 'utf8' ,
229
266
chunksize = chunksize , tupleize_cols = False )
230
267
recons = DataFrame .from_csv (
231
268
path , tupleize_cols = False , ** kwargs )
232
269
else :
233
270
kwargs ['header' ] = 0
234
- with ensure_clean (path ) as path :
271
+ with ensure_clean ('__tmp_to_csv_moar__' ) as path :
235
272
df .to_csv (path , encoding = 'utf8' , chunksize = chunksize )
236
273
recons = DataFrame .from_csv (path , ** kwargs )
237
274
@@ -307,50 +344,21 @@ def _to_uni(x):
307
344
N = 100
308
345
chunksize = 1000
309
346
310
- # GH3437
311
- from pandas import NaT
312
-
313
- def make_dtnat_arr (n , nnat = None ):
314
- if nnat is None :
315
- nnat = int (n * 0.1 ) # 10%
316
- s = list (date_range ('2000' , freq = '5min' , periods = n ))
317
- if nnat :
318
- for i in np .random .randint (0 , len (s ), nnat ):
319
- s [i ] = NaT
320
- i = np .random .randint (100 )
321
- s [- i ] = NaT
322
- s [i ] = NaT
323
- return s
324
-
325
- # N=35000
326
- s1 = make_dtnat_arr (chunksize + 5 )
327
- s2 = make_dtnat_arr (chunksize + 5 , 0 )
328
- path = '1.csv'
329
-
330
- # s3=make_dtnjat_arr(chunksize+5,0)
331
- with ensure_clean ('.csv' ) as pth :
332
- df = DataFrame (dict (a = s1 , b = s2 ))
333
- df .to_csv (pth , chunksize = chunksize )
334
- recons = DataFrame .from_csv (pth )._convert (datetime = True ,
335
- coerce = True )
336
- assert_frame_equal (df , recons , check_names = False ,
337
- check_less_precise = True )
338
-
339
347
for ncols in [4 ]:
340
348
base = int ((chunksize // ncols or 1 ) or 1 )
341
349
for nrows in [2 , 10 , N - 1 , N , N + 1 , N + 2 , 2 * N - 2 ,
342
350
2 * N - 1 , 2 * N , 2 * N + 1 , 2 * N + 2 ,
343
351
base - 1 , base , base + 1 ]:
344
352
_do_test (mkdf (nrows , ncols , r_idx_type = 'dt' ,
345
- c_idx_type = 's' ), path , 'dt' , 's' )
353
+ c_idx_type = 's' ), 'dt' , 's' )
346
354
347
355
for ncols in [4 ]:
348
356
base = int ((chunksize // ncols or 1 ) or 1 )
349
357
for nrows in [2 , 10 , N - 1 , N , N + 1 , N + 2 , 2 * N - 2 ,
350
358
2 * N - 1 , 2 * N , 2 * N + 1 , 2 * N + 2 ,
351
359
base - 1 , base , base + 1 ]:
352
360
_do_test (mkdf (nrows , ncols , r_idx_type = 'dt' ,
353
- c_idx_type = 's' ), path , 'dt' , 's' )
361
+ c_idx_type = 's' ), 'dt' , 's' )
354
362
pass
355
363
356
364
for r_idx_type , c_idx_type in [('i' , 'i' ), ('s' , 's' ), ('u' , 'dt' ),
@@ -362,14 +370,14 @@ def make_dtnat_arr(n, nnat=None):
362
370
base - 1 , base , base + 1 ]:
363
371
_do_test (mkdf (nrows , ncols , r_idx_type = r_idx_type ,
364
372
c_idx_type = c_idx_type ),
365
- path , r_idx_type , c_idx_type )
373
+ r_idx_type , c_idx_type )
366
374
367
375
for ncols in [1 , 2 , 3 , 4 ]:
368
376
base = int ((chunksize // ncols or 1 ) or 1 )
369
377
for nrows in [10 , N - 2 , N - 1 , N , N + 1 , N + 2 , 2 * N - 2 ,
370
378
2 * N - 1 , 2 * N , 2 * N + 1 , 2 * N + 2 ,
371
379
base - 1 , base , base + 1 ]:
372
- _do_test (mkdf (nrows , ncols ), path )
380
+ _do_test (mkdf (nrows , ncols ))
373
381
374
382
for nrows in [10 , N - 2 , N - 1 , N , N + 1 , N + 2 ]:
375
383
df = mkdf (nrows , 3 )
@@ -381,19 +389,19 @@ def make_dtnat_arr(n, nnat=None):
381
389
ix [- 2 :] = ["rdupe" , "rdupe" ]
382
390
df .index = ix
383
391
df .columns = cols
384
- _do_test (df , path , dupe_col = True )
392
+ _do_test (df , dupe_col = True )
385
393
386
- _do_test (DataFrame (index = lrange (10 )), path )
387
- _do_test (mkdf (chunksize // 2 + 1 , 2 , r_idx_nlevels = 2 ), path , rnlvl = 2 )
394
+ _do_test (DataFrame (index = lrange (10 )))
395
+ _do_test (mkdf (chunksize // 2 + 1 , 2 , r_idx_nlevels = 2 ), rnlvl = 2 )
388
396
for ncols in [2 , 3 , 4 ]:
389
397
base = int (chunksize // ncols )
390
398
for nrows in [10 , N - 2 , N - 1 , N , N + 1 , N + 2 , 2 * N - 2 ,
391
399
2 * N - 1 , 2 * N , 2 * N + 1 , 2 * N + 2 ,
392
400
base - 1 , base , base + 1 ]:
393
- _do_test (mkdf (nrows , ncols , r_idx_nlevels = 2 ), path , rnlvl = 2 )
394
- _do_test (mkdf (nrows , ncols , c_idx_nlevels = 2 ), path , cnlvl = 2 )
401
+ _do_test (mkdf (nrows , ncols , r_idx_nlevels = 2 ), rnlvl = 2 )
402
+ _do_test (mkdf (nrows , ncols , c_idx_nlevels = 2 ), cnlvl = 2 )
395
403
_do_test (mkdf (nrows , ncols , r_idx_nlevels = 2 , c_idx_nlevels = 2 ),
396
- path , rnlvl = 2 , cnlvl = 2 )
404
+ rnlvl = 2 , cnlvl = 2 )
397
405
398
406
def test_to_csv_from_csv_w_some_infs (self ):
399
407
@@ -428,8 +436,7 @@ def test_to_csv_from_csv_w_all_infs(self):
428
436
429
437
def test_to_csv_no_index (self ):
430
438
# GH 3624, after appending columns, to_csv fails
431
- pname = '__tmp_to_csv_no_index__'
432
- with ensure_clean (pname ) as path :
439
+ with ensure_clean ('__tmp_to_csv_no_index__' ) as path :
433
440
df = DataFrame ({'c1' : [1 , 2 , 3 ], 'c2' : [4 , 5 , 6 ]})
434
441
df .to_csv (path , index = False )
435
442
result = read_csv (path )
@@ -451,10 +458,9 @@ def test_to_csv_with_mix_columns(self):
451
458
def test_to_csv_headers (self ):
452
459
# GH6186, the presence or absence of `index` incorrectly
453
460
# causes to_csv to have different header semantics.
454
- pname = '__tmp_to_csv_headers__'
455
461
from_df = DataFrame ([[1 , 2 ], [3 , 4 ]], columns = ['A' , 'B' ])
456
462
to_df = DataFrame ([[1 , 2 ], [3 , 4 ]], columns = ['X' , 'Y' ])
457
- with ensure_clean (pname ) as path :
463
+ with ensure_clean ('__tmp_to_csv_headers__' ) as path :
458
464
from_df .to_csv (path , header = ['X' , 'Y' ])
459
465
recons = DataFrame .from_csv (path )
460
466
assert_frame_equal (to_df , recons )
@@ -466,14 +472,13 @@ def test_to_csv_headers(self):
466
472
467
473
def test_to_csv_multiindex (self ):
468
474
469
- pname = '__tmp_to_csv_multiindex__'
470
475
frame = self .frame
471
476
old_index = frame .index
472
477
arrays = np .arange (len (old_index ) * 2 ).reshape (2 , - 1 )
473
478
new_index = MultiIndex .from_arrays (arrays , names = ['first' , 'second' ])
474
479
frame .index = new_index
475
480
476
- with ensure_clean (pname ) as path :
481
+ with ensure_clean ('__tmp_to_csv_multiindex__' ) as path :
477
482
478
483
frame .to_csv (path , header = False )
479
484
frame .to_csv (path , columns = ['A' , 'B' ])
@@ -514,7 +519,7 @@ def test_to_csv_multiindex(self):
514
519
# needed if setUP becomes classmethod
515
520
self .tsframe .index = old_index
516
521
517
- with ensure_clean (pname ) as path :
522
+ with ensure_clean ('__tmp_to_csv_multiindex__' ) as path :
518
523
# GH3571, GH1651, GH3141
519
524
520
525
def _make_frame (names = None ):
@@ -618,7 +623,7 @@ def _make_frame(names=None):
618
623
'MultiIndex' ):
619
624
df .to_csv (path , tupleize_cols = False , columns = ['foo' , 'bar' ])
620
625
621
- with ensure_clean (pname ) as path :
626
+ with ensure_clean ('__tmp_to_csv_multiindex__' ) as path :
622
627
# empty
623
628
tsframe [:0 ].to_csv (path )
624
629
recons = DataFrame .from_csv (path )
@@ -1022,8 +1027,7 @@ def test_to_csv_compression_value_error(self):
1022
1027
1023
1028
def test_to_csv_date_format (self ):
1024
1029
from pandas import to_datetime
1025
- pname = '__tmp_to_csv_date_format__'
1026
- with ensure_clean (pname ) as path :
1030
+ with ensure_clean ('__tmp_to_csv_date_format__' ) as path :
1027
1031
for engine in [None , 'python' ]:
1028
1032
w = FutureWarning if engine == 'python' else None
1029
1033
0 commit comments