6
6
import pytest
7
7
8
8
import pandas as pd
9
- from pandas import Categorical , DataFrame , Series , compat , date_range , timedelta_range
9
+ from pandas import (
10
+ Categorical ,
11
+ DataFrame ,
12
+ Series ,
13
+ SparseDtype ,
14
+ compat ,
15
+ date_range ,
16
+ timedelta_range ,
17
+ )
10
18
import pandas .util .testing as tm
11
19
12
20
13
- class TestDataFrameMisc :
21
+ class SharedWithSparse :
22
+ """
23
+ A collection of tests DataFrame and SparseDataFrame can share.
24
+
25
+ In generic tests on this class, use ``self._assert_frame_equal()`` and
26
+ ``self._assert_series_equal()`` which are implemented in sub-classes
27
+ and dispatch correctly.
28
+ """
29
+
30
+ def _assert_frame_equal (self , left , right ):
31
+ """Dispatch to frame class dependent assertion"""
32
+ raise NotImplementedError
33
+
34
+ def _assert_series_equal (self , left , right ):
35
+ """Dispatch to series class dependent assertion"""
36
+ raise NotImplementedError
37
+
14
38
def test_copy_index_name_checking (self , float_frame ):
15
39
# don't want to be able to modify the index stored elsewhere after
16
40
# making a copy
@@ -117,16 +141,16 @@ def test_tab_completion(self):
117
141
def test_not_hashable (self ):
118
142
empty_frame = DataFrame ()
119
143
120
- df = DataFrame ([1 ])
121
- msg = "'DataFrame' objects are mutable, thus they cannot be hashed"
144
+ df = self . klass ([1 ])
145
+ msg = "'(Sparse)? DataFrame' objects are mutable, thus they cannot be hashed"
122
146
with pytest .raises (TypeError , match = msg ):
123
147
hash (df )
124
148
with pytest .raises (TypeError , match = msg ):
125
149
hash (empty_frame )
126
150
127
151
def test_new_empty_index (self ):
128
- df1 = DataFrame (np .random .randn (0 , 3 ))
129
- df2 = DataFrame (np .random .randn (0 , 3 ))
152
+ df1 = self . klass (np .random .randn (0 , 3 ))
153
+ df2 = self . klass (np .random .randn (0 , 3 ))
130
154
df1 .index .name = "foo"
131
155
assert df2 .index .name is None
132
156
@@ -137,7 +161,7 @@ def test_array_interface(self, float_frame):
137
161
assert result .index is float_frame .index
138
162
assert result .columns is float_frame .columns
139
163
140
- tm . assert_frame_equal (result , float_frame .apply (np .sqrt ))
164
+ self . _assert_frame_equal (result , float_frame .apply (np .sqrt ))
141
165
142
166
def test_get_agg_axis (self , float_frame ):
143
167
cols = float_frame ._get_agg_axis (0 )
@@ -163,9 +187,9 @@ def test_nonzero(self, float_frame, float_string_frame):
163
187
assert not df .empty
164
188
165
189
def test_iteritems (self ):
166
- df = DataFrame ([[1 , 2 , 3 ], [4 , 5 , 6 ]], columns = ["a" , "a" , "b" ])
190
+ df = self . klass ([[1 , 2 , 3 ], [4 , 5 , 6 ]], columns = ["a" , "a" , "b" ])
167
191
for k , v in df .items ():
168
- assert isinstance (v , DataFrame ._constructor_sliced )
192
+ assert isinstance (v , self . klass ._constructor_sliced )
169
193
170
194
def test_items (self ):
171
195
# GH 17213, GH 13918
@@ -182,23 +206,23 @@ def test_iter(self, float_frame):
182
206
def test_iterrows (self , float_frame , float_string_frame ):
183
207
for k , v in float_frame .iterrows ():
184
208
exp = float_frame .loc [k ]
185
- tm . assert_series_equal (v , exp )
209
+ self . _assert_series_equal (v , exp )
186
210
187
211
for k , v in float_string_frame .iterrows ():
188
212
exp = float_string_frame .loc [k ]
189
- tm . assert_series_equal (v , exp )
213
+ self . _assert_series_equal (v , exp )
190
214
191
215
def test_iterrows_iso8601 (self ):
192
216
# GH 19671
193
- s = DataFrame (
217
+ s = self . klass (
194
218
{
195
219
"non_iso8601" : ["M1701" , "M1802" , "M1903" , "M2004" ],
196
220
"iso8601" : date_range ("2000-01-01" , periods = 4 , freq = "M" ),
197
221
}
198
222
)
199
223
for k , v in s .iterrows ():
200
224
exp = s .loc [k ]
201
- tm . assert_series_equal (v , exp )
225
+ self . _assert_series_equal (v , exp )
202
226
203
227
def test_iterrows_corner (self ):
204
228
# gh-12222
@@ -224,19 +248,19 @@ def test_iterrows_corner(self):
224
248
225
249
def test_itertuples (self , float_frame ):
226
250
for i , tup in enumerate (float_frame .itertuples ()):
227
- s = DataFrame ._constructor_sliced (tup [1 :])
251
+ s = self . klass ._constructor_sliced (tup [1 :])
228
252
s .name = tup [0 ]
229
253
expected = float_frame .iloc [i , :].reset_index (drop = True )
230
- tm . assert_series_equal (s , expected )
254
+ self . _assert_series_equal (s , expected )
231
255
232
- df = DataFrame (
256
+ df = self . klass (
233
257
{"floats" : np .random .randn (5 ), "ints" : range (5 )}, columns = ["floats" , "ints" ]
234
258
)
235
259
236
260
for tup in df .itertuples (index = False ):
237
261
assert isinstance (tup [1 ], int )
238
262
239
- df = DataFrame (data = {"a" : [1 , 2 , 3 ], "b" : [4 , 5 , 6 ]})
263
+ df = self . klass (data = {"a" : [1 , 2 , 3 ], "b" : [4 , 5 , 6 ]})
240
264
dfaa = df [["a" , "a" ]]
241
265
242
266
assert list (dfaa .itertuples ()) == [(0 , 1 , 1 ), (1 , 2 , 2 ), (2 , 3 , 3 )]
@@ -291,7 +315,7 @@ def test_sequence_like_with_categorical(self):
291
315
def test_len (self , float_frame ):
292
316
assert len (float_frame ) == len (float_frame .index )
293
317
294
- def test_values_mixed_dtypes (self , float_frame , float_string_frame ):
318
+ def test_values (self , float_frame , float_string_frame ):
295
319
frame = float_frame
296
320
arr = frame .values
297
321
@@ -308,7 +332,7 @@ def test_values_mixed_dtypes(self, float_frame, float_string_frame):
308
332
arr = float_string_frame [["foo" , "A" ]].values
309
333
assert arr [0 , 0 ] == "bar"
310
334
311
- df = DataFrame ({"complex" : [1j , 2j , 3j ], "real" : [1 , 2 , 3 ]})
335
+ df = self . klass ({"complex" : [1j , 2j , 3j ], "real" : [1 , 2 , 3 ]})
312
336
arr = df .values
313
337
assert arr [0 , 0 ] == 1j
314
338
@@ -348,17 +372,17 @@ def test_transpose(self, float_frame):
348
372
349
373
# mixed type
350
374
index , data = tm .getMixedTypeDict ()
351
- mixed = DataFrame (data , index = index )
375
+ mixed = self . klass (data , index = index )
352
376
353
377
mixed_T = mixed .T
354
378
for col , s in mixed_T .items ():
355
379
assert s .dtype == np .object_
356
380
357
381
def test_swapaxes (self ):
358
- df = DataFrame (np .random .randn (10 , 5 ))
359
- tm . assert_frame_equal (df .T , df .swapaxes (0 , 1 ))
360
- tm . assert_frame_equal (df .T , df .swapaxes (1 , 0 ))
361
- tm . assert_frame_equal (df , df .swapaxes (0 , 0 ))
382
+ df = self . klass (np .random .randn (10 , 5 ))
383
+ self . _assert_frame_equal (df .T , df .swapaxes (0 , 1 ))
384
+ self . _assert_frame_equal (df .T , df .swapaxes (1 , 0 ))
385
+ self . _assert_frame_equal (df , df .swapaxes (0 , 0 ))
362
386
msg = (
363
387
"No axis named 2 for object type"
364
388
r" <class 'pandas.core(.sparse)?.frame.(Sparse)?DataFrame'>"
@@ -389,7 +413,7 @@ def test_more_values(self, float_string_frame):
389
413
assert values .shape [1 ] == len (float_string_frame .columns )
390
414
391
415
def test_repr_with_mi_nat (self , float_string_frame ):
392
- df = DataFrame (
416
+ df = self . klass (
393
417
{"X" : [1 , 2 ]}, index = [[pd .NaT , pd .Timestamp ("20130101" )], ["a" , "b" ]]
394
418
)
395
419
result = repr (df )
@@ -406,26 +430,26 @@ def test_series_put_names(self, float_string_frame):
406
430
assert v .name == k
407
431
408
432
def test_empty_nonzero (self ):
409
- df = DataFrame ([1 , 2 , 3 ])
433
+ df = self . klass ([1 , 2 , 3 ])
410
434
assert not df .empty
411
- df = DataFrame (index = [1 ], columns = [1 ])
435
+ df = self . klass (index = [1 ], columns = [1 ])
412
436
assert not df .empty
413
- df = DataFrame (index = ["a" , "b" ], columns = ["c" , "d" ]).dropna ()
437
+ df = self . klass (index = ["a" , "b" ], columns = ["c" , "d" ]).dropna ()
414
438
assert df .empty
415
439
assert df .T .empty
416
440
empty_frames = [
417
- DataFrame (),
418
- DataFrame (index = [1 ]),
419
- DataFrame (columns = [1 ]),
420
- DataFrame ({1 : []}),
441
+ self . klass (),
442
+ self . klass (index = [1 ]),
443
+ self . klass (columns = [1 ]),
444
+ self . klass ({1 : []}),
421
445
]
422
446
for df in empty_frames :
423
447
assert df .empty
424
448
assert df .T .empty
425
449
426
450
def test_with_datetimelikes (self ):
427
451
428
- df = DataFrame (
452
+ df = self . klass (
429
453
{
430
454
"A" : date_range ("20130101" , periods = 10 ),
431
455
"B" : timedelta_range ("1 day" , periods = 10 ),
@@ -434,9 +458,20 @@ def test_with_datetimelikes(self):
434
458
t = df .T
435
459
436
460
result = t .dtypes .value_counts ()
437
- expected = Series ({np .dtype ("object" ): 10 })
461
+ if self .klass is DataFrame :
462
+ expected = Series ({np .dtype ("object" ): 10 })
463
+ else :
464
+ expected = Series ({SparseDtype (dtype = object ): 10 })
438
465
tm .assert_series_equal (result , expected )
439
466
467
+
468
+ class TestDataFrameMisc (SharedWithSparse ):
469
+
470
+ klass = DataFrame
471
+ # SharedWithSparse tests use generic, klass-agnostic assertion
472
+ _assert_frame_equal = staticmethod (tm .assert_frame_equal )
473
+ _assert_series_equal = staticmethod (tm .assert_series_equal )
474
+
440
475
def test_values (self , float_frame ):
441
476
float_frame .values [:, 0 ] = 5.0
442
477
assert (float_frame .values [:, 0 ] == 5 ).all ()
0 commit comments