1
1
from io import StringIO
2
2
import re
3
+ from string import ascii_uppercase as uppercase
3
4
import sys
4
5
import textwrap
5
6
8
9
9
10
from pandas .compat import PYPY
10
11
11
- import pandas as pd
12
+ from pandas import (
13
+ CategoricalIndex ,
14
+ DataFrame ,
15
+ MultiIndex ,
16
+ Series ,
17
+ date_range ,
18
+ option_context ,
19
+ reset_option ,
20
+ set_option ,
21
+ )
22
+ import pandas ._testing as tm
23
+
24
+
25
@pytest.fixture
def datetime_frame():
    """
    Fixture for DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']

                       A         B         C         D
    2000-01-03 -1.122153  0.468535  0.122226  1.693711
    2000-01-04  0.189378  0.486100  0.007864 -1.216052
    2000-01-05  0.041401 -0.835752 -0.035279 -0.414357
    2000-01-06  0.430050  0.894352  0.090719  0.036939
    2000-01-07 -0.620982 -0.668211 -0.706153  1.466335
    2000-01-10 -0.752633  0.328434 -0.815325  0.699674
    2000-01-11 -2.236969  0.615737 -0.829076 -1.196106
    ...               ...       ...       ...       ...
    2000-02-03  1.642618 -0.579288  0.046005  1.385249
    2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351
    2000-02-07 -2.656149 -0.601387  1.410148  0.444150
    2000-02-08 -1.201881 -1.289040  0.772992 -1.445300
    2000-02-09  1.377373  0.398619  1.008453 -0.928207
    2000-02-10  0.473194 -0.636677  0.984058  0.511519
    2000-02-11 -0.965556  0.408313 -1.312844 -0.381948

    [30 rows x 4 columns]
    """
    # The data is produced by the pandas testing helper and wrapped in a
    # DataFrame; this fixture adds nothing else on top of it.
    return DataFrame(tm.getTimeSeriesData())
12
52
13
53
14
54
def test_info_categorical_column ():
15
55
16
56
# make sure it works
17
57
n = 2500
18
- df = pd . DataFrame ({"int64" : np .random .randint (100 , size = n )})
19
- df ["category" ] = pd . Series (
58
+ df = DataFrame ({"int64" : np .random .randint (100 , size = n )})
59
+ df ["category" ] = Series (
20
60
np .array (list ("abcdefghij" )).take (np .random .randint (0 , 10 , size = n ))
21
61
).astype ("category" )
22
62
df .isna ()
@@ -33,7 +73,7 @@ def test_info(float_frame, datetime_frame):
33
73
float_frame .info (buf = io )
34
74
datetime_frame .info (buf = io )
35
75
36
- frame = pd . DataFrame (np .random .randn (5 , 3 ))
76
+ frame = DataFrame (np .random .randn (5 , 3 ))
37
77
38
78
frame .info ()
39
79
frame .info (verbose = False )
@@ -43,7 +83,7 @@ def test_info_verbose():
43
83
buf = StringIO ()
44
84
size = 1001
45
85
start = 5
46
- frame = pd . DataFrame (np .random .randn (3 , size ))
86
+ frame = DataFrame (np .random .randn (3 , size ))
47
87
frame .info (verbose = True , buf = buf )
48
88
49
89
res = buf .getvalue ()
@@ -63,7 +103,7 @@ def test_info_verbose():
63
103
64
104
def test_info_memory ():
65
105
# https://github.com/pandas-dev/pandas/issues/21056
66
- df = pd . DataFrame ({"a" : pd . Series ([1 , 2 ], dtype = "i8" )})
106
+ df = DataFrame ({"a" : Series ([1 , 2 ], dtype = "i8" )})
67
107
buf = StringIO ()
68
108
df .info (buf = buf )
69
109
result = buf .getvalue ()
@@ -84,10 +124,8 @@ def test_info_memory():
84
124
85
125
86
126
def test_info_wide ():
87
- from pandas import set_option , reset_option
88
-
89
127
io = StringIO ()
90
- df = pd . DataFrame (np .random .randn (5 , 101 ))
128
+ df = DataFrame (np .random .randn (5 , 101 ))
91
129
df .info (buf = io )
92
130
93
131
io = StringIO ()
@@ -107,15 +145,15 @@ def test_info_duplicate_columns():
107
145
io = StringIO ()
108
146
109
147
# it works!
110
- frame = pd . DataFrame (np .random .randn (1500 , 4 ), columns = ["a" , "a" , "b" , "b" ])
148
+ frame = DataFrame (np .random .randn (1500 , 4 ), columns = ["a" , "a" , "b" , "b" ])
111
149
frame .info (buf = io )
112
150
113
151
114
152
def test_info_duplicate_columns_shows_correct_dtypes ():
115
153
# GH11761
116
154
io = StringIO ()
117
155
118
- frame = pd . DataFrame ([[1 , 2.0 ]], columns = ["a" , "a" ])
156
+ frame = DataFrame ([[1 , 2.0 ]], columns = ["a" , "a" ])
119
157
frame .info (buf = io )
120
158
io .seek (0 )
121
159
lines = io .readlines ()
@@ -137,7 +175,7 @@ def test_info_shows_column_dtypes():
137
175
n = 10
138
176
for i , dtype in enumerate (dtypes ):
139
177
data [i ] = np .random .randint (2 , size = n ).astype (dtype )
140
- df = pd . DataFrame (data )
178
+ df = DataFrame (data )
141
179
buf = StringIO ()
142
180
df .info (buf = buf )
143
181
res = buf .getvalue ()
@@ -152,10 +190,10 @@ def test_info_shows_column_dtypes():
152
190
153
191
154
192
def test_info_max_cols ():
155
- df = pd . DataFrame (np .random .randn (10 , 5 ))
193
+ df = DataFrame (np .random .randn (10 , 5 ))
156
194
for len_ , verbose in [(5 , None ), (5 , False ), (12 , True )]:
157
195
# For verbose always ^ setting ^ summarize ^ full output
158
- with pd . option_context ("max_info_columns" , 4 ):
196
+ with option_context ("max_info_columns" , 4 ):
159
197
buf = StringIO ()
160
198
df .info (buf = buf , verbose = verbose )
161
199
res = buf .getvalue ()
@@ -164,22 +202,22 @@ def test_info_max_cols():
164
202
for len_ , verbose in [(12 , None ), (5 , False ), (12 , True )]:
165
203
166
204
# max_cols not exceeded
167
- with pd . option_context ("max_info_columns" , 5 ):
205
+ with option_context ("max_info_columns" , 5 ):
168
206
buf = StringIO ()
169
207
df .info (buf = buf , verbose = verbose )
170
208
res = buf .getvalue ()
171
209
assert len (res .strip ().split ("\n " )) == len_
172
210
173
211
for len_ , max_cols in [(12 , 5 ), (5 , 4 )]:
174
212
# setting truncates
175
- with pd . option_context ("max_info_columns" , 4 ):
213
+ with option_context ("max_info_columns" , 4 ):
176
214
buf = StringIO ()
177
215
df .info (buf = buf , max_cols = max_cols )
178
216
res = buf .getvalue ()
179
217
assert len (res .strip ().split ("\n " )) == len_
180
218
181
219
# setting wouldn't truncate
182
- with pd . option_context ("max_info_columns" , 5 ):
220
+ with option_context ("max_info_columns" , 5 ):
183
221
buf = StringIO ()
184
222
df .info (buf = buf , max_cols = max_cols )
185
223
res = buf .getvalue ()
@@ -201,7 +239,7 @@ def test_info_memory_usage():
201
239
n = 10
202
240
for i , dtype in enumerate (dtypes ):
203
241
data [i ] = np .random .randint (2 , size = n ).astype (dtype )
204
- df = pd . DataFrame (data )
242
+ df = DataFrame (data )
205
243
buf = StringIO ()
206
244
207
245
# display memory usage case
@@ -232,10 +270,10 @@ def test_info_memory_usage():
232
270
n = 100
233
271
for i , dtype in enumerate (dtypes ):
234
272
data [i ] = np .random .randint (2 , size = n ).astype (dtype )
235
- df = pd . DataFrame (data )
273
+ df = DataFrame (data )
236
274
df .columns = dtypes
237
275
238
- df_with_object_index = pd . DataFrame ({"a" : [1 ]}, index = ["foo" ])
276
+ df_with_object_index = DataFrame ({"a" : [1 ]}, index = ["foo" ])
239
277
df_with_object_index .info (buf = buf , memory_usage = True )
240
278
res = buf .getvalue ().splitlines ()
241
279
assert re .match (r"memory usage: [^+]+\+" , res [- 1 ])
@@ -258,10 +296,10 @@ def test_info_memory_usage():
258
296
assert df .memory_usage ().sum () == df .memory_usage (deep = True ).sum ()
259
297
260
298
# test for validity
261
- pd . DataFrame (1 , index = ["a" ], columns = ["A" ]).memory_usage (index = True )
262
- pd . DataFrame (1 , index = ["a" ], columns = ["A" ]).index .nbytes
263
- df = pd . DataFrame (
264
- data = 1 , index = pd . MultiIndex .from_product ([["a" ], range (1000 )]), columns = ["A" ],
299
+ DataFrame (1 , index = ["a" ], columns = ["A" ]).memory_usage (index = True )
300
+ DataFrame (1 , index = ["a" ], columns = ["A" ]).index .nbytes
301
+ df = DataFrame (
302
+ data = 1 , index = MultiIndex .from_product ([["a" ], range (1000 )]), columns = ["A" ],
265
303
)
266
304
df .index .nbytes
267
305
df .memory_usage (index = True )
@@ -273,32 +311,32 @@ def test_info_memory_usage():
273
311
274
312
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
    """
    Check that ``memory_usage(deep=True)`` reports more than the shallow call.

    Skipped when ``PYPY`` is true. Two frames are checked: one whose index
    holds a string label, and one whose single column holds a string.
    """
    # String label in the index: the deep total must be strictly larger.
    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
    assert (
        df_with_object_index.memory_usage(index=True, deep=True).sum()
        > df_with_object_index.memory_usage(index=True).sum()
    )

    # String value in a column: same strict inequality.
    df_object = DataFrame({"a": ["a"]})
    assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum()
284
322
285
323
286
324
@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
    """
    Check that ``memory_usage(deep=True)`` equals the shallow call on PyPy.

    Skipped unless ``PYPY`` is true. Two frames are checked: one whose index
    holds a string label, and one whose single column holds a string.
    """
    # String label in the index: deep and shallow totals must match.
    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
    assert (
        df_with_object_index.memory_usage(index=True, deep=True).sum()
        == df_with_object_index.memory_usage(index=True).sum()
    )

    # String value in a column: same equality.
    df_object = DataFrame({"a": ["a"]})
    assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum()
296
334
297
335
298
336
@pytest .mark .skipif (PYPY , reason = "PyPy getsizeof() fails by design" )
299
337
def test_usage_via_getsizeof ():
300
- df = pd . DataFrame (
301
- data = 1 , index = pd . MultiIndex .from_product ([["a" ], range (1000 )]), columns = ["A" ],
338
+ df = DataFrame (
339
+ data = 1 , index = MultiIndex .from_product ([["a" ], range (1000 )]), columns = ["A" ],
302
340
)
303
341
mem = df .memory_usage (deep = True ).sum ()
304
342
# sys.getsizeof will call the .memory_usage with
@@ -310,27 +348,27 @@ def test_usage_via_getsizeof():
310
348
def test_info_memory_usage_qualified():
    """
    Check when the text written by ``DataFrame.info`` contains a ``+``.

    Four frames that differ only in their index are rendered into a fresh
    buffer each: an integer index and an all-integer MultiIndex must produce
    no ``+``, while a string index and a MultiIndex with a string level must
    produce one.
    """
    # Integer index: no "+" expected anywhere in the output.
    buf = StringIO()
    df = DataFrame(1, columns=list("ab"), index=[1, 2, 3])
    df.info(buf=buf)
    assert "+" not in buf.getvalue()

    # String index: a "+" is expected.
    buf = StringIO()
    df = DataFrame(1, columns=list("ab"), index=list("ABC"))
    df.info(buf=buf)
    assert "+" in buf.getvalue()

    # MultiIndex built from two integer ranges: no "+" expected.
    buf = StringIO()
    df = DataFrame(
        1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)]),
    )
    df.info(buf=buf)
    assert "+" not in buf.getvalue()

    # MultiIndex with one string level: a "+" is expected.
    buf = StringIO()
    df = DataFrame(
        1,
        columns=list("ab"),
        index=MultiIndex.from_product([range(3), ["foo", "bar"]]),
    )
    df.info(buf=buf)
    assert "+" in buf.getvalue()
@@ -340,17 +378,15 @@ def test_info_memory_usage_bug_on_multiindex():
340
378
# GH 14308
341
379
# memory usage introspection should not materialize .values
342
380
343
- from string import ascii_uppercase as uppercase
344
-
345
381
def memory_usage (f ):
346
382
return f .memory_usage (deep = True ).sum ()
347
383
348
384
N = 100
349
385
M = len (uppercase )
350
- index = pd . MultiIndex .from_product (
351
- [list (uppercase ), pd . date_range ("20160101" , periods = N )], names = ["id" , "date" ],
386
+ index = MultiIndex .from_product (
387
+ [list (uppercase ), date_range ("20160101" , periods = N )], names = ["id" , "date" ],
352
388
)
353
- df = pd . DataFrame ({"value" : np .random .randn (N * M )}, index = index )
389
+ df = DataFrame ({"value" : np .random .randn (N * M )}, index = index )
354
390
355
391
unstacked = df .unstack ("id" )
356
392
assert df .values .nbytes == unstacked .values .nbytes
@@ -362,8 +398,8 @@ def memory_usage(f):
362
398
363
399
def test_info_categorical ():
364
400
# GH14298
365
- idx = pd . CategoricalIndex (["a" , "b" ])
366
- df = pd . DataFrame (np .zeros ((2 , 2 )), index = idx , columns = idx )
401
+ idx = CategoricalIndex (["a" , "b" ])
402
+ df = DataFrame (np .zeros ((2 , 2 )), index = idx , columns = idx )
367
403
368
404
buf = StringIO ()
369
405
df .info (buf = buf )
0 commit comments