@@ -77,16 +77,17 @@ def test_readjson_chunks(lines_json_df, chunksize):
     # GH17048: memory usage when lines=True

     unchunked = read_json(StringIO(lines_json_df), lines=True)
-    reader = read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize)
-    chunked = pd.concat(reader)
+    with read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize) as reader:
+        chunked = pd.concat(reader)

     tm.assert_frame_equal(chunked, unchunked)


 def test_readjson_chunksize_requires_lines(lines_json_df):
     msg = "chunksize can only be passed if lines=True"
     with pytest.raises(ValueError, match=msg):
-        pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2)
+        with pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2) as _:
+            pass


 def test_readjson_chunks_series():
@@ -97,15 +98,17 @@ def test_readjson_chunks_series():
     unchunked = pd.read_json(strio, lines=True, typ="Series")

     strio = StringIO(s.to_json(lines=True, orient="records"))
-    chunked = pd.concat(pd.read_json(strio, lines=True, typ="Series", chunksize=1))
+    with pd.read_json(strio, lines=True, typ="Series", chunksize=1) as reader:
+        chunked = pd.concat(reader)

     tm.assert_series_equal(chunked, unchunked)


 def test_readjson_each_chunk(lines_json_df):
     # Other tests check that the final result of read_json(chunksize=True)
     # is correct. This checks the intermediate chunks.
-    chunks = list(pd.read_json(StringIO(lines_json_df), lines=True, chunksize=2))
+    with pd.read_json(StringIO(lines_json_df), lines=True, chunksize=2) as reader:
+        chunks = list(reader)
     assert chunks[0].shape == (2, 2)
     assert chunks[1].shape == (1, 2)

@@ -114,7 +117,8 @@ def test_readjson_chunks_from_file():
     with tm.ensure_clean("test.json") as path:
         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
         df.to_json(path, lines=True, orient="records")
-        chunked = pd.concat(pd.read_json(path, lines=True, chunksize=1))
+        with pd.read_json(path, lines=True, chunksize=1) as reader:
+            chunked = pd.concat(reader)
         unchunked = pd.read_json(path, lines=True)
         tm.assert_frame_equal(unchunked, chunked)

@@ -141,7 +145,8 @@ def test_readjson_chunks_closes(chunksize):
         compression=None,
         nrows=None,
     )
-    reader.read()
+    with reader:
+        reader.read()
     assert (
         reader.handles.handle.closed
     ), f"didn't close stream with chunksize = {chunksize}"
@@ -152,7 +157,10 @@ def test_readjson_invalid_chunksize(lines_json_df, chunksize):
     msg = r"'chunksize' must be an integer >=1"

     with pytest.raises(ValueError, match=msg):
-        pd.read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize)
+        with pd.read_json(
+            StringIO(lines_json_df), lines=True, chunksize=chunksize
+        ) as _:
+            pass


 @pytest.mark.parametrize("chunksize", [None, 1, 2])
@@ -176,7 +184,8 @@ def test_readjson_chunks_multiple_empty_lines(chunksize):
     orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
     test = pd.read_json(j, lines=True, chunksize=chunksize)
     if chunksize is not None:
-        test = pd.concat(test)
+        with test:
+            test = pd.concat(test)
     tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}")

@@ -212,8 +221,8 @@ def test_readjson_nrows_chunks(nrows, chunksize):
         {"a": 3, "b": 4}
         {"a": 5, "b": 6}
        {"a": 7, "b": 8}"""
-    reader = read_json(jsonl, lines=True, nrows=nrows, chunksize=chunksize)
-    chunked = pd.concat(reader)
+    with read_json(jsonl, lines=True, nrows=nrows, chunksize=chunksize) as reader:
+        chunked = pd.concat(reader)
     expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows]
     tm.assert_frame_equal(chunked, expected)

@@ -240,6 +249,6 @@ def test_readjson_lines_chunks_fileurl(datapath):
     ]
     os_path = datapath("io", "json", "data", "line_delimited.json")
     file_url = Path(os_path).as_uri()
-    url_reader = pd.read_json(file_url, lines=True, chunksize=1)
-    for index, chuck in enumerate(url_reader):
-        tm.assert_frame_equal(chuck, df_list_expected[index])
+    with pd.read_json(file_url, lines=True, chunksize=1) as url_reader:
+        for index, chuck in enumerate(url_reader):
+            tm.assert_frame_equal(chuck, df_list_expected[index])
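For reference, a minimal sketch of the pattern every hunk above adopts: with `chunksize` set, `pd.read_json` returns a `JsonReader` that also works as a context manager, so the underlying handle is closed even if iteration stops partway. The JSON string and variable names below are illustrative, not taken from the test suite.

    # Sketch of the context-manager usage the diff converts these tests to.
    from io import StringIO

    import pandas as pd

    # Illustrative line-delimited JSON, not from the test suite.
    jsonl = '{"a": 1, "b": 2}\n{"a": 3, "b": 4}\n'

    # Iterating inside ``with`` closes the reader deterministically,
    # rather than relying on exhausting it or on garbage collection.
    with pd.read_json(StringIO(jsonl), lines=True, chunksize=1) as reader:
        chunks = list(reader)  # each chunk is a one-row DataFrame

    combined = pd.concat(chunks)

Closing via `with` rather than by exhausting the iterator is exactly what `test_readjson_chunks_closes` verifies through `reader.handles.handle.closed`.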