Skip to content

Commit 1532991

Browse files
committed
REGR: be able to read Stata files without reading them fully into memory
Fixes pandas-dev#48700. Regressed in pandas-dev#9245. Regressed in 2f0ada3.
1 parent e43d75e commit 1532991

File tree

2 files changed

+14
-6
lines changed

2 files changed

+14
-6
lines changed

doc/source/whatsnew/v1.5.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ Fixed regressions
8383
- Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`)
8484
- Fixed regression in :meth:`DataFrameGroupBy.apply` when a user-defined function is called on an empty dataframe (:issue:`47985`)
8585
- Fixed regression in :meth:`DataFrame.apply` when passing non-zero ``axis`` via keyword argument (:issue:`48656`)
86-
-
86+
- Fixed regression in Stata reading where files were needlessly buffered in memory (:issue:`48922`)
8787

8888
.. ---------------------------------------------------------------------------
8989

pandas/io/stata.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -1164,15 +1164,23 @@ def __init__(
11641164
self._lines_read = 0
11651165

11661166
self._native_byteorder = _set_endianness(sys.byteorder)
1167-
with get_handle(
1167+
1168+
handles = get_handle(
11681169
path_or_buf,
11691170
"rb",
11701171
storage_options=storage_options,
11711172
is_text=False,
11721173
compression=compression,
1173-
) as handles:
1174-
# Copy to BytesIO, and ensure no encoding
1175-
self.path_or_buf = BytesIO(handles.handle.read())
1174+
)
1175+
if hasattr(handles.handle, "seekable") and handles.handle.seekable():
1176+
# If the handle is directly seekable, use it without an extra copy.
1177+
self.path_or_buf = handles.handle
1178+
self._close_file = handles.close
1179+
else:
1180+
# Copy to memory, and ensure no encoding.
1181+
with handles:
1182+
self.path_or_buf = BytesIO(handles.handle.read())
1183+
self._close_file = self.path_or_buf.close
11761184

11771185
self._read_header()
11781186
self._setup_dtype()
@@ -1192,7 +1200,7 @@ def __exit__(
11921200

11931201
def close(self) -> None:
11941202
"""close the handle if its open"""
1195-
self.path_or_buf.close()
1203+
self._close_file()
11961204

11971205
def _set_encoding(self) -> None:
11981206
"""

0 commit comments

Comments
 (0)