Skip to content

Commit c0b23e2

Browse files
committed
BUG: Enable stata files to be written to buffers
Enable support for general file-like objects when exporting Stata files. Closes #21041.
1 parent 90c2237 commit c0b23e2

File tree

4 files changed

+55
-14
lines changed

4 files changed

+55
-14
lines changed

doc/source/whatsnew/v0.23.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ Indexing
8181
I/O
8282
^^^
8383

84-
-
84+
- Bug in :meth:`DataFrame.to_stata` which prevented buffers from being directly written to (:issue:`21041`)
8585
-
8686

8787
Plotting

pandas/core/frame.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1774,8 +1774,11 @@ def to_stata(self, fname, convert_dates=None, write_index=True,
17741774
17751775
Parameters
17761776
----------
1777-
fname : str or buffer
1778-
String path of file-like object.
1777+
fname : path (string), buffer or path object
1778+
string, path object (pathlib.Path or py._path.local.LocalPath) or
1779+
object implementing a binary write() function. If using a buffer
1780+
then the buffer will not be automatically closed after the file
1781+
data has been written.
17791782
convert_dates : dict
17801783
Dictionary mapping columns containing datetime types to stata
17811784
internal format to use when writing the dates. Options are 'tc',

pandas/io/stata.py

+34-11
Original file line numberDiff line numberDiff line change
@@ -1758,11 +1758,25 @@ def value_labels(self):
17581758
return self.value_label_dict
17591759

17601760

1761-
def _open_file_binary_write(fname, encoding):
1761+
def _open_file_binary_write(fname):
1762+
"""
1763+
Open a binary file or no-op if file-like
1764+
1765+
Parameters
1766+
----------
1767+
fname : string path, path object or buffer
1768+
1769+
Returns
1770+
-------
1771+
file : file-like object
1772+
File object supporting write
1773+
own : bool
1774+
True if the file was created, otherwise False
1775+
"""
17621776
if hasattr(fname, 'write'):
17631777
# if 'b' not in fname.mode:
1764-
return fname
1765-
return open(fname, "wb")
1778+
return fname, False
1779+
return open(fname, "wb"), True
17661780

17671781

17681782
def _set_endianness(endianness):
@@ -1899,7 +1913,9 @@ class StataWriter(StataParser):
18991913
----------
19001914
fname : path (string), buffer or path object
19011915
string, path object (pathlib.Path or py._path.local.LocalPath) or
1902-
object implementing a binary write() functions.
1916+
object implementing a binary write() function. If using a buffer
1917+
then the buffer will not be automatically closed after the file
1918+
is written.
19031919
19041920
.. versionadded:: 0.23.0 support for pathlib, py.path.
19051921
@@ -1970,6 +1986,7 @@ def __init__(self, fname, data, convert_dates=None, write_index=True,
19701986
self._time_stamp = time_stamp
19711987
self._data_label = data_label
19721988
self._variable_labels = variable_labels
1989+
self._own_file = True
19731990
# attach nobs, nvars, data, varlist, typlist
19741991
self._prepare_pandas(data)
19751992

@@ -2183,9 +2200,7 @@ def _prepare_pandas(self, data):
21832200
self.fmtlist[key] = self._convert_dates[key]
21842201

21852202
def write_file(self):
2186-
self._file = _open_file_binary_write(
2187-
self._fname, self._encoding or self._default_encoding
2188-
)
2203+
self._file, self._own_file = _open_file_binary_write(self._fname)
21892204
try:
21902205
self._write_header(time_stamp=self._time_stamp,
21912206
data_label=self._data_label)
@@ -2205,7 +2220,13 @@ def write_file(self):
22052220
self._write_file_close_tag()
22062221
self._write_map()
22072222
finally:
2208-
self._file.close()
2223+
try:
2224+
# Some file-like objects might not support flush
2225+
self._file.flush()
2226+
except AttributeError:
2227+
pass
2228+
if self._own_file:
2229+
self._file.close()
22092230

22102231
def _write_map(self):
22112232
"""No-op, future compatibility"""
@@ -2374,7 +2395,7 @@ def _prepare_data(self):
23742395

23752396
def _write_data(self):
23762397
data = self.data
2377-
data.tofile(self._file)
2398+
self._file.write(data.tobytes())
23782399

23792400
def _null_terminate(self, s, as_string=False):
23802401
null_byte = '\x00'
@@ -2641,7 +2662,9 @@ class StataWriter117(StataWriter):
26412662
----------
26422663
fname : path (string), buffer or path object
26432664
string, path object (pathlib.Path or py._path.local.LocalPath) or
2644-
object implementing a binary write() functions.
2665+
object implementing a binary write() function. If using a buffer
2666+
then the buffer will not be automatically closed after the file
2667+
is written.
26452668
data : DataFrame
26462669
Input to save
26472670
convert_dates : dict
@@ -2879,7 +2902,7 @@ def _write_data(self):
28792902
self._update_map('data')
28802903
data = self.data
28812904
self._file.write(b'<data>')
2882-
data.tofile(self._file)
2905+
self._file.write(data.tobytes())
28832906
self._file.write(b'</data>')
28842907

28852908
def _write_strls(self):

pandas/tests/io/test_stata.py

+15
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint: disable=E1101
33

44
import datetime as dt
5+
import io
56
import os
67
import struct
78
import warnings
@@ -1473,3 +1474,17 @@ def test_invalid_date_conversion(self):
14731474
with pytest.raises(ValueError):
14741475
original.to_stata(path,
14751476
convert_dates={'wrong_name': 'tc'})
1477+
1478+
@pytest.mark.parametrize('version', [114, 117])
1479+
def test_nonfile_writing(self, version):
1480+
# GH 21041
1481+
bio = io.BytesIO()
1482+
df = tm.makeDataFrame()
1483+
df.index.name = 'index'
1484+
with tm.ensure_clean() as path:
1485+
df.to_stata(bio, version=version)
1486+
bio.seek(0)
1487+
with open(path, 'wb') as dta:
1488+
dta.write(bio.read())
1489+
reread = pd.read_stata(path, index_col='index')
1490+
tm.assert_frame_equal(df, reread)

0 commit comments

Comments
 (0)