Skip to content

Commit 4618e70

Browse files
committed
Merge pull request #3935 from PKEuS/master
FIX: StataReader
2 parents c6a9f2e + 8402674 commit 4618e70

File tree

2 files changed: 25 additions and 10 deletions.

pandas/io/stata.py

+6 -7 (6 additions, 7 deletions)
Original file line number | Diff line number | Diff line change
@@ -407,7 +407,7 @@ def _null_terminate(self, s):
407407

408408
def _next(self):
409409
typlist = self.typlist
410-
if self._has_string_data:
410+
if self.has_string_data:
411411
data = [None] * self.nvar
412412
for i in range(len(data)):
413413
if type(typlist[i]) is int:
@@ -523,7 +523,8 @@ def data(self, convert_dates=True, convert_categoricals=True, index=None):
523523
for i in cols_:
524524
if self.dtyplist[i] is not None:
525525
col = data.columns[i]
526-
data[col] = Series(data[col], data[col].index, self.dtyplist[i])
526+
if data[col].dtype is not np.dtype(object):
527+
data[col] = Series(data[col], data[col].index, self.dtyplist[i])
527528

528529
if convert_dates:
529530
cols = np.where(map(lambda x: x in _date_formats, self.fmtlist))[0]
@@ -856,7 +857,7 @@ def _write_data_nodates(self):
856857
typ = ord(typlist[i])
857858
if typ <= 244: # we've got a string
858859
if len(var) < typ:
859-
var = _pad_bytes(self._decode_bytes(var), len(var) + 1)
860+
var = _pad_bytes(var, typ)
860861
self._write(var)
861862
else:
862863
try:
@@ -884,15 +885,13 @@ def _write_data_dates(self):
884885
if i in convert_dates:
885886
var = _datetime_to_stata_elapsed(var, self.fmtlist[i])
886887
if typ <= 244: # we've got a string
887-
if isnull(var):
888-
var = "" # missing string
889888
if len(var) < typ:
890-
var = _pad_bytes(var, len(var) + 1)
889+
var = _pad_bytes(var, typ)
891890
self._write(var)
892891
else:
893892
if isnull(var): # this only matters for floats
894893
var = MISSING_VALUES[typ]
895-
self._write(struct.pack(byteorder+TYPE_MAP[typ], var))
894+
self._file.write(struct.pack(byteorder+TYPE_MAP[typ], var))
896895

897896
def _null_terminate(self, s, as_string=False):
898897
null_byte = '\x00'

pandas/io/tests/test_stata.py

+19 -3 (19 additions, 3 deletions)
Original file line number | Diff line number | Diff line change
@@ -3,19 +3,19 @@
33
from datetime import datetime
44
import os
55
import unittest
6-
import sys
76
import warnings
87
import nose
98

109
import numpy as np
1110

12-
from pandas.core.frame import DataFrame
11+
from pandas.core.frame import DataFrame, Series
1312
from pandas.io.parsers import read_csv
1413
from pandas.io.stata import read_stata, StataReader, StataWriter
1514
import pandas.util.testing as tm
1615
from pandas.util.testing import ensure_clean
1716
from pandas.util.misc import is_little_endian
1817

18+
1919
class StataTests(unittest.TestCase):
2020

2121
def setUp(self):
@@ -35,6 +35,7 @@ def setUp(self):
3535
self.csv8 = os.path.join(self.dirpath, 'tbl19-3.csv')
3636
self.dta9 = os.path.join(self.dirpath, 'lbw.dta')
3737
self.csv9 = os.path.join(self.dirpath, 'lbw.csv')
38+
self.dta10 = os.path.join(self.dirpath, 'stata10.dta')
3839

3940
def read_dta(self, file):
4041
return read_stata(file, convert_dates=True)
@@ -189,9 +190,24 @@ def test_read_dta9(self):
189190
decimal=3
190191
)
191192

193+
def test_read_dta10(self):
194+
original = DataFrame(
195+
data=
196+
[
197+
["string", "object", 1, 1.1, np.datetime64('2003-12-25')]
198+
],
199+
columns=['string', 'object', 'integer', 'float', 'datetime'])
200+
original["object"] = Series(original["object"], dtype=object)
201+
original.index.name = 'index'
202+
203+
with ensure_clean(self.dta10) as path:
204+
original.to_stata(path, {'datetime': 'tc'}, False)
205+
written_and_read_again = self.read_dta(path)
206+
tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
207+
192208
def test_stata_doc_examples(self):
193209
with ensure_clean(self.dta5) as path:
194-
df = DataFrame(np.random.randn(10,2),columns=list('AB'))
210+
df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
195211
df.to_stata(path)
196212

197213
if __name__ == '__main__':

0 commit comments

Comments
 (0)