Skip to content

Commit 10b9020

Browse files
committed
StataWriter: Replace missing values in string columns by an empty string
1 parent 7d5fde7 commit 10b9020

File tree

3 files changed

+14
-0
lines changed

3 files changed

+14
-0
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,8 @@ Bug Fixes
302302
- Bug in ``obj.blocks`` on sparse containers dropping all but the last item of the same dtype (:issue:`6748`)
303303
- Bug in unpickling ``NaT (NaTType)`` (:issue:`4606`)
304304
- Bug in setting a tz-aware index directly via ``.index`` (:issue:`6785`)
305+
- ``StataWriter`` replaces missing values in string columns with an empty string (:issue:`6802`)
306+
305307

306308
pandas 0.13.1
307309
-------------

pandas/io/stata.py

+2
Original file line numberDiff line numberDiff line change
@@ -1319,6 +1319,8 @@ def _write_data_nodates(self):
13191319
for i, var in enumerate(row):
13201320
typ = ord(typlist[i])
13211321
if typ <= 244: # we've got a string
1322+
if var is None or var == np.nan:
1323+
var = _pad_bytes('', typ)
13221324
if len(var) < typ:
13231325
var = _pad_bytes(var, typ)
13241326
self._write(var)

pandas/io/tests/test_stata.py

+10
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,16 @@ def test_date_export_formats(self):
508508
tm.assert_frame_equal(written_and_read_again.set_index('index'),
509509
expected)
510510

511+
def test_write_missing_strings(self):
512+
original = DataFrame([["1"], [None]], columns=["foo"])
513+
expected = DataFrame([["1"], [""]], columns=["foo"])
514+
expected.index.name = 'index'
515+
with tm.ensure_clean() as path:
516+
original.to_stata(path)
517+
written_and_read_again = self.read_dta(path)
518+
tm.assert_frame_equal(written_and_read_again.set_index('index'),
519+
expected)
520+
511521

512522
if __name__ == '__main__':
513523
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)