Skip to content

Commit 9f33bc4

Browse files
committed
Merge branch 'StataParser' of https://github.com/PKEuS/pandas into PKEuS-StataParser
Conflicts: doc/source/release.rst
2 parents ba1cd18 + 10b9020 commit 9f33bc4

File tree

3 files changed

+14
-0
lines changed

3 files changed

+14
-0
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ Bug Fixes
290290
- Bug in ``pd.read_stata`` which would use the wrong data types and missing values (:issue:`6327`)
291291
- Bug in ``DataFrame.to_stata`` that led to data loss in certain cases, and could export using the
292292
wrong data types and missing values (:issue:`6335`)
293+
- ``StataWriter`` replaces missing values in string columns with an empty string (:issue:`6802`)
293294
- Inconsistent types in Timestamp addition/subtraction (:issue:`6543`)
294295
- Bug in preserving frequency across Timestamp addition/subtraction (:issue:`4547`)
295296
- Bug in indexing: empty list lookup caused ``IndexError`` exceptions (:issue:`6536`, :issue:`6551`)
@@ -314,6 +315,7 @@ Bug Fixes
314315
as regexes even when ``regex=False`` (:issue:`6777`).
315316
- Bug in timedelta ops on 32-bit platforms (:issue:`6808`)
316317
- Bug in setting a tz-aware index directly via ``.index`` (:issue:`6785`)
318+
<<<<<<< HEAD
317319
- Bug in expressions.py where numexpr would try to evaluate arithmetic ops
318320
(:issue:`6762`).
319321
- Bug in Makefile where it didn't remove Cython generated C files with ``make

pandas/io/stata.py

+2
Original file line numberDiff line numberDiff line change
@@ -1319,6 +1319,8 @@ def _write_data_nodates(self):
13191319
for i, var in enumerate(row):
13201320
typ = ord(typlist[i])
13211321
if typ <= 244: # we've got a string
1322+
# Treat both None and NaN as a missing string value. NaN never compares
# equal to anything (itself included), so ``var == np.nan`` is always
# False; ``var != var`` is the comparison that is True only for NaN.
if var is None or var != var:
    var = _pad_bytes('', typ)
13221324
if len(var) < typ:
13231325
var = _pad_bytes(var, typ)
13241326
self._write(var)

pandas/io/tests/test_stata.py

+10
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,16 @@ def test_date_export_formats(self):
508508
tm.assert_frame_equal(written_and_read_again.set_index('index'),
509509
expected)
510510

511+
def test_write_missing_strings(self):
    """A ``None`` in a string column is written and read back as ``""``."""
    frame_in = DataFrame([["1"], [None]], columns=["foo"])

    # What the round trip is expected to yield: the missing entry becomes
    # an empty string and the index is named 'index'.
    frame_expected = DataFrame([["1"], [""]], columns=["foo"])
    frame_expected.index.name = 'index'

    with tm.ensure_clean() as path:
        frame_in.to_stata(path)
        round_tripped = self.read_dta(path)
        reindexed = round_tripped.set_index('index')
        tm.assert_frame_equal(reindexed, frame_expected)
520+
511521

512522
if __name__ == '__main__':
513523
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)