Skip to content

Commit 587e5d7

Browse files
author
Jesse
committed
ENH: Update DataFrame.to_stata to handle pd.NA and None values in strL columns
1 parent 38dd653 commit 587e5d7

File tree

3 files changed: 17 additions and 2 deletions

doc/source/whatsnew/v2.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ MultiIndex
150150
I/O
151151
^^^
152152
- :meth:`DataFrame.to_excel` was storing decimals as strings instead of numbers (:issue:`49598`)
153+
- :meth:`DataFrame.to_stata` no longer raises a ``TypeError('encoding without a string argument')`` when exporting a column containing both long strings (Stata strL) and :class:`pd.NA` values (:issue:`23633`)
153154
-
154155

155156
Period

pandas/io/stata.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3196,8 +3196,8 @@ def generate_table(self) -> tuple[dict[str, tuple[int, int]], DataFrame]:
31963196
for o, (idx, row) in enumerate(selected.iterrows()):
31973197
for j, (col, v) in enumerate(col_index):
31983198
val = row[col]
3199-
# Allow columns with mixed str and None (GH 23633)
3200-
val = "" if val is None else val
3199+
# Allow columns with mixed str and None or pd.NA (GH 23633)
3200+
val = "" if (val is None) or isna(val) else val
32013201
key = gso_table.get(val, None)
32023202
if key is None:
32033203
# Stata prefers human numbers

pandas/tests/io/test_stata.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2587,3 +2587,17 @@ def test_many_strl(temp_file, version):
25872587
lbls = ["".join(v) for v in itertools.product(*([string.ascii_letters] * 3))]
25882588
value_labels = {"col": {i: lbls[i] for i in range(n)}}
25892589
df.to_stata(temp_file, value_labels=value_labels, version=version)
2590+
2591+
2592+
@pytest.mark.parametrize("version", [117, 118, 119, None])
2593+
def test_strl_missings(temp_file, version):
2594+
# GH 23633: to_stata raised TypeError for strL columns that also contain pd.NA
2595+
# Smoke test only: writing a long-string (strl) column mixed with None and pd.NA must not raise; the file is not read back
2596+
df = DataFrame(
2597+
[
2598+
{"str1": "string" * 500, "number": 0},
2599+
{"str1": None, "number": 1},
2600+
{"str1": pd.NA, "number": 1},
2601+
]
2602+
)
2603+
df.to_stata(temp_file, version=version)

0 commit comments

Comments
 (0)