diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 184ca581902ee..ac7b489721cd9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -733,6 +733,7 @@ I/O - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`) - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) - Bug in :meth:`DataFrame.to_excel` where the :class:`MultiIndex` index with a period level was not a date (:issue:`60099`) +- Bug in :meth:`DataFrame.to_stata` when exporting a column containing both long strings (Stata strL) and :class:`pd.NA` values (:issue:`23633`) - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`) - Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`) - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 34d95fb59a21c..cd290710ddbaa 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -3196,8 +3196,8 @@ def generate_table(self) -> tuple[dict[str, tuple[int, int]], DataFrame]: for o, (idx, row) in enumerate(selected.iterrows()): for j, (col, v) in enumerate(col_index): val = row[col] - # Allow columns with mixed str and None (GH 23633) - val = "" if val is None else val + # Allow columns with mixed str and None or pd.NA (GH 23633) + val = "" if isna(val) else val key = gso_table.get(val, None) if key is None: # Stata prefers human numbers diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 9288b98d79fbe..e73de78847c8f 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -2587,3 +2587,17 @@ def test_many_strl(temp_file, version): lbls = ["".join(v) for v in itertools.product(*([string.ascii_letters] * 3))] value_labels = {"col": {i: lbls[i] for i in range(n)}} df.to_stata(temp_file, value_labels=value_labels, version=version) + + +@pytest.mark.parametrize("version", [117, 118, 119, None]) +def test_strl_missings(temp_file, version): + # GH 23633 + # Check that strl supports None and pd.NA + df = DataFrame( + [ + {"str1": "string" * 500, "number": 0}, + {"str1": None, "number": 1}, + {"str1": pd.NA, "number": 1}, + ] + ) + df.to_stata(temp_file, version=version)