From 4a9db3d61883408981dcaeed8926b67d6c689f1c Mon Sep 17 00:00:00 2001
From: eirki <eirik.b.stavest@gmail.com>
Date: Thu, 9 Jun 2022 21:16:05 +0200
Subject: [PATCH] BUG: StataWriterUTF8 removing some valid characters in column
 names (#47276)

---
 doc/source/whatsnew/v1.5.0.rst |  1 +
 pandas/io/stata.py             | 16 ++++++++++------
 pandas/tests/io/test_stata.py  |  4 ++--
 3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 1b079217f64ea..3348cab0325de 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -845,6 +845,7 @@ I/O
 - :meth:`to_html` now excludes the ``border`` attribute from ``<table>`` elements when ``border`` keyword is set to ``False``.
 - Bug in :func:`read_sas` returned ``None`` rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`)
 - Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`)
+- Bug in :class:`StataWriterUTF8` where some valid characters in variable names were replaced with underscores (:issue:`47276`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index b8d56172027e1..1a230f4ae4164 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -3646,12 +3646,16 @@ def _validate_variable_name(self, name: str) -> str:
         # High code points appear to be acceptable
         for c in name:
             if (
-                ord(c) < 128
-                and (c < "A" or c > "Z")
-                and (c < "a" or c > "z")
-                and (c < "0" or c > "9")
-                and c != "_"
-            ) or 128 <= ord(c) < 256:
+                (
+                    ord(c) < 128
+                    and (c < "A" or c > "Z")
+                    and (c < "a" or c > "z")
+                    and (c < "0" or c > "9")
+                    and c != "_"
+                )
+                or 128 <= ord(c) < 192
+                or c in {"×", "÷"}
+            ):
                 name = name.replace(c, "_")
 
         return name
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 377b8758c250e..f06bf0035c7dc 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -1786,11 +1786,11 @@ def test_utf8_writer(self, version):
                 [2.0, 2, "ᴮ", ""],
                 [3.0, 3, "ᴰ", None],
             ],
-            columns=["a", "β", "ĉ", "strls"],
+            columns=["Å", "β", "ĉ", "strls"],
         )
         data["ᴐᴬᵀ"] = cat
         variable_labels = {
-            "a": "apple",
+            "Å": "apple",
             "β": "ᵈᵉᵊ",
             "ĉ": "ᴎტჄႲႳႴႶႺ",
             "strls": "Long Strings",