pandas-dev · TomAugspurger · May 1, 2018 · Apr 20, 2018 · Apr 30, 2018 · Apr 30, 2018
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -450,6 +450,8 @@ Other Enhancements
 - Updated :meth:`DataFrame.to_gbq` and :meth:`pandas.read_gbq` signature and documentation to reflect changes from
   the Pandas-GBQ library version 0.4.0. Adds intersphinx mapping to Pandas-GBQ
   library. (:issue:`20564`)
+- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``.  This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`)
+
 
 .. _whatsnew_0230.api_breaking:
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1769,27 +1769,28 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
 
     def to_stata(self, fname, convert_dates=None, write_index=True,
                  encoding="latin-1", byteorder=None, time_stamp=None,
-                 data_label=None, variable_labels=None):
+                 data_label=None, variable_labels=None, version=114,
+                 convert_strl=None):
         """
-        A class for writing Stata binary dta files from array-like objects
+        Export Stata binary dta files.
 
         Parameters
         ----------
         fname : str or buffer
-            String path of file-like object
+            String path of file-like object.
         convert_dates : dict
             Dictionary mapping columns containing datetime types to stata
             internal format to use when writing the dates. Options are 'tc',
             'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either an integer
             or a name. Datetime columns that do not have a conversion type
             specified will be converted to 'tc'. Raises NotImplementedError if
-            a datetime column has timezone information
+            a datetime column has timezone information.
         write_index : bool
             Write the index to Stata dataset.
         encoding : str
-            Default is latin-1. Unicode is not supported
+            Default is latin-1. Unicode is not supported.
         byteorder : str
-            Can be ">", "<", "little", or "big". default is `sys.byteorder`
+            Can be ">", "<", "little", or "big". default is `sys.byteorder`.
         time_stamp : datetime
             A datetime to use as file creation date.  Default is the current
             time.
@@ -1801,6 +1802,23 @@ def to_stata(self, fname, convert_dates=None, write_index=True,
 
             .. versionadded:: 0.19.0
 
+        version : {114, 117}
+            Version to use in the output dta file.  Version 114 can be used
+            read by Stata 10 and later.  Version 117 can be read by Stata 13
+            or later. Version 114 limits string variables to 244 characters or
+            fewer while 117 allows strings with lengths up to 2,000,000
+            characters.
+
+            .. versionadded:: 0.23.0
+
+        convert_strl : list, optional
+            List of column names to convert to string columns to Stata StrL
+            format. Only available if version is 117.  Storing strings in the
+            StrL format can produce smaller dta files if strings have more than
+            8 characters and values are repeated.
+
+            .. versionadded:: 0.23.0
+
         Raises
         ------
         NotImplementedError
@@ -1814,6 +1832,12 @@ def to_stata(self, fname, convert_dates=None, write_index=True,
 
             .. versionadded:: 0.19.0
 
+        See Also
+        --------
+        pandas.read_stata : Import Stata data files
+        pandas.io.stata.StataWriter : low-level writer for Stata data files
+        pandas.io.stata.StataWriter117 : low-level writer for version 117 files
+
         Examples
         --------
         >>> data.to_stata('./data_file.dta')
@@ -1832,12 +1856,23 @@ def to_stata(self, fname, convert_dates=None, write_index=True,
         >>> writer = StataWriter('./date_data_file.dta', data, {2 : 'tw'})
         >>> writer.write_file()
         """
-        from pandas.io.stata import StataWriter
-        writer = StataWriter(fname, self, convert_dates=convert_dates,
+        kwargs = {}
+        if version not in (114, 117):
+            raise ValueError('Only formats 114 and 117 supported.')
+        if version == 114:
+            if convert_strl is not None:
+                raise ValueError('strl support is only available when using '
+                                 'format 117')
+            from pandas.io.stata import StataWriter as statawriter
+        else:
+            from pandas.io.stata import StataWriter117 as statawriter
+            kwargs['convert_strl'] = convert_strl
+
+        writer = statawriter(fname, self, convert_dates=convert_dates,
                              encoding=encoding, byteorder=byteorder,
                              time_stamp=time_stamp, data_label=data_label,
                              write_index=write_index,
-                             variable_labels=variable_labels)
+                             variable_labels=variable_labels, **kwargs)
         writer.write_file()
 
     def to_feather(self, fname):