DOC: Clean up doc strings

bashtage · bashtage · commit 900c9f7f7715 · 2018-05-01T08:33:24.000+01:00
Fix typo
Enhance compliance of related docstrings using validator
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1772,25 +1772,25 @@ def to_stata(self, fname, convert_dates=None, write_index=True,
                  data_label=None, variable_labels=None, version=114,
                  convert_strl=None):
         """
-        A class for writing Stata binary dta files from array-like objects
+        Export Stata binary dta files.
 
         Parameters
         ----------
         fname : str or buffer
-            String path of file-like object
+            String path or file-like object.
         convert_dates : dict
             Dictionary mapping columns containing datetime types to stata
             internal format to use when writing the dates. Options are 'tc',
             'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either an integer
             or a name. Datetime columns that do not have a conversion type
             specified will be converted to 'tc'. Raises NotImplementedError if
-            a datetime column has timezone information
+            a datetime column has timezone information.
         write_index : bool
             Write the index to Stata dataset.
         encoding : str
-            Default is latin-1. Unicode is not supported
+            Default is latin-1. Unicode is not supported.
         byteorder : str
-            Can be ">", "<", "little", or "big". default is `sys.byteorder`
+            Can be ">", "<", "little", or "big". Default is `sys.byteorder`.
         time_stamp : datetime
             A datetime to use as file creation date.  Default is the current
             time.
@@ -1803,7 +1803,7 @@ def to_stata(self, fname, convert_dates=None, write_index=True,
             .. versionadded:: 0.19.0
 
         version : {114, 117}
-            dta version to use in the output file.  Version 114 can be used
+            Version to use in the output dta file.  Version 114 can be
             read by Stata 10 and later.  Version 117 can be read by Stata 13
             or later. Version 114 limits string variables to 244 characters or
             fewer while 117 allows strings with lengths up to 2,000,000
@@ -1813,7 +1813,7 @@ def to_stata(self, fname, convert_dates=None, write_index=True,
 
         convert_strl : list, optional
             List of column names to convert to string columns to Stata StrL
-            format. Only available if version is 117.  Storign strings in the
+            format. Only available if version is 117.  Storing strings in the
             StrL format can produce smaller dta files if strings have more than
             8 characters and values are repeated.
 
@@ -1832,6 +1832,12 @@ def to_stata(self, fname, convert_dates=None, write_index=True,
 
             .. versionadded:: 0.19.0
 
+        See Also
+        --------
+        pandas.read_stata : Import Stata data files
+        pandas.io.stata.StataWriter : Low-level writer for Stata data files
+        pandas.io.stata.StataWriter117 : Low-level writer for version 117 files
+
         Examples
         --------
         >>> data.to_stata('./data_file.dta')
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -45,17 +45,17 @@
 
 _statafile_processing_params1 = """\
 convert_dates : boolean, defaults to True
-    Convert date variables to DataFrame time values
+    Convert date variables to DataFrame time values.
 convert_categoricals : boolean, defaults to True
-    Read value labels and convert columns to Categorical/Factor variables"""
+    Read value labels and convert columns to Categorical/Factor variables."""
 
 _encoding_params = """\
 encoding : string, None or encoding
     Encoding used to parse the files. None defaults to latin-1."""
 
 _statafile_processing_params2 = """\
 index_col : string, optional, default: None
-    Column to set as index
+    Column to set as index.
 convert_missing : boolean, defaults to False
     Flag indicating whether to convert missing values to their Stata
     representations.  If False, missing values are replaced with nan.
@@ -64,28 +64,29 @@
     StataMissingValue objects.
 preserve_dtypes : boolean, defaults to True
     Preserve Stata datatypes. If False, numeric data are upcast to pandas
-    default types for foreign data (float64 or int64)
+    default types for foreign data (float64 or int64).
 columns : list or None
     Columns to retain.  Columns will be returned in the given order.  None
-    returns all columns
+    returns all columns.
 order_categoricals : boolean, defaults to True
     Flag indicating whether converted categorical data are ordered."""
 
 _chunksize_params = """\
 chunksize : int, default None
     Return StataReader object for iterations, returns chunks with
-    given number of lines"""
+    given number of lines."""
 
 _iterator_params = """\
 iterator : boolean, default False
-    Return StataReader object"""
+    Return StataReader object."""
 
-_read_stata_doc = """Read Stata file into DataFrame
+_read_stata_doc = """
+Read Stata file into DataFrame.
 
 Parameters
 ----------
 filepath_or_buffer : string or file-like object
-    Path to .dta file or object implementing a binary read() functions
+    Path to .dta file or object implementing a binary read() function.
 %s
 %s
 %s
@@ -96,17 +97,23 @@
 -------
 DataFrame or StataReader
 
+See Also
+--------
+pandas.io.stata.StataReader : Low-level reader for Stata data files
+pandas.DataFrame.to_stata : Export Stata data files
+
 Examples
 --------
 Read a Stata dta file:
 
->>> df = pandas.read_stata('filename.dta')
+>>> import pandas as pd
+>>> df = pd.read_stata('filename.dta')
 
 Read a Stata dta file in 10,000 line chunks:
 
->>> itr = pandas.read_stata('filename.dta', chunksize=10000)
+>>> itr = pd.read_stata('filename.dta', chunksize=10000)
 >>> for chunk in itr:
->>>     do_something(chunk)
+...     do_something(chunk)
 """ % (_statafile_processing_params1, _encoding_params,
        _statafile_processing_params2, _chunksize_params,
        _iterator_params)
@@ -2472,7 +2479,7 @@ def __init__(self, df, columns, version=117, byteorder=None):
 
         self.df = df
         self.columns = columns
-        self._gso_table = OrderedDict((('', 0),))
+        self._gso_table = OrderedDict((('', (0, 0)),))
         if byteorder is None:
             byteorder = sys.byteorder
         self._byteorder = _set_endianness(byteorder)
@@ -2674,15 +2681,16 @@ class StataWriter117(StataWriter):
     Examples
     --------
     >>> import pandas as pd
-    >>> data = pd.DataFrame([[1.0, 1, 'a']], columns=['a', 'b'])
+    >>> from pandas.io.stata import StataWriter117
+    >>> data = pd.DataFrame([[1.0, 1, 'a']], columns=['a', 'b', 'c'])
     >>> writer = StataWriter117('./data_file.dta', data)
     >>> writer.write_file()
 
-    Or with dates
-    >>> from datetime import datetime
+    Or with long strings stored in strl format
+
     >>> data = pd.DataFrame([['A relatively long string'], [''], ['']],
     ...                     columns=['strls'])
-    >>> writer = StataWriter117('./date_data_file.dta', data,
+    >>> writer = StataWriter117('./data_file_with_long_strings.dta', data,
     ...                         convert_strl=['strls'])
     >>> writer.write_file()
     """
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -1378,7 +1378,11 @@ def test_date_parsing_ignores_format_details(self, column):
     def test_writer_117(self):
         original = DataFrame(data=[['string', 'object', 1, 1, 1, 1.1, 1.1,
                                     np.datetime64('2003-12-25'),
-                                    'a', 'a' * 2045, 'a' * 5000, 'a']],
+                                    'a', 'a' * 2045, 'a' * 5000, 'a'],
+                                   ['string-1', 'object-1', 1, 1, 1, 1.1, 1.1,
+                                    np.datetime64('2003-12-26'),
+                                    'b', 'b' * 2045, '', '']
+                                   ],
                              columns=['string', 'object', 'int8', 'int16',
                                       'int32', 'float32', 'float64',
                                       'datetime',