@@ -432,7 +432,7 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
432
432
d = parse_dates_safe (dates , year = True )
433
433
conv_dates = d .year
434
434
else :
435
- raise ValueError ("fmt %s not understood " % fmt )
435
+ raise ValueError ("Format %s is not a known Stata date format " % fmt )
436
436
437
437
conv_dates = Series (conv_dates , dtype = np .float64 )
438
438
missing_value = struct .unpack ('<d' , b'\x00 \x00 \x00 \x00 \x00 \x00 \xe0 \x7f ' )[0 ]
@@ -1709,7 +1709,7 @@ def _convert_datetime_to_stata_type(fmt):
1709
1709
"%tq" , "th" , "%th" , "ty" , "%ty" ]:
1710
1710
return np .float64 # Stata expects doubles for SIFs
1711
1711
else :
1712
- raise ValueError ( "fmt %s not understood " % fmt )
1712
+ raise NotImplementedError ( "Format %s not implemented " % fmt )
1713
1713
1714
1714
1715
1715
def _maybe_convert_to_int_keys (convert_dates , varlist ):
@@ -1721,9 +1721,8 @@ def _maybe_convert_to_int_keys(convert_dates, varlist):
1721
1721
new_dict .update ({varlist .index (key ): convert_dates [key ]})
1722
1722
else :
1723
1723
if not isinstance (key , int ):
1724
- raise ValueError (
1725
- "convert_dates key is not in varlist and is not an int"
1726
- )
1724
+ raise ValueError ("convert_dates key must be a "
1725
+ "column or an integer" )
1727
1726
new_dict .update ({key : convert_dates [key ]})
1728
1727
return new_dict
1729
1728
@@ -1763,8 +1762,7 @@ def _dtype_to_stata_type(dtype, column):
1763
1762
elif dtype == np .int8 :
1764
1763
return chr (251 )
1765
1764
else : # pragma : no cover
1766
- raise ValueError ("Data type %s not currently understood. "
1767
- "Please report an error to the developers." % dtype )
1765
+ raise NotImplementedError ("Data type %s not supported." % dtype )
1768
1766
1769
1767
1770
1768
def _dtype_to_default_stata_fmt (dtype , column ):
@@ -1801,35 +1799,36 @@ def _dtype_to_default_stata_fmt(dtype, column):
1801
1799
elif dtype == np .int8 or dtype == np .int16 :
1802
1800
return "%8.0g"
1803
1801
else : # pragma : no cover
1804
- raise ValueError ("Data type %s not currently understood. "
1805
- "Please report an error to the developers." % dtype )
1802
+ raise NotImplementedError ("Data type %s not supported." % dtype )
1806
1803
1807
1804
1808
1805
class StataWriter (StataParser ):
1809
1806
"""
1810
- A class for writing Stata binary dta files from array-like objects
1807
+ A class for writing Stata binary dta files
1811
1808
1812
1809
Parameters
1813
1810
----------
1814
- fname : file path or buffer
1815
- Where to save the dta file.
1816
- data : array-like
1817
- Array-like input to save. Pandas objects are also accepted.
1811
+ fname : str or buffer
1812
+ String path or file-like object
1813
+ data : DataFrame
1814
+ Input to save
1818
1815
convert_dates : dict
1819
- Dictionary mapping column of datetime types to the stata internal
1820
- format that you want to use for the dates. Options are
1821
- 'tc', 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either a
1822
- number or a name.
1816
+ Dictionary mapping columns containing datetime types to stata internal
1817
+ format to use when writing the dates. Options are 'tc', 'td', 'tm',
1818
+ 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name.
1819
+ Datetime columns that do not have a conversion type specified will be
1820
+ converted to 'tc'. Raises NotImplementedError if a datetime column has
1821
+ timezone information
1822
+ write_index : bool
1823
+ Write the index to Stata dataset.
1823
1824
encoding : str
1824
- Default is latin-1. Note that Stata does not support unicode.
1825
+ Default is latin-1. Unicode is not supported
1825
1826
byteorder : str
1826
- Can be ">", "<", "little", or "big". The default is None which uses
1827
- `sys.byteorder`
1827
+ Can be ">", "<", "little", or "big". Default is `sys.byteorder`
1828
1828
time_stamp : datetime
1829
- A date time to use when writing the file. Can be None, in which
1830
- case the current time is used.
1829
+ A datetime to use as file creation date. Default is the current time
1831
1830
data_label : str
1832
- A label for the data set. Should be 80 characters or smaller.
1831
+ A label for the data set. Must be 80 characters or smaller.
1833
1832
1834
1833
.. versionadded:: 0.19.0
1835
1834
@@ -1843,6 +1842,17 @@ class StataWriter(StataParser):
1843
1842
The StataWriter instance has a write_file method, which will
1844
1843
write the file to the given `fname`.
1845
1844
1845
+ Raises
1846
+ ------
1847
+ NotImplementedError
1848
+ * If datetimes contain timezone information
1849
+ ValueError
1850
+ * Columns listed in convert_dates are not either datetime64[ns]
1851
+ or datetime.datetime
1852
+ * Column dtype is not representable in Stata
1853
+ * Column listed in convert_dates is not in DataFrame
1854
+ * Categorical label contains more than 32,000 characters
1855
+
1846
1856
Examples
1847
1857
--------
1848
1858
>>> import pandas as pd
@@ -1861,7 +1871,7 @@ def __init__(self, fname, data, convert_dates=None, write_index=True,
1861
1871
encoding = "latin-1" , byteorder = None , time_stamp = None ,
1862
1872
data_label = None , variable_labels = None ):
1863
1873
super (StataWriter , self ).__init__ (encoding )
1864
- self ._convert_dates = convert_dates
1874
+ self ._convert_dates = {} if convert_dates is None else convert_dates
1865
1875
self ._write_index = write_index
1866
1876
self ._time_stamp = time_stamp
1867
1877
self ._data_label = data_label
@@ -2041,15 +2051,22 @@ def _prepare_pandas(self, data):
2041
2051
self .varlist = data .columns .tolist ()
2042
2052
2043
2053
dtypes = data .dtypes
2044
- if self ._convert_dates is not None :
2045
- self ._convert_dates = _maybe_convert_to_int_keys (
2046
- self ._convert_dates , self .varlist
2054
+
2055
+ # Ensure all date columns are converted
2056
+ for col in data :
2057
+ if col in self ._convert_dates :
2058
+ continue
2059
+ if is_datetime64_dtype (data [col ]):
2060
+ self ._convert_dates [col ] = 'tc'
2061
+
2062
+ self ._convert_dates = _maybe_convert_to_int_keys (self ._convert_dates ,
2063
+ self .varlist )
2064
+ for key in self ._convert_dates :
2065
+ new_type = _convert_datetime_to_stata_type (
2066
+ self ._convert_dates [key ]
2047
2067
)
2048
- for key in self ._convert_dates :
2049
- new_type = _convert_datetime_to_stata_type (
2050
- self ._convert_dates [key ]
2051
- )
2052
- dtypes [key ] = np .dtype (new_type )
2068
+ dtypes [key ] = np .dtype (new_type )
2069
+
2053
2070
self .typlist = []
2054
2071
self .fmtlist = []
2055
2072
for col , dtype in dtypes .iteritems ():
0 commit comments