@@ -206,6 +206,7 @@ class InvalidColumnName(Warning):
206
206
underscores, no Stata reserved words)
207
207
"""
208
208
209
+
209
210
def _cast_to_stata_types (data ):
210
211
"""Checks the dtypes of the columns of a pandas DataFrame for
211
212
compatibility with the data types and ranges supported by Stata, and
@@ -218,18 +219,44 @@ def _cast_to_stata_types(data):
218
219
219
220
Notes
220
221
-----
221
- Numeric columns must be one of int8, int16, int32, float32 or float64, with
222
- some additional value restrictions on the integer data types . int8 and
223
- int16 columns are checked for violations of the value restrictions and
222
+ Numeric columns in Stata must be one of int8, int16, int32, float32 or
223
+ float64, with some additional value restrictions. int8 and int16 columns
224
+ are checked for violations of the value restrictions and
224
225
upcast if needed. int64 data is not usable in Stata, and so it is
225
226
downcast to int32 whenever the values are in the int32 range, and
226
227
sidecast to float64 when larger than this range. If the int64 values
227
228
are outside of the range of those perfectly representable as float64 values,
228
229
a warning is raised.
230
+
231
+ bool columns are cast to int8. uint columns are converted to int of the same
232
+ size if there is no loss in precision, otherwise are upcast to a larger
233
+ type. uint64 is currently not supported since it is converted to object in
234
+ a DataFrame.
229
235
"""
230
236
ws = ''
237
+ # original, if small, if large
238
+ conversion_data = ((np .bool , np .int8 , np .int8 ),
239
+ (np .uint8 , np .int8 , np .int16 ),
240
+ (np .uint16 , np .int16 , np .int32 ),
241
+ (np .uint32 , np .int32 , np .int64 ))
242
+
231
243
for col in data :
232
244
dtype = data [col ].dtype
245
+ # Cast from unsupported types to supported types
246
+ for c_data in conversion_data :
247
+ if dtype == c_data [0 ]:
248
+ if data [col ].max () <= np .iinfo (c_data [1 ]).max :
249
+ dtype = c_data [1 ]
250
+ else :
251
+ dtype = c_data [2 ]
252
+ if c_data [2 ] == np .float64 : # Warn if necessary
253
+ if data [col ].max () >= 2 * 53 :
254
+ ws = precision_loss_doc % ('uint64' , 'float64' )
255
+
256
+ data [col ] = data [col ].astype (dtype )
257
+
258
+
259
+ # Check values and upcast if necessary
233
260
if dtype == np .int8 :
234
261
if data [col ].max () > 100 or data [col ].min () < - 127 :
235
262
data [col ] = data [col ].astype (np .int16 )
@@ -241,7 +268,7 @@ def _cast_to_stata_types(data):
241
268
data [col ] = data [col ].astype (np .int32 )
242
269
else :
243
270
data [col ] = data [col ].astype (np .float64 )
244
- if data [col ].max () < = 2 * 53 or data [col ].min () > = - 2 ** 53 :
271
+ if data [col ].max () > = 2 ** 53 or data [col ].min () < = - 2 ** 53 :
245
272
ws = precision_loss_doc % ('int64' , 'float64' )
246
273
247
274
if ws :
0 commit comments