-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
to_stata uint16 #7397
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
to_stata uint16 #7397
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -230,13 +230,13 @@ def _cast_to_stata_types(data): | |
ws = '' | ||
for col in data: | ||
dtype = data[col].dtype | ||
if dtype == np.int8: | ||
if dtype in (np.int8, np.uint8): | ||
if data[col].max() > 100 or data[col].min() < -127: | ||
data[col] = data[col].astype(np.int16) | ||
elif dtype == np.int16: | ||
elif dtype in (np.int16, np.uint16): | ||
if data[col].max() > 32740 or data[col].min() < -32767: | ||
data[col] = data[col].astype(np.int32) | ||
elif dtype == np.int64: | ||
elif dtype in (np.int32, np.uint32, np.int64, np.uint64): | ||
if data[col].max() <= 2147483620 and data[col].min() >= -2147483647: | ||
data[col] = data[col].astype(np.int32) | ||
else: | ||
|
@@ -990,11 +990,11 @@ def _dtype_to_stata_type(dtype): | |
return chr(255) | ||
elif dtype == np.float32: | ||
return chr(254) | ||
elif dtype == np.int32: | ||
elif dtype in (np.int32, np.uint32): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not safe and can lead to data loss. Only supported Stata data types should be used here, and Stata does not support unsigned types. The correct method is to first perform all casting and then use only the cast, Stata-safe data types when writing the data type. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This change should not be necessary since this function is called after `_cast_to_stata_types`.
|
||
return chr(253) | ||
elif dtype == np.int16: | ||
elif dtype in (np.int16, np.uint16): | ||
return chr(252) | ||
elif dtype == np.int8: | ||
elif dtype in (np.int8, np.uint8): | ||
return chr(251) | ||
else: # pragma : no cover | ||
raise ValueError("Data type %s not currently understood. " | ||
|
@@ -1023,9 +1023,9 @@ def _dtype_to_default_stata_fmt(dtype): | |
return "%10.0g" | ||
elif dtype == np.float32: | ||
return "%9.0g" | ||
elif dtype == np.int32: | ||
elif dtype in (np.int32, np.uint32): | ||
return "%12.0g" | ||
elif dtype == np.int8 or dtype == np.int16: | ||
elif dtype in (np.int8, np.uint8, np.int16, np.uint16): | ||
return "%8.0g" | ||
else: # pragma : no cover | ||
raise ValueError("Data type %s not currently understood. " | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not the correct behavior for this function. This function ensures that all datatypes after it is run have a trivial mapping to Stata data types. It would be simplest to upcast `uint`s to the next largest `int`, which is always safe, and then the other changes in the commit are not needed.
Something simple like