Skip to content

Commit b6f6432

Browse files
committed
ENH: Explicit range checking when writing Stata
Add explicit error checking for out-of-range doubles when writing Stata files closes pandas-dev#14618
1 parent 06b35db commit b6f6432

File tree

3 files changed

+35
-0
lines changed

3 files changed

+35
-0
lines changed

doc/source/whatsnew/v0.20.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,5 @@ Performance Improvements
8080

8181
Bug Fixes
8282
~~~~~~~~~
83+
84+
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)

pandas/io/stata.py

+21
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,24 @@ def _cast_to_stata_types(data):
550550
return data
551551

552552

553+
def _check_double_values(data):
    """Check double columns against the upper limit accepted for Stata

    Parameters
    ----------
    data : DataFrame
        The DataFrame to check

    Raises
    ------
    ValueError
        If the maximum of any column with dtype ``np.double`` is larger
        than the limit (2.0 ** 1023, roughly 8.988e+307).  The message
        names the column and reports both the offending value and the
        limit.

    Notes
    -----
    Only columns whose dtype is ``np.double`` are inspected, and only
    their maximum is compared; nothing is returned when all columns pass.
    """
    # Little-endian encoding of 0x7fe0000000000000, i.e. 2.0 ** 1023
    DOUBLE_MAX = struct.unpack('<d', b'\x00\x00\x00\x00\x00\x00\xe0\x7f')[0]
    for col in data:
        if data[col].dtype == np.double:
            value = data[col].max()
            if value > DOUBLE_MAX:
                # {1} is the offending value, {2} is the permitted maximum
                msg = ('Column {0} has a maximum value ({1}) outside the '
                       'range supported by Stata ({2})')
                raise ValueError(msg.format(col, value, DOUBLE_MAX))
569+
570+
553571
class StataValueLabel(object):
554572
"""
555573
Parse a categorical column and prepare formatted output
@@ -2050,6 +2068,9 @@ def _prepare_pandas(self, data):
20502068
# Check columns for compatibility with stata, upcast if necessary
20512069
data = _cast_to_stata_types(data)
20522070

2071+
# Check for values outside the range supported for doubles
2072+
_check_double_values(data)
2073+
20532074
# Replace NaNs with Stata missing values
20542075
data = self._replace_nans(data)
20552076

pandas/io/tests/test_stata.py

+12
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,18 @@ def test_stata_111(self):
12341234
original = original[['y', 'x', 'w', 'z']]
12351235
tm.assert_frame_equal(original, df)
12361236

1237+
def test_out_of_range_double(self):
    # GH 14618: writing a double above the limit enforced by the Stata
    # writer must raise a ValueError that names the offending column.
    df = DataFrame({'ColumnOk': [0.0,
                                 np.finfo(np.double).eps,
                                 4.49423283715579e+307],
                    'ColumnTooBig': [0.0,
                                     np.finfo(np.double).eps,
                                     np.finfo(np.double).max]})
    with tm.ensure_clean() as path:
        try:
            df.to_stata(path)
        except ValueError as err:
            # Check the message text itself; applying ``in`` to an
            # exception object is not a substring test on its message.
            message = str(err)
            tm.assertTrue('ColumnTooBig' in message)
            tm.assertTrue('ColumnOk' not in message)
        else:
            raise AssertionError('to_stata did not raise ValueError for '
                                 'an out-of-range double')
12371249

12381250
if __name__ == '__main__':
12391251
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)