Skip to content

Commit 8c547c4

Browse files
committed
ENH: Explicit range checking when writing Stata
Add explicit error checking for out-of-range doubles when writing Stata files. Closes pandas-dev#14618.
1 parent 06b35db commit 8c547c4

File tree

3 files changed

+30
-0
lines changed

3 files changed

+30
-0
lines changed

doc/source/whatsnew/v0.20.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,5 @@ Performance Improvements
8080

8181
Bug Fixes
8282
~~~~~~~~~
83+
84+
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)

pandas/io/stata.py

+20
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,23 @@ def _cast_to_stata_types(data):
549549

550550
return data
551551

552+
def _check_double_values(data):
553+
"""Checks double columns against the range supported by Stata
554+
555+
Parameters
556+
----------
557+
data : DataFrame
558+
The DataFrame to check
559+
"""
560+
DOUBLE_MAX = struct.unpack('<d', b'\x00\x00\x00\x00\x00\x00\xe0\x7f')[0]
561+
for col in data:
562+
if data[col].dtype == np.double:
563+
value = data[col].max()
564+
if value > DOUBLE_MAX:
565+
msg = 'Column {0} has a maximum value ({1}) outside the ' \
566+
'range supported by Stata ({1})'
567+
raise ValueError(msg.format(col, value, DOUBLE_MAX))
568+
552569

553570
class StataValueLabel(object):
554571
"""
@@ -2050,6 +2067,9 @@ def _prepare_pandas(self, data):
20502067
# Check columns for compatibility with stata, upcast if necessary
20512068
data = _cast_to_stata_types(data)
20522069

2070+
# Check for values outside the range supported for doubles
2071+
_check_double_values(data)
2072+
20532073
# Replace NaNs with Stata missing values
20542074
data = self._replace_nans(data)
20552075

pandas/io/tests/test_stata.py

+8
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,14 @@ def test_stata_111(self):
12341234
original = original[['y', 'x', 'w', 'z']]
12351235
tm.assert_frame_equal(original, df)
12361236

1237+
def test_out_of_range_double(self):
    """Writing a double column above Stata's limit must raise ValueError."""
    # GH 14618
    df = DataFrame({'ColumnOk': [0.0,
                                 np.finfo(np.double).eps,
                                 4.49423283715579e+307],
                    'ColumnTooBig': [0.0,
                                     np.finfo(np.double).eps,
                                     np.finfo(np.double).max]})
    # Match against the error message text: a membership test on the
    # exception object itself does not inspect the message.
    with tm.assertRaisesRegexp(ValueError, 'ColumnTooBig'):
        with tm.ensure_clean() as path:
            df.to_stata(path)
12371245

12381246
if __name__ == '__main__':
12391247
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)