Commit db89413

ENH: Explicit range checking of floats when writing Stata
Add explicit error checking for out-of-range doubles when writing Stata files. Upcasts float32 to float64 if out-of-range values are encountered. Closes pandas-dev#14618.
1 parent 06b35db commit db89413
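
For orientation, here is a minimal sketch (not part of the commit) of the user-facing behavior this change introduces; the column and file names are placeholders:

import numpy as np
import pandas as pd

# After this change, a double column whose maximum exceeds Stata's largest
# representable double (about 8.988e+307) raises ValueError instead of
# writing a value Stata cannot hold.
df = pd.DataFrame({'ok': [0.0, 1.0],
                   'too_big': [0.0, np.finfo(np.float64).max]})
try:
    df.to_stata('example.dta')
except ValueError as err:
    print(err)  # the message names the offending column and its maximum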

3 files changed: +47 -2 lines changed

doc/source/whatsnew/v0.20.0.txt (+2)

@@ -80,3 +80,5 @@ Performance Improvements
 
 Bug Fixes
 ~~~~~~~~~
+
+- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)

pandas/io/stata.py (+13)

@@ -511,6 +511,9 @@ def _cast_to_stata_types(data):
                        (np.uint16, np.int16, np.int32),
                        (np.uint32, np.int32, np.int64))
 
+    float32_max = struct.unpack('<f', b'\xff\xff\xff\x7e')[0]
+    float64_max = struct.unpack('<d', b'\xff\xff\xff\xff\xff\xff\xdf\x7f')[0]
+
     for col in data:
         dtype = data[col].dtype
         # Cast from unsupported types to supported types
@@ -541,6 +544,15 @@ def _cast_to_stata_types(data):
                 data[col] = data[col].astype(np.float64)
                 if data[col].max() >= 2 ** 53 or data[col].min() <= -2 ** 53:
                     ws = precision_loss_doc % ('int64', 'float64')
+        elif dtype == np.float32:
+            if data[col].max() > float32_max:
+                data[col] = data[col].astype(np.float64)
+        elif dtype == np.float64:
+            value = data[col].max()
+            if value > float64_max:
+                msg = 'Column {0} has a maximum value ({1}) outside the ' \
+                      'range supported by Stata ({2})'
+                raise ValueError(msg.format(col, value, float64_max))
 
     if ws:
         import warnings
@@ -2048,6 +2060,7 @@ def _prepare_pandas(self, data):
         data = self._check_column_names(data)
 
         # Check columns for compatibility with stata, upcast if necessary
+        # Raise if outside the supported range
         data = _cast_to_stata_types(data)
 
         # Replace NaNs with Stata missing values
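
As an aside (not part of the diff), the two constants introduced above decode Stata's own numeric limits, which sit below the IEEE 754 maxima because Stata reserves the top of each type's range for missing-value codes. A quick sketch of what they evaluate to:

import struct
import numpy as np

# Largest nonmissing values Stata can store, per its byte-level encoding.
float32_max = struct.unpack('<f', b'\xff\xff\xff\x7e')[0]
float64_max = struct.unpack('<d', b'\xff\xff\xff\xff\xff\xff\xdf\x7f')[0]

print(float32_max)               # ~1.701e+38
print(np.finfo(np.float32).max)  # ~3.403e+38 (IEEE single maximum)
print(float64_max)               # ~8.988e+307
print(np.finfo(np.float64).max)  # ~1.798e+308 (IEEE double maximum)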

pandas/io/tests/test_stata.py (+32 -2)

@@ -11,8 +11,6 @@
 
 import nose
 import numpy as np
-from pandas.tslib import NaT
-
 import pandas as pd
 import pandas.util.testing as tm
 from pandas import compat
@@ -21,6 +19,7 @@
 from pandas.io.parsers import read_csv
 from pandas.io.stata import (read_stata, StataReader, InvalidColumnName,
                              PossiblePrecisionLoss, StataMissingValue)
+from pandas.tslib import NaT
 from pandas.types.common import is_categorical_dtype
 
 
@@ -1234,6 +1233,37 @@ def test_stata_111(self):
         original = original[['y', 'x', 'w', 'z']]
         tm.assert_frame_equal(original, df)
 
+    def test_out_of_range_double(self):
+        # GH 14618
+        df = DataFrame({'ColumnOk': [0.0,
+                                     np.finfo(np.double).eps,
+                                     4.49423283715579e+307],
+                        'ColumnTooBig': [0.0,
+                                         np.finfo(np.double).eps,
+                                         np.finfo(np.double).max]})
+        with tm.assertRaises(ValueError) as cm:
+            with tm.ensure_clean() as path:
+                df.to_stata(path)
+        tm.assertTrue('ColumnTooBig' in str(cm.exception))
+
+    def test_out_of_range_float(self):
+        original = DataFrame({'ColumnOk': [0.0,
+                                           np.finfo(np.float32).eps,
+                                           np.finfo(np.float32).max / 10.0],
+                              'ColumnTooBig': [0.0,
+                                               np.finfo(np.float32).eps,
+                                               np.finfo(np.float32).max]})
+        original.index.name = 'index'
+        for col in original:
+            original[col] = original[col].astype(np.float32)
+
+        with tm.ensure_clean() as path:
+            original.to_stata(path)
+            reread = read_stata(path)
+            original['ColumnTooBig'] = original['ColumnTooBig'].astype(
+                np.float64)
+            tm.assert_frame_equal(original,
+                                  reread.set_index('index'))
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
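
Finally, a small usage sketch (not part of the commit; the file name is a placeholder) mirroring test_out_of_range_float at the user level: the float32 column is upcast to float64 on write, so the round-tripped values match while the dtype widens.

import numpy as np
import pandas as pd

df = pd.DataFrame({'x': np.array([0.0, np.finfo(np.float32).max],
                                 dtype=np.float32)})
df.index.name = 'index'
df.to_stata('roundtrip.dta')

back = pd.read_stata('roundtrip.dta').set_index('index')
print(back['x'].dtype)                                     # float64
print(np.allclose(back['x'], df['x'].astype(np.float64)))  # True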
