pandas-dev · bashtage · Nov 10, 2016 · jreback · Nov 15, 2016 · jreback
diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt
@@ -25,3 +25,4 @@ Bug Fixes
 
 - compat with ``dateutil==2.6.0`` for testing (:issue:`14621`)
 - allow ``nanoseconds`` in ``Timestamp.replace`` kwargs (:issue:`14621`)
+- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -80,3 +80,4 @@ Performance Improvements
 
 Bug Fixes
 ~~~~~~~~~
+
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -511,6 +511,9 @@ def _cast_to_stata_types(data):
                        (np.uint16, np.int16, np.int32),
                        (np.uint32, np.int32, np.int64))
 
+    float32_max = struct.unpack('<f', b'\xff\xff\xff\x7e')[0]
+    float64_max = struct.unpack('<d', b'\xff\xff\xff\xff\xff\xff\xdf\x7f')[0]
+
     for col in data:
         dtype = data[col].dtype
         # Cast from unsupported types to supported types
@@ -541,6 +544,19 @@ def _cast_to_stata_types(data):
                 data[col] = data[col].astype(np.float64)
                 if data[col].max() >= 2 ** 53 or data[col].min() <= -2 ** 53:
                     ws = precision_loss_doc % ('int64', 'float64')
+        elif dtype in (np.float32, np.float64):
+            value = data[col].max()
+            if np.isinf(value):
+                msg = 'Column {0} has a maximum value of infinity which is ' \
+                      'outside the range supported by Stata.'
+                raise ValueError(msg.format(col))
+            if dtype == np.float32 and value > float32_max:
+                data[col] = data[col].astype(np.float64)  # upcast to double
+            elif dtype == np.float64:
+                if value > float64_max:
+                    msg = 'Column {0} has a maximum value ({1}) outside the ' \
+                          'range supported by Stata ({2})'
+                    raise ValueError(msg.format(col, value, float64_max))
 
     if ws:
         import warnings
@@ -2048,6 +2064,7 @@ def _prepare_pandas(self, data):
         data = self._check_column_names(data)
 
         # Check columns for compatibility with stata, upcast if necessary
+        # Raise if outside the supported range
         data = _cast_to_stata_types(data)
 
         # Replace NaNs with Stata missing values

diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py
@@ -11,8 +11,6 @@
 
 import nose
 import numpy as np
-from pandas.tslib import NaT
-
 import pandas as pd
 import pandas.util.testing as tm
 from pandas import compat
@@ -21,6 +19,7 @@
 from pandas.io.parsers import read_csv
 from pandas.io.stata import (read_stata, StataReader, InvalidColumnName,
                              PossiblePrecisionLoss, StataMissingValue)
+from pandas.tslib import NaT
 from pandas.types.common import is_categorical_dtype
 
 
@@ -1234,6 +1233,52 @@ def test_stata_111(self):
         original = original[['y', 'x', 'w', 'z']]
         tm.assert_frame_equal(original, df)
 
+    def test_out_of_range_double(self):
+        # GH 14618
+        df = DataFrame({'ColumnOk': [0.0,
+                                     np.finfo(np.double).eps,
+                                     4.49423283715579e+307],
+                        'ColumnTooBig': [0.0,
+                                         np.finfo(np.double).eps,
+                                         np.finfo(np.double).max]})
+        # Finite values above Stata's largest double must be rejected,
+        # and the error message must name the offending column.
+        with tm.assertRaisesRegexp(ValueError, 'ColumnTooBig'):
+            with tm.ensure_clean() as path:
+                df.to_stata(path)
+
+        df.loc[2, 'ColumnTooBig'] = np.inf
+        # Infinity is reported with its own, more specific message.
+        with tm.assertRaisesRegexp(ValueError, 'ColumnTooBig.*infinity'):
+            with tm.ensure_clean() as path:
+                df.to_stata(path)
+
+    def test_out_of_range_float(self):
+        original = DataFrame({'ColumnOk': [0.0,
+                                           np.finfo(np.float32).eps,
+                                           np.finfo(np.float32).max / 10.0],
+                              'ColumnTooBig': [0.0,
+                                               np.finfo(np.float32).eps,
+                                               np.finfo(np.float32).max]})
+        original.index.name = 'index'
+        for col in original:
+            original[col] = original[col].astype(np.float32)
+
+        # Too-big float32 values are upcast to double rather than rejected.
+        with tm.ensure_clean() as path:
+            original.to_stata(path)
+            reread = read_stata(path)
+            original['ColumnTooBig'] = original['ColumnTooBig'].astype(
+                np.float64)
+            tm.assert_frame_equal(original,
+                                  reread.set_index('index'))
+
+        original.loc[2, 'ColumnTooBig'] = np.inf
+        # Infinity cannot be stored at any width, so writing must raise.
+        with tm.assertRaisesRegexp(ValueError, 'ColumnTooBig.*infinity'):
+            with tm.ensure_clean() as path:
+                original.to_stata(path)
+
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Original file line number	Diff line number	Diff line change
Expand Up		@@ -25,3 +25,4 @@ Bug Fixes

		- compat with ``dateutil==2.6.0`` for testing (:issue:`14621`)
		- allow ``nanoseconds`` in ``Timestamp.replace`` kwargs (:issue:`14621`)
		- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -80,3 +80,4 @@ Performance Improvements

		Bug Fixes
		~~~~~~~~~

Review comment from jreback (Contributor), Nov 15, 2016: You can move this to 0.19.2.