diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index c540319e..6600e52c 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -1,6 +1,15 @@
 Changelog
 =========
 
+.. _changelog-0.5.1:
+
+0.5.1 / (Unreleased)
+--------------------
+
+- Use general float with 15 decimal digit precision when writing to local
+  CSV buffer in ``to_gbq``. This prevents numerical overflow in certain
+  edge cases. (:issue:`192`)
+
 .. _changelog-0.5.0:
 
 0.5.0 / 2018-06-15
diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py
index a7d53f89..436eb2d1 100644
--- a/pandas_gbq/load.py
+++ b/pandas_gbq/load.py
@@ -15,7 +15,7 @@ def encode_chunk(dataframe):
     csv_buffer = six.StringIO()
     dataframe.to_csv(
         csv_buffer, index=False, header=False, encoding='utf-8',
-        date_format='%Y-%m-%d %H:%M:%S.%f')
+        float_format='%.15g', date_format='%Y-%m-%d %H:%M:%S.%f')
 
     # Convert to a BytesIO buffer so that unicode text is properly handled.
     # See: https://github.com/pydata/pandas-gbq/issues/106
diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py
index 398a01ae..fdbedc46 100644
--- a/tests/unit/test_load.py
+++ b/tests/unit/test_load.py
@@ -4,6 +4,7 @@
 import pandas
 
 from pandas_gbq import load
+from io import StringIO
 
 
 def test_encode_chunk_with_unicode():
@@ -20,6 +21,20 @@ def test_encode_chunk_with_unicode():
     assert u'信用卡' in csv_string
 
 
+def test_encode_chunk_with_floats():
+    """Test that floats in a dataframe are encoded with at most 15 significant
+    figures.
+
+    See: https://github.com/pydata/pandas-gbq/issues/192
+    """
+    input_csv = StringIO(u'01/01/17 23:00,1.05148,1.05153,1.05148,1.05153,4')
+    df = pandas.read_csv(input_csv, header=None)
+    csv_buffer = load.encode_chunk(df)
+    csv_bytes = csv_buffer.read()
+    csv_string = csv_bytes.decode('utf-8')
+    assert '1.05153' in csv_string
+
+
 def test_encode_chunks_splits_dataframe():
     df = pandas.DataFrame(numpy.random.randn(6, 4), index=range(6))
     chunks = list(load.encode_chunks(df, chunksize=2))