diff --git a/conftest.py b/conftest.py
index 7f9a6721..b5803f37 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,4 +1,4 @@
-"""Shared pytest fixtures for system tests."""
+"""Shared pytest fixtures for `tests/system` and `samples/tests` tests."""
 
 import os
 import os.path
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 46570643..d4c52044 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -1,6 +1,17 @@
 Changelog
 =========
 
+.. _changelog-0.14.1:
+
+0.14.1 / TBD
+------------
+
+Bug fixes
+~~~~~~~~~
+
+- Encode floating point values with greater precision. (:issue:`326`)
+
+
 .. _changelog-0.14.0:
 
 0.14.0 / 2020-10-05
diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py
index 04b32efa..ec00d4a1 100644
--- a/pandas_gbq/load.py
+++ b/pandas_gbq/load.py
@@ -19,7 +19,7 @@ def encode_chunk(dataframe):
         index=False,
         header=False,
         encoding="utf-8",
-        float_format="%.15g",
+        float_format="%.17g",
         date_format="%Y-%m-%d %H:%M:%S.%f",
     )
 
diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py
new file mode 100644
index 00000000..ca5e406a
--- /dev/null
+++ b/tests/system/test_to_gbq.py
@@ -0,0 +1,54 @@
+import functools
+import pandas
+import pandas.testing
+
+import pytest
+
+
+pytest.importorskip("google.cloud.bigquery", minversion="1.24.0")
+
+
+@pytest.fixture
+def method_under_test(credentials):
+    """Return ``pandas_gbq.to_gbq`` with the test credentials pre-bound."""
+    import pandas_gbq
+
+    return functools.partial(pandas_gbq.to_gbq, credentials=credentials)
+
+
+def test_float_round_trip(
+    method_under_test, random_dataset_id, bigquery_client
+):
+    """Ensure that 64-bit floating point numbers are unchanged.
+
+    See: https://github.com/pydata/pandas-gbq/issues/326
+    """
+
+    table_id = "{}.float_round_trip".format(random_dataset_id)
+    input_floats = pandas.Series(
+        [
+            0.14285714285714285,
+            0.4406779661016949,
+            1.05148,
+            1.05153,
+            1.8571428571428572,
+            2.718281828459045,
+            3.141592653589793,
+            2.0988936657440586e43,
+        ],
+        name="float_col",
+    )
+    df = pandas.DataFrame({"float_col": input_floats})
+    method_under_test(df, table_id)
+
+    round_trip = bigquery_client.list_rows(table_id).to_dataframe()
+    # sort_values() keeps the original index labels, so reset the index;
+    # otherwise rows returned in a different order fail the index check.
+    round_trip_floats = (
+        round_trip["float_col"].sort_values().reset_index(drop=True)
+    )
+    pandas.testing.assert_series_equal(
+        round_trip_floats,
+        input_floats,
+        check_exact=True,
+    )
diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py
index 9be8fe89..7ed463c1 100644
--- a/tests/unit/test_load.py
+++ b/tests/unit/test_load.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+import textwrap
 from io import StringIO
 
 import numpy
@@ -24,17 +25,35 @@ def test_encode_chunk_with_unicode():
 
 
 def test_encode_chunk_with_floats():
-    """Test that floats in a dataframe are encoded with at most 15 significant
+    """Test that floats in a dataframe are encoded with at most 17 significant
     figures.
 
-    See: https://github.com/pydata/pandas-gbq/issues/192
+    See: https://github.com/pydata/pandas-gbq/issues/192 and
+    https://github.com/pydata/pandas-gbq/issues/326
     """
-    input_csv = StringIO(u"01/01/17 23:00,1.05148,1.05153,1.05148,1.05153,4")
-    df = pandas.read_csv(input_csv, header=None)
-    csv_buffer = load.encode_chunk(df)
-    csv_bytes = csv_buffer.read()
-    csv_string = csv_bytes.decode("utf-8")
-    assert "1.05153" in csv_string
+    # The leading backslash puts the first row on its own indented line so
+    # that dedent() finds a common margin and strips it from every row.
+    input_csv = textwrap.dedent(
+        """\
+        01/01/17 23:00,0.14285714285714285,4
+        01/02/17 22:00,1.05148,3
+        01/03/17 21:00,1.05153,2
+        01/04/17 20:00,3.141592653589793,1
+        01/05/17 19:00,2.0988936657440586e+43,0
+        """
+    )
+    input_df = pandas.read_csv(
+        StringIO(input_csv), header=None, float_precision="round_trip"
+    )
+    csv_buffer = load.encode_chunk(input_df)
+    round_trip = pandas.read_csv(
+        csv_buffer, header=None, float_precision="round_trip"
+    )
+    pandas.testing.assert_frame_equal(
+        round_trip,
+        input_df,
+        check_exact=True,
+    )
 
 
 def test_encode_chunk_with_newlines():