Skip to content

test: upload DATE column with various dtypes #420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 16, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
BLACK_PATHS = ["docs", "pandas_gbq", "tests", "noxfile.py", "setup.py"]

DEFAULT_PYTHON_VERSION = "3.8"
SYSTEM_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]
UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]
SYSTEM_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]

CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute()

Expand Down
8 changes: 6 additions & 2 deletions owlbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,16 @@
# ----------------------------------------------------------------------------

extras = ["tqdm"]
extras_by_python = {
"3.9": ["tqdm", "db-dtypes"],
}
templated_files = common.py_library(
unit_test_python_versions=["3.7", "3.8", "3.9"],
system_test_python_versions=["3.7", "3.8", "3.9"],
unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
system_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
cov_level=86,
unit_test_extras=extras,
system_test_extras=extras,
system_test_extras_by_python=extras_by_python,
intersphinx_dependencies={
"pandas": "https://pandas.pydata.org/pandas-docs/stable/",
"pydata-google-auth": "https://pydata-google-auth.readthedocs.io/en/latest/",
Expand Down
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@
# https://github.com/pydata/pandas-gbq/issues/343
"google-cloud-bigquery[bqstorage,pandas]>=1.11.1,<3.0.0dev,!=2.4.*",
]
extras = {"tqdm": "tqdm>=4.23.0"}
extras = {
"tqdm": "tqdm>=4.23.0",
"db-dtypes": "db-dtypes >=0.3.0,<2.0.0",
}

# Setup boilerplate below this line.

Expand Down
108 changes: 99 additions & 9 deletions tests/system/test_to_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,20 @@
import pandas.testing
import pytest

# db-dtypes is an optional dependency providing pandas extension dtypes for
# BigQuery types (e.g. the "dbdate" dtype used below). Treat it as absent
# rather than failing collection, so the cases that need it can be skipped.
try:
    import db_dtypes
except ImportError:
    db_dtypes = None


# Skip this whole module unless a sufficiently recent google-cloud-bigquery
# client is installed.
pytest.importorskip("google.cloud.bigquery", minversion="1.24.0")


# Every to_gbq upload path exercised by the round-trip tests.
_API_METHODS = ("default", "load_parquet", "load_csv")


@pytest.fixture(params=_API_METHODS)
def api_method(request):
    """Parametrize each test across all supported ``api_method`` values."""
    return request.param


@pytest.fixture
def method_under_test(credentials, project_id):
import pandas_gbq
Expand All @@ -23,7 +33,7 @@ def method_under_test(credentials, project_id):


@pytest.mark.parametrize(
["input_series"],
["input_series", "skip_csv"],
[
# Ensure that 64-bit floating point numbers are unchanged.
# See: https://github.com/pydata/pandas-gbq/issues/326
Expand All @@ -41,17 +51,13 @@ def method_under_test(credentials, project_id):
],
name="test_col",
),
False,
),
(
pandas.Series(
[
"abc",
"defg",
# Ensure that empty strings are written as empty string,
# not NULL. See:
# https://github.com/googleapis/python-bigquery-pandas/issues/366
"",
None,
# Ensure that unicode characters are encoded. See:
# https://github.com/googleapis/python-bigquery-pandas/issues/106
"信用卡",
Expand All @@ -60,23 +66,107 @@ def method_under_test(credentials, project_id):
],
name="test_col",
),
False,
),
(
pandas.Series(
[
"abc",
"defg",
# Ensure that empty strings are written as empty string,
# not NULL. See:
# https://github.com/googleapis/python-bigquery-pandas/issues/366
"",
None,
],
name="empty_strings",
),
True,
),
],
)
def test_series_round_trip(
method_under_test, random_dataset_id, bigquery_client, input_series
method_under_test,
random_dataset_id,
bigquery_client,
input_series,
api_method,
skip_csv,
):
if api_method == "load_csv" and skip_csv:
pytest.skip("Loading with CSV not supported.")
table_id = f"{random_dataset_id}.round_trip_{random.randrange(1_000_000)}"
input_series = input_series.sort_values().reset_index(drop=True)
df = pandas.DataFrame(
# Some errors only occur in multi-column dataframes. See:
# https://github.com/googleapis/python-bigquery-pandas/issues/366
{"test_col": input_series, "test_col2": input_series}
)
method_under_test(df, table_id)
method_under_test(df, table_id, api_method=api_method)

round_trip = bigquery_client.list_rows(table_id).to_dataframe()
round_trip_series = round_trip["test_col"].sort_values().reset_index(drop=True)
pandas.testing.assert_series_equal(
round_trip_series, input_series, check_exact=True,
round_trip_series, input_series, check_exact=True, check_names=False,
)


def _date_round_trip_case(dtype, skip_csv):
    """Build one (input_df, table_schema, skip_csv) case for a DATE column."""
    frame = pandas.DataFrame(
        {
            "date_col": pandas.Series(
                ["2021-04-17", "1999-12-31", "2038-01-19"], dtype=dtype,
            ),
        }
    )
    return frame, [{"name": "date_col", "type": "DATE"}], skip_csv


# Ensure that a DATE column can be written with datetime64[ns] dtype
# data. See:
# https://github.com/googleapis/python-bigquery-pandas/issues/362
DATAFRAME_ROUND_TRIPS = [_date_round_trip_case("datetime64[ns]", True)]

# The "dbdate" extension dtype is only usable when the optional db-dtypes
# package is installed (see the guarded import at the top of the module).
if db_dtypes is not None:
    DATAFRAME_ROUND_TRIPS.append(_date_round_trip_case("dbdate", False))


@pytest.mark.parametrize(
    ["input_df", "table_schema", "skip_csv"], DATAFRAME_ROUND_TRIPS
)
def test_dataframe_round_trip_with_table_schema(
    method_under_test,
    random_dataset_id,
    bigquery_client,
    input_df,
    table_schema,
    api_method,
    skip_csv,
):
    """Upload ``input_df`` with an explicit ``table_schema``, download the
    table again, and check the values survived the round trip unchanged.
    """
    if api_method == "load_csv" and skip_csv:
        pytest.skip("Loading with CSV not supported.")
    table_id = f"{random_dataset_id}.round_trip_w_schema_{random.randrange(1_000_000)}"
    method_under_test(
        input_df, table_id, table_schema=table_schema, api_method=api_method
    )
    round_trip = bigquery_client.list_rows(table_id).to_dataframe(
        dtypes=dict(zip(input_df.columns, input_df.dtypes))
    )
    # list_rows makes no row-order guarantee, so sort both frames by all
    # columns before comparing to avoid order-dependent flakes.
    columns = list(input_df.columns)
    expected = input_df.sort_values(columns).reset_index(drop=True)
    round_trip = round_trip.sort_values(columns).reset_index(drop=True)
    pandas.testing.assert_frame_equal(expected, round_trip)