Skip to content

Commit a9075df

Browse files
committed
boost coverage
1 parent 6704991 commit a9075df

File tree

3 files changed

+64
-1
lines changed

3 files changed

+64
-1
lines changed

pandas_gbq/load.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,11 @@ def load_csv_from_file(
185185
chunksize: Optional[int],
186186
schema: Optional[Dict[str, Any]],
187187
):
188+
"""Manually encode a DataFrame to CSV and use the buffer in a load job.
189+
190+
This method is needed for writing with google-cloud-bigquery versions that
191+
don't implement load_table_from_dataframe with the CSV serialization format.
192+
"""
188193
if schema is None:
189194
schema = pandas_gbq.schema.generate_bq_schema(dataframe)
190195

@@ -203,7 +208,7 @@ def load_chunk(chunk, job_config):
203208
finally:
204209
chunk_buffer.close()
205210

206-
return load_csv(dataframe, chunksize, bq_schema, load_chunk,)
211+
return load_csv(dataframe, chunksize, bq_schema, load_chunk)
207212

208213

209214
def load_chunks(

pandas_gbq/schema.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ def generate_bq_schema(dataframe, default_type="STRING"):
101101
"S": "STRING",
102102
"U": "STRING",
103103
"M": "TIMESTAMP",
104+
# TODO: Disambiguate TIMESTAMP from DATETIME based on if column is
105+
# localized.
104106
}
105107

106108
fields = []

tests/unit/test_load.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,62 @@ def test_encode_chunks_with_chunksize_none():
9595
assert len(chunk.index) == 6
9696

9797

98+
def test_load_csv_from_file_generates_schema(mock_bigquery_client):
99+
import google.cloud.bigquery
100+
101+
df = pandas.DataFrame(
102+
{
103+
"int_col": [1, 2, 3],
104+
"bool_col": [True, False, True],
105+
"float_col": [0.0, 1.25, -2.75],
106+
"string_col": ["a", "b", "c"],
107+
"datetime_col": pandas.Series(
108+
[
109+
"2021-12-21 13:28:40.123789",
110+
"2000-01-01 11:10:09",
111+
"2040-10-31 23:59:59.999999",
112+
],
113+
dtype="datetime64[ns]",
114+
),
115+
"timestamp_col": pandas.Series(
116+
[
117+
"2021-12-21 13:28:40.123789",
118+
"2000-01-01 11:10:09",
119+
"2040-10-31 23:59:59.999999",
120+
],
121+
dtype="datetime64[ns]",
122+
).dt.tz_localize(datetime.timezone.utc),
123+
}
124+
)
125+
destination = google.cloud.bigquery.TableReference.from_string(
126+
"my-project.my_dataset.my_table"
127+
)
128+
129+
_ = list(
130+
load.load_csv_from_file(mock_bigquery_client, df, destination, None, None, None)
131+
)
132+
133+
mock_load = mock_bigquery_client.load_table_from_file
134+
assert mock_load.called
135+
_, kwargs = mock_load.call_args
136+
assert "job_config" in kwargs
137+
sent_schema = kwargs["job_config"].schema
138+
assert sent_schema[0].name == "int_col"
139+
assert sent_schema[0].field_type == "INTEGER"
140+
assert sent_schema[1].name == "bool_col"
141+
assert sent_schema[1].field_type == "BOOLEAN"
142+
assert sent_schema[2].name == "float_col"
143+
assert sent_schema[2].field_type == "FLOAT"
144+
assert sent_schema[3].name == "string_col"
145+
assert sent_schema[3].field_type == "STRING"
146+
# TODO: Disambiguate TIMESTAMP from DATETIME based on if column is
147+
# localized.
148+
assert sent_schema[4].name == "datetime_col"
149+
assert sent_schema[4].field_type == "TIMESTAMP"
150+
assert sent_schema[5].name == "timestamp_col"
151+
assert sent_schema[5].field_type == "TIMESTAMP"
152+
153+
98154
@pytest.mark.parametrize(
99155
["bigquery_has_from_dataframe_with_csv", "api_method"],
100156
[(True, "load_parquet"), (True, "load_csv"), (False, "load_csv")],

0 commit comments

Comments
 (0)