Skip to content

Commit 045cecf

Browse files
committed
Allow newlines in data passed to to_gbq()
1 parent 5d0346a commit 045cecf

File tree

4 files changed

+52
-0
lines changed

4 files changed

+52
-0
lines changed

docs/source/changelog.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
Changelog
22
=========
33

4+
- Allow newlines in data passed to ``to_gbq``. (:issue:`180`)
5+
46
.. _changelog-0.7.0:
57

68
0.7.0 / 2018-10-19

pandas_gbq/load.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ def load_chunks(
6161
job_config = bigquery.LoadJobConfig()
6262
job_config.write_disposition = "WRITE_APPEND"
6363
job_config.source_format = "CSV"
64+
job_config.allow_quoted_newlines = True
6465

6566
if schema is None:
6667
schema = pandas_gbq.schema.generate_bq_schema(dataframe)

tests/system/test_gbq.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1167,6 +1167,39 @@ def test_upload_mixed_float_and_int(self, project_id):
11671167

11681168
assert len(result_df) == test_size
11691169

1170+
def test_upload_data_with_newlines(self, project_id):
    """Round-trip a string column that contains an embedded newline.

    Uploads a two-row frame with ``gbq.to_gbq``, reads the table back with
    ``gbq.read_gbq`` and checks the row count. On Python 3 the string
    values are additionally compared after sorting both sides.
    """
    test_id = "data_with_newlines"
    test_size = 2
    df = DataFrame({"s": ["abcd", "ef\ngh"]})

    gbq.to_gbq(
        df,
        self.destination_table + test_id,
        project_id=project_id,
        private_key=self.credentials,
    )

    result_df = gbq.read_gbq(
        "SELECT * FROM {0}".format(self.destination_table + test_id),
        project_id=project_id,
        private_key=self.credentials,
        dialect="legacy",
    )

    assert len(result_df) == test_size

    if sys.version_info.major < 3:
        # Pass the reason positionally: the ``msg`` keyword is deprecated
        # in newer pytest releases (renamed to ``reason``), while the
        # first positional argument is accepted by old and new versions.
        pytest.skip("Unicode comparison in Py2 not working")

    # Row order of the query result is not relied upon, so sort both sides
    # before comparing values.
    result = result_df["s"].sort_values()
    expected = df["s"].sort_values()

    tm.assert_numpy_array_equal(expected.values, result.values)
1202+
11701203
def test_upload_data_flexible_column_order(self, project_id):
11711204
test_id = "13"
11721205
test_size = 10

tests/unit/test_load.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,22 @@ def test_encode_chunk_with_floats():
3737
assert "1.05153" in csv_string
3838

3939

40+
def test_encode_chunk_with_newlines():
    """Check that LF and CRLF inside string values end up in quoted fields.

    See: https://github.com/pydata/pandas-gbq/issues/180
    """
    frame = pandas.DataFrame({"s": ["abcd", "ef\ngh", "ij\r\nkl"]})
    encoded = load.encode_chunk(frame).read().decode("utf-8")

    # The plain value only has to be present; the values carrying line
    # breaks must appear wrapped in double quotes with the break intact.
    for fragment in ("abcd", '"ef\ngh"', '"ij\r\nkl"'):
        assert fragment in encoded
54+
55+
4056
def test_encode_chunks_splits_dataframe():
4157
df = pandas.DataFrame(numpy.random.randn(6, 4), index=range(6))
4258
chunks = list(load.encode_chunks(df, chunksize=2))

0 commit comments

Comments
 (0)