Skip to content

Commit acfac47

Browse files
committed
handle repeated fields in schema
1 parent bb68875 commit acfac47

File tree

2 files changed

+18
-11
lines changed

2 files changed

+18
-11
lines changed

pandas_gbq/gbq.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import time
55
import warnings
6+
from collections import OrderedDict
67
from datetime import datetime
78

89
import numpy as np
@@ -442,19 +443,25 @@ def _get_credentials_file():
442443
'PANDAS_GBQ_CREDENTIALS_FILE')
443444

444445

445-
def _parse_data(schema, rows):
446+
def _parse_schema(schema_fields):
    """Yield ``(column_name, dtype)`` pairs for a list of schema fields.

    Parameters
    ----------
    schema_fields : iterable of dict
        Field descriptions. Each must provide ``'name'`` and, unless the
        field is repeated, ``'type'``. ``'mode'`` is optional.

    Yields
    ------
    tuple
        ``(str(name), dtype)`` in input order. ``dtype`` is ``object`` for
        ``REPEATED`` fields and for any type missing from the local dtype
        map; otherwise it is the mapped dtype.
    """
    # see:
    # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
    # #missing-data-casting-rules-and-indexing
    dtype_map = {'FLOAT': np.dtype(float),
                 'TIMESTAMP': 'M8[ns]'}

    for field in schema_fields:
        name = str(field['name'])
        # ``mode`` is optional in a BigQuery field schema and defaults to
        # NULLABLE, so a missing/empty value must not raise here.
        mode = (field.get('mode') or 'NULLABLE').upper()
        if mode == 'REPEATED':
            # Repeated fields hold a list per row; only ``object`` can
            # store that, whatever the element type is.
            yield name, object
        else:
            yield name, dtype_map.get(field['type'].upper(), object)
460+
461+
462+
def _parse_data(schema, rows):
453463

454-
column_dtypes = {
455-
str(field['name']):
456-
dtype_map.get(field['type'].upper(), object) for field in fields
457-
}
464+
column_dtypes = OrderedDict(_parse_schema(schema['fields']))
458465

459466
df = DataFrame(data=(iter(r) for r in rows), columns=column_dtypes.keys())
460467
for column in df:

tests/system/test_gbq.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,14 @@
1313

1414
from pandas_gbq import gbq
1515

16-
1716
TABLE_ID = 'new_test'
1817

1918

2019
def _get_dataset_prefix_random():
    """Return ``'pandas_gbq_'`` followed by a random integer suffix.

    The suffix is drawn with ``randint(1, 100000)``.
    """
    suffix = randint(1, 100000)
    return 'pandas_gbq_' + str(suffix)
2221

2322

24-
@pytest.fixture(autouse=True, scope='module')
25-
def _test_imports():
23+
def test_imports():
2624
try:
2725
import pkg_resources # noqa
2826
except ImportError:
@@ -392,7 +390,8 @@ def test_bad_project_id(self):
392390
with pytest.raises(gbq.GenericGBQException):
393391
gbq.read_gbq('SELCET * FROM [publicdata:samples.shakespeare]',
394392
project_id='not-my-project',
395-
private_key=self.credentials)
393+
private_key=self.credentials,
394+
dialect='legacy')
396395

397396
def test_bad_table_name(self, project_id):
398397
with pytest.raises(gbq.GenericGBQException):
@@ -427,7 +426,7 @@ def test_zero_rows(self, project_id):
427426
('is_bot', np.dtype(bool)), ('ts', 'M8[ns]')])
428427
expected_result = DataFrame(
429428
page_array, columns=['title', 'id', 'is_bot', 'ts'])
430-
tm.assert_frame_equal(df, expected_result)
429+
tm.assert_frame_equal(df, expected_result, check_index_type=False)
431430

432431
def test_legacy_sql(self, project_id):
433432
legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10"
@@ -655,6 +654,7 @@ def test_array_agg(self, project_id):
655654
tm.assert_frame_equal(df, DataFrame([["a", [1, 3]], ["b", [2]]],
656655
columns=["letter", "numbers"]))
657656

657+
# @pytest.mark.xfail
658658
def test_array_of_floats(self, private_key_path, project_id):
659659
query = """select [1.1, 2.2, 3.3] as a, 4 as b"""
660660
df = gbq.read_gbq(query, project_id=project_id,

0 commit comments

Comments
 (0)