Handle REPEATED-mode fields in schema by mapping them to object dtype

max-sixty · max-sixty · commit acfac4769a7e · 2018-08-22T02:18:25.000-04:00
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
@@ -3,6 +3,7 @@
 import os
 import time
 import warnings
+from collections import OrderedDict
 from datetime import datetime
 
 import numpy as np
@@ -442,19 +443,25 @@ def _get_credentials_file():
         'PANDAS_GBQ_CREDENTIALS_FILE')
 
 
-def _parse_data(schema, rows):
+def _parse_schema(schema_fields):
     # see:
     # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     # #missing-data-casting-rules-and-indexing
     dtype_map = {'FLOAT': np.dtype(float),
                  'TIMESTAMP': 'M8[ns]'}
 
-    fields = schema['fields']
+    for field in schema_fields:
+        name = str(field['name'])
+        if field['mode'].upper() == 'REPEATED':
+            yield name, object
+        else:
+            dtype = dtype_map.get(field['type'].upper(), object)
+            yield name, dtype
+
+
+def _parse_data(schema, rows):
 
-    column_dtypes = {
-        str(field['name']):
-        dtype_map.get(field['type'].upper(), object) for field in fields
-    }
+    column_dtypes = OrderedDict(_parse_schema(schema['fields']))
 
     df = DataFrame(data=(iter(r) for r in rows), columns=column_dtypes.keys())
     for column in df:
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
@@ -13,16 +13,14 @@
 
 from pandas_gbq import gbq
 
-
 TABLE_ID = 'new_test'
 
 
 def _get_dataset_prefix_random():
     return ''.join(['pandas_gbq_', str(randint(1, 100000))])
 
 
-@pytest.fixture(autouse=True, scope='module')
-def _test_imports():
+def test_imports():
     try:
         import pkg_resources  # noqa
     except ImportError:
@@ -392,7 +390,8 @@ def test_bad_project_id(self):
         with pytest.raises(gbq.GenericGBQException):
             gbq.read_gbq('SELCET * FROM [publicdata:samples.shakespeare]',
                          project_id='not-my-project',
-                         private_key=self.credentials)
+                         private_key=self.credentials,
+                         dialect='legacy')
 
     def test_bad_table_name(self, project_id):
         with pytest.raises(gbq.GenericGBQException):
@@ -427,7 +426,7 @@ def test_zero_rows(self, project_id):
                          ('is_bot', np.dtype(bool)), ('ts', 'M8[ns]')])
         expected_result = DataFrame(
             page_array, columns=['title', 'id', 'is_bot', 'ts'])
-        tm.assert_frame_equal(df, expected_result)
+        tm.assert_frame_equal(df, expected_result, check_index_type=False)
 
     def test_legacy_sql(self, project_id):
         legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10"
@@ -655,6 +654,7 @@ def test_array_agg(self, project_id):
         tm.assert_frame_equal(df, DataFrame([["a", [1, 3]], ["b", [2]]],
                                             columns=["letter", "numbers"]))
 
+    # @pytest.mark.xfail
     def test_array_of_floats(self, private_key_path, project_id):
         query = """select [1.1, 2.2, 3.3] as a, 4 as b"""
         df = gbq.read_gbq(query, project_id=project_id,