1
1
from typing import Dict , Tuple , List , Optional , Any , Union , Sequence
2
2
3
3
import pandas
4
- import pyarrow
4
+ try :
5
+ import pyarrow
6
+ except ImportError :
7
+ pyarrow = None
5
8
import requests
6
9
import json
7
10
import os
22
25
ParamEscaper ,
23
26
inject_parameters ,
24
27
transform_paramstyle ,
28
+ ColumnTable ,
29
+ ColumnQueue
25
30
)
26
31
from databricks .sql .parameters .native import (
27
32
DbsqlParameterBase ,
@@ -991,14 +996,14 @@ def fetchmany(self, size: int) -> List[Row]:
991
996
else :
992
997
raise Error ("There is no active result set" )
993
998
994
- def fetchall_arrow (self ) -> pyarrow .Table :
999
def fetchall_arrow(self) -> "pyarrow.Table":
    """
    Return the result of ``fetchall_arrow()`` on the active result set.

    The return annotation is a string so that it is not evaluated when the
    function is defined.

    :raises Error: if there is no active result set.
    """
    self._check_not_closed()
    if not self.active_result_set:
        raise Error("There is no active result set")
    return self.active_result_set.fetchall_arrow()
1000
1005
1001
- def fetchmany_arrow (self , size ) -> pyarrow .Table :
1006
+ def fetchmany_arrow (self , size ) -> " pyarrow.Table" :
1002
1007
self ._check_not_closed ()
1003
1008
if self .active_result_set :
1004
1009
return self .active_result_set .fetchmany_arrow (size )
@@ -1143,6 +1148,18 @@ def _fill_results_buffer(self):
1143
1148
self .results = results
1144
1149
self .has_more_rows = has_more_rows
1145
1150
1151
def _convert_columnar_table(self, table):
    """
    Convert a columnar table into a list of ``Row`` objects.

    Field names are taken from the first element of each entry in
    ``self.description``; cell values are read with
    ``table.get_item(col_index, row_index)``.

    :param table: object exposing ``num_rows``, ``num_columns`` and ``get_item``.
    :return: one row object per table row, in row order.
    """
    field_names = [entry[0] for entry in self.description]
    make_row = Row(*field_names)
    return [
        make_row(*[table.get_item(col, row) for col in range(table.num_columns)])
        for row in range(table.num_rows)
    ]
1162
+
1146
1163
def _convert_arrow_table (self , table ):
1147
1164
column_names = [c [0 ] for c in self .description ]
1148
1165
ResultRow = Row (* column_names )
@@ -1185,7 +1202,7 @@ def _convert_arrow_table(self, table):
1185
1202
def rownumber (self ):
1186
1203
return self ._next_row_index
1187
1204
1188
- def fetchmany_arrow (self , size : int ) -> pyarrow .Table :
1205
+ def fetchmany_arrow (self , size : int ) -> " pyarrow.Table" :
1189
1206
"""
1190
1207
Fetch the next set of rows of a query result, returning a PyArrow table.
1191
1208
@@ -1210,7 +1227,46 @@ def fetchmany_arrow(self, size: int) -> pyarrow.Table:
1210
1227
1211
1228
return results
1212
1229
1213
- def fetchall_arrow (self ) -> pyarrow .Table :
1230
def merge_columnar(self, result1, result2):
    """
    Combine two columnar results into a single columnar result.

    :param result1: first columnar result; its column names are used for the output.
    :param result2: second columnar result, concatenated after ``result1`` column by column.
    :return: a new ``ColumnTable`` whose i-th column is
        ``result1.column_table[i] + result2.column_table[i]``.
    :raises ValueError: if the two results do not have equal ``column_names``.
    """
    names = result1.column_names
    if names != result2.column_names:
        raise ValueError("The columns in the results don't match")

    combined_columns = []
    for idx in range(result1.num_columns):
        combined_columns.append(result1.column_table[idx] + result2.column_table[idx])
    return ColumnTable(combined_columns, names)
1243
+
1244
def fetchmany_columnar(self, size: int):
    """
    Fetch the next set of rows of a query result, returning a Columnar Table.
    An empty sequence is returned when no more rows are available.

    Up to ``size`` rows are taken from the current results buffer. While rows
    are still missing, the result set has not been closed server side and
    more rows are available, the buffer is refilled and the newly fetched
    rows are merged into the result with ``merge_columnar``.

    :param size: maximum number of rows to return; must be >= 0.
    :return: the columnar result holding the fetched rows.
    :raises ValueError: if ``size`` is negative.
    """
    if size < 0:
        # Format the message explicitly: passing ``size`` as a second exception
        # argument would leave the "%s" placeholder uninterpolated.
        raise ValueError("size argument for fetchmany is %s but must be >= 0" % size)

    results = self.results.next_n_rows(size)
    n_remaining_rows = size - results.num_rows
    self._next_row_index += results.num_rows

    while (
        n_remaining_rows > 0
        and not self.has_been_closed_server_side
        and self.has_more_rows
    ):
        # Current buffer is exhausted: pull the next batch and keep filling.
        self._fill_results_buffer()
        partial_results = self.results.next_n_rows(n_remaining_rows)
        results = self.merge_columnar(results, partial_results)
        n_remaining_rows -= partial_results.num_rows
        self._next_row_index += partial_results.num_rows

    return results
1268
+
1269
+ def fetchall_arrow (self ) -> "pyarrow.Table" :
1214
1270
"""Fetch all (remaining) rows of a query result, returning them as a PyArrow table."""
1215
1271
results = self .results .remaining_rows ()
1216
1272
self ._next_row_index += results .num_rows
@@ -1223,12 +1279,30 @@ def fetchall_arrow(self) -> pyarrow.Table:
1223
1279
1224
1280
return results
1225
1281
1282
def fetchall_columnar(self):
    """
    Fetch all (remaining) rows of a query result, returning them as a Columnar table.

    Drains the current results buffer, then keeps refilling it and merging
    the new rows in until the result set is closed server side or no more
    rows are available.
    """
    merged = self.results.remaining_rows()
    self._next_row_index += merged.num_rows

    while True:
        if self.has_been_closed_server_side or not self.has_more_rows:
            break
        self._fill_results_buffer()
        chunk = self.results.remaining_rows()
        merged = self.merge_columnar(merged, chunk)
        self._next_row_index += chunk.num_rows

    return merged
1294
+
1226
1295
def fetchone (self ) -> Optional [Row ]:
1227
1296
"""
1228
1297
Fetch the next row of a query result set, returning a single sequence,
1229
1298
or None when no more data is available.
1230
1299
"""
1231
- res = self ._convert_arrow_table (self .fetchmany_arrow (1 ))
1300
+
1301
+ if isinstance (self .results , ColumnQueue ):
1302
+ res = self ._convert_columnar_table (self .fetchmany_columnar (1 ))
1303
+ else :
1304
+ res = self ._convert_arrow_table (self .fetchmany_arrow (1 ))
1305
+
1232
1306
if len (res ) > 0 :
1233
1307
return res [0 ]
1234
1308
else :
@@ -1238,15 +1312,21 @@ def fetchall(self) -> List[Row]:
1238
1312
"""
1239
1313
Fetch all (remaining) rows of a query result, returning them as a list of rows.
1240
1314
"""
1241
- return self ._convert_arrow_table (self .fetchall_arrow ())
1315
+ if isinstance (self .results , ColumnQueue ):
1316
+ return self ._convert_columnar_table (self .fetchall_columnar ())
1317
+ else :
1318
+ return self ._convert_arrow_table (self .fetchall_arrow ())
1242
1319
1243
1320
def fetchmany(self, size: int) -> List[Row]:
    """
    Fetch the next set of rows of a query result, returning a list of rows.

    When the results buffer is a ``ColumnQueue`` the rows come from the
    columnar fetch path; otherwise they come from the Arrow fetch path.

    An empty sequence is returned when no more rows are available.
    """
    if not isinstance(self.results, ColumnQueue):
        arrow_table = self.fetchmany_arrow(size)
        return self._convert_arrow_table(arrow_table)

    columnar_table = self.fetchmany_columnar(size)
    return self._convert_columnar_table(columnar_table)
1250
1330
1251
1331
def close (self ) -> None :
1252
1332
"""
0 commit comments