@@ -32,7 +32,7 @@ class DatabaseError(IOError):
32
32
33
33
34
34
#------------------------------------------------------------------------------
35
- # Helper functions
35
+ #--- Helper functions
36
36
37
37
_SQLALCHEMY_INSTALLED = None
38
38
@@ -115,6 +115,21 @@ def _parse_date_columns(data_frame, parse_dates):
115
115
return data_frame
116
116
117
117
118
def _wrap_result(data, columns, index_col=None, coerce_float=True,
                 parse_dates=None):
    """Build a DataFrame from the rows of a query result.

    Parameters
    ----------
    data : sequence of records
        Rows fetched from the database.
    columns : sequence
        Column labels handed to ``DataFrame.from_records``.
    index_col : optional
        Passed to ``DataFrame.set_index``; the index is left untouched
        when this is None.
    coerce_float : boolean, default True
        Forwarded to ``DataFrame.from_records``.
    parse_dates : optional
        Forwarded to ``_parse_date_columns``.

    Returns
    -------
    DataFrame
    """
    wrapped = DataFrame.from_records(data, coerce_float=coerce_float,
                                     columns=columns)

    # Called for its effect on ``wrapped``; the return value is not used.
    _parse_date_columns(wrapped, parse_dates)

    if index_col is None:
        return wrapped

    wrapped.set_index(index_col, inplace=True)
    return wrapped
131
+
132
+
118
133
def execute (sql , con , cur = None , params = None ):
119
134
"""
120
135
Execute the given SQL query using the provided connection object.
@@ -262,7 +277,8 @@ def uquery(sql, con=None, cur=None, retry=True, params=None):
262
277
#--- Read and write to DataFrames
263
278
264
279
def read_sql_table (table_name , con , schema = None , index_col = None ,
265
- coerce_float = True , parse_dates = None , columns = None ):
280
+ coerce_float = True , parse_dates = None , columns = None ,
281
+ chunksize = None ):
266
282
"""Read SQL database table into a DataFrame.
267
283
268
284
Given a table name and an SQLAlchemy engine, returns a DataFrame.
@@ -293,6 +309,9 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
293
309
such as SQLite
294
310
columns : list
295
311
List of column names to select from sql table
312
+ chunksize : int, default None
313
+ If specified, return an iterator where `chunksize` is the number of
314
+ rows to include in each chunk.
296
315
297
316
Returns
298
317
-------
@@ -318,7 +337,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
318
337
pandas_sql = SQLDatabase (con , meta = meta )
319
338
table = pandas_sql .read_table (
320
339
table_name , index_col = index_col , coerce_float = coerce_float ,
321
- parse_dates = parse_dates , columns = columns )
340
+ parse_dates = parse_dates , columns = columns , chunksize = chunksize )
322
341
323
342
if table is not None :
324
343
return table
@@ -327,7 +346,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
327
346
328
347
329
348
def read_sql_query (sql , con , index_col = None , coerce_float = True , params = None ,
330
- parse_dates = None ):
349
+ parse_dates = None , chunksize = None ):
331
350
"""Read SQL query into a DataFrame.
332
351
333
352
Returns a DataFrame corresponding to the result set of the query
@@ -362,6 +381,9 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
362
381
to the keyword arguments of :func:`pandas.to_datetime`
363
382
Especially useful with databases without native Datetime support,
364
383
such as SQLite
384
+ chunksize : int, default None
385
+ If specified, return an iterator where `chunksize` is the number of
386
+ rows to include in each chunk.
365
387
366
388
Returns
367
389
-------
@@ -376,11 +398,11 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
376
398
pandas_sql = pandasSQL_builder (con )
377
399
return pandas_sql .read_query (
378
400
sql , index_col = index_col , params = params , coerce_float = coerce_float ,
379
- parse_dates = parse_dates )
401
+ parse_dates = parse_dates , chunksize = chunksize )
380
402
381
403
382
404
def read_sql (sql , con , index_col = None , coerce_float = True , params = None ,
383
- parse_dates = None , columns = None ):
405
+ parse_dates = None , columns = None , chunksize = None ):
384
406
"""
385
407
Read SQL query or database table into a DataFrame.
386
408
@@ -415,6 +437,9 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
415
437
columns : list
416
438
List of column names to select from sql table (only used when reading
417
439
a table).
440
+ chunksize : int, default None
441
+ If specified, return an iterator where `chunksize` is the
442
+ number of rows to include in each chunk.
418
443
419
444
Returns
420
445
-------
@@ -438,7 +463,8 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
438
463
if isinstance (pandas_sql , SQLiteDatabase ):
439
464
return pandas_sql .read_query (
440
465
sql , index_col = index_col , params = params ,
441
- coerce_float = coerce_float , parse_dates = parse_dates )
466
+ coerce_float = coerce_float , parse_dates = parse_dates ,
467
+ chunksize = chunksize )
442
468
443
469
try :
444
470
_is_table_name = pandas_sql .has_table (sql )
@@ -449,11 +475,12 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
449
475
pandas_sql .meta .reflect (only = [sql ])
450
476
return pandas_sql .read_table (
451
477
sql , index_col = index_col , coerce_float = coerce_float ,
452
- parse_dates = parse_dates , columns = columns )
478
+ parse_dates = parse_dates , columns = columns , chunksize = chunksize )
453
479
else :
454
480
return pandas_sql .read_query (
455
481
sql , index_col = index_col , params = params ,
456
- coerce_float = coerce_float , parse_dates = parse_dates )
482
+ coerce_float = coerce_float , parse_dates = parse_dates ,
483
+ chunksize = chunksize )
457
484
458
485
459
486
def to_sql (frame , name , con , flavor = 'sqlite' , schema = None , if_exists = 'fail' ,
@@ -684,7 +711,27 @@ def insert(self, chunksize=None):
684
711
chunk_iter = zip (* [arr [start_i :end_i ] for arr in data_list ])
685
712
self ._execute_insert (conn , keys , chunk_iter )
686
713
687
- def read (self , coerce_float = True , parse_dates = None , columns = None ):
714
def _query_iterator(self, result, chunksize, columns, coerce_float=True,
                    parse_dates=None):
    """Yield the rows of ``result`` as successive DataFrames.

    Parameters
    ----------
    result : object with a ``fetchmany(size)`` method
        Executed query whose rows are consumed batch by batch.
    chunksize : int
        Number of rows requested from ``result`` per batch.
    columns : sequence
        Column labels handed to ``DataFrame.from_records``.
    coerce_float : boolean, default True
        Forwarded to ``DataFrame.from_records``.
    parse_dates : optional
        Forwarded to ``self._harmonize_columns``.

    Yields
    ------
    DataFrame
        One frame per non-empty batch. Each frame is also stored in
        ``self.frame``, replacing the previous one.
    """
    rows = result.fetchmany(chunksize)
    while rows:
        # The chunk is stored on the instance before harmonizing because
        # _harmonize_columns receives no frame argument.
        self.frame = DataFrame.from_records(
            rows, coerce_float=coerce_float, columns=columns)
        self._harmonize_columns(parse_dates=parse_dates)

        if self.index is not None:
            self.frame.set_index(self.index, inplace=True)

        yield self.frame

        # Fetch the next batch only once the consumer asks for more.
        rows = result.fetchmany(chunksize)
732
+
733
+ def read (self , coerce_float = True , parse_dates = None , columns = None ,
734
+ chunksize = None ):
688
735
689
736
if columns is not None and len (columns ) > 0 :
690
737
from sqlalchemy import select
@@ -696,18 +743,23 @@ def read(self, coerce_float=True, parse_dates=None, columns=None):
696
743
sql_select = self .table .select ()
697
744
698
745
result = self .pd_sql .execute (sql_select )
699
- data = result .fetchall ()
700
746
column_names = result .keys ()
701
747
702
- self .frame = DataFrame .from_records (
703
- data , columns = column_names , coerce_float = coerce_float )
748
+ if chunksize is not None :
749
+ return self ._query_iterator (result , chunksize , column_names ,
750
+ coerce_float = coerce_float ,
751
+ parse_dates = parse_dates )
752
+ else :
753
+ data = result .fetchall ()
754
+ self .frame = DataFrame .from_records (
755
+ data , columns = column_names , coerce_float = coerce_float )
704
756
705
- self ._harmonize_columns (parse_dates = parse_dates )
757
+ self ._harmonize_columns (parse_dates = parse_dates )
706
758
707
- if self .index is not None :
708
- self .frame .set_index (self .index , inplace = True )
759
+ if self .index is not None :
760
+ self .frame .set_index (self .index , inplace = True )
709
761
710
- return self .frame
762
+ return self .frame
711
763
712
764
def _index_name (self , index , index_label ):
713
765
# for writing: index=True to include index in sql table
@@ -898,8 +950,8 @@ class SQLDatabase(PandasSQL):
898
950
Parameters
899
951
----------
900
952
engine : SQLAlchemy engine
901
- Engine to connect with the database. Using SQLAlchemy makes it possible to use any DB supported by that
902
- library.
953
+ Engine to connect with the database. Using SQLAlchemy makes it
954
+ possible to use any DB supported by that library.
903
955
schema : string, default None
904
956
Name of SQL schema in database to write to (if database flavor
905
957
supports this). If None, use default schema (default).
@@ -926,25 +978,27 @@ def execute(self, *args, **kwargs):
926
978
return self .engine .execute (* args , ** kwargs )
927
979
928
980
def read_table (self , table_name , index_col = None , coerce_float = True ,
929
- parse_dates = None , columns = None , schema = None ):
981
+ parse_dates = None , columns = None , schema = None ,
982
+ chunksize = None ):
930
983
"""Read SQL database table into a DataFrame.
931
-
984
+
932
985
Parameters
933
986
----------
934
987
table_name : string
935
988
Name of SQL table in database
936
989
index_col : string, optional
937
990
Column to set as index
938
991
coerce_float : boolean, default True
939
- Attempt to convert values to non-string, non-numeric objects (like
940
- decimal.Decimal) to floating point. Can result in loss of Precision.
992
+ Attempt to convert values to non-string, non-numeric objects
993
+ (like decimal.Decimal) to floating point. This can result in
994
+ loss of precision.
941
995
parse_dates : list or dict
942
996
- List of column names to parse as dates
943
997
- Dict of ``{column_name: format string}`` where format string is
944
998
strftime compatible in case of parsing string times or is one of
945
999
(D, s, ns, ms, us) in case of parsing integer timestamps
946
- - Dict of ``{column_name: arg dict }``, where the arg dict corresponds
947
- to the keyword arguments of :func:`pandas.to_datetime`
1000
+ - Dict of ``{column_name: arg}``, where the arg corresponds
1001
+ to the keyword arguments of :func:`pandas.to_datetime`.
948
1002
Especially useful with databases without native Datetime support,
949
1003
such as SQLite
950
1004
columns : list
@@ -953,6 +1007,9 @@ def read_table(self, table_name, index_col=None, coerce_float=True,
953
1007
Name of SQL schema in database to query (if database flavor
954
1008
supports this). If specified, this overwrites the default
955
1009
schema of the SQLDatabase object.
1010
+ chunksize : int, default None
1011
+ If specified, return an iterator where `chunksize` is the number
1012
+ of rows to include in each chunk.
956
1013
957
1014
Returns
958
1015
-------
@@ -966,10 +1023,25 @@ def read_table(self, table_name, index_col=None, coerce_float=True,
966
1023
"""
967
1024
table = SQLTable (table_name , self , index = index_col , schema = schema )
968
1025
return table .read (coerce_float = coerce_float ,
969
- parse_dates = parse_dates , columns = columns )
970
-
1026
+ parse_dates = parse_dates , columns = columns ,
1027
+ chunksize = chunksize )
1028
+
1029
@staticmethod
def _query_iterator(result, chunksize, columns, index_col=None,
                    coerce_float=True, parse_dates=None):
    """Yield the rows of ``result`` as successive DataFrames.

    Parameters
    ----------
    result : object with a ``fetchmany(size)`` method
        Executed query whose rows are consumed batch by batch.
    chunksize : int
        Number of rows requested from ``result`` per batch.
    columns : sequence
        Column labels for every yielded frame.
    index_col, coerce_float, parse_dates
        Forwarded unchanged to ``_wrap_result``.

    Yields
    ------
    DataFrame
        One frame per non-empty batch; iteration stops at the first
        empty batch.
    """
    rows = result.fetchmany(chunksize)
    while rows:
        yield _wrap_result(rows, columns, index_col=index_col,
                           coerce_float=coerce_float,
                           parse_dates=parse_dates)
        # Fetch the next batch only once the consumer asks for more.
        rows = result.fetchmany(chunksize)
1042
+
971
1043
def read_query (self , sql , index_col = None , coerce_float = True ,
972
- parse_dates = None , params = None ):
1044
+ parse_dates = None , params = None , chunksize = None ):
973
1045
"""Read SQL query into a DataFrame.
974
1046
975
1047
Parameters
@@ -1006,30 +1078,31 @@ def read_query(self, sql, index_col=None, coerce_float=True,
1006
1078
read_sql_table : Read SQL database table into a DataFrame
1007
1079
read_sql
1008
1080
1009
- """
1081
+ """
1010
1082
args = _convert_params (sql , params )
1011
1083
1012
1084
result = self .execute (* args )
1013
- data = result .fetchall ()
1014
1085
columns = result .keys ()
1015
1086
1016
- data_frame = DataFrame .from_records (
1017
- data , columns = columns , coerce_float = coerce_float )
1018
-
1019
- _parse_date_columns (data_frame , parse_dates )
1020
-
1021
- if index_col is not None :
1022
- data_frame .set_index (index_col , inplace = True )
1087
+ if chunksize is not None :
1088
+ return self ._query_iterator (result , chunksize , columns ,
1089
+ index_col = index_col ,
1090
+ coerce_float = coerce_float ,
1091
+ parse_dates = parse_dates )
1092
+ else :
1093
+ data = result .fetchall ()
1094
+ frame = _wrap_result (data , columns , index_col = index_col ,
1095
+ coerce_float = coerce_float ,
1096
+ parse_dates = parse_dates )
1097
+ return frame
1023
1098
1024
- return data_frame
1025
-
1026
1099
read_sql = read_query
1027
1100
1028
1101
def to_sql (self , frame , name , if_exists = 'fail' , index = True ,
1029
1102
index_label = None , schema = None , chunksize = None ):
1030
1103
"""
1031
1104
Write records stored in a DataFrame to a SQL database.
1032
-
1105
+
1033
1106
Parameters
1034
1107
----------
1035
1108
frame : DataFrame
@@ -1308,23 +1381,42 @@ def execute(self, *args, **kwargs):
1308
1381
ex = DatabaseError ("Execution failed on sql '%s': %s" % (args [0 ], exc ))
1309
1382
raise_with_traceback (ex )
1310
1383
1384
@staticmethod
def _query_iterator(cursor, chunksize, columns, index_col=None,
                    coerce_float=True, parse_dates=None):
    """Yield the rows behind ``cursor`` as successive DataFrames.

    Parameters
    ----------
    cursor : DB-API cursor
        Cursor on which the query has already been executed. It is
        closed when iteration ends for any reason.
    chunksize : int
        Number of rows requested from the cursor per batch.
    columns : sequence
        Column labels for every yielded frame.
    index_col, coerce_float, parse_dates
        Forwarded unchanged to ``_wrap_result``.

    Yields
    ------
    DataFrame
        One frame per non-empty batch; iteration stops at the first
        empty batch.
    """
    try:
        while True:
            data = cursor.fetchmany(chunksize)
            if not data:
                break
            # Some drivers hand back a tuple of rows; the non-chunked path
            # normalises through _fetchall_as_list, so do the same here.
            if not isinstance(data, list):
                data = list(data)
            yield _wrap_result(data, columns, index_col=index_col,
                               coerce_float=coerce_float,
                               parse_dates=parse_dates)
    finally:
        # Runs on exhaustion, on an error while fetching/wrapping, and when
        # the consumer abandons the generator early (close() or garbage
        # collection), so the cursor is not leaked in any of those cases.
        cursor.close()
1398
+
1311
1399
def read_query(self, sql, index_col=None, coerce_float=True, params=None,
               parse_dates=None, chunksize=None):
    """Run ``sql`` and return its result set as DataFrame(s).

    Parameters
    ----------
    sql : query to execute
        Combined with ``params`` by ``_convert_params`` before execution.
    index_col, coerce_float, parse_dates
        Forwarded unchanged to the frame-building helpers.
    params : optional
        Query parameters, combined with ``sql`` by ``_convert_params``.
    chunksize : int, default None
        When given, the return value of ``self._query_iterator`` is
        returned instead of a single DataFrame.

    Returns
    -------
    DataFrame, or the result of ``self._query_iterator`` when
    ``chunksize`` is not None.
    """
    cursor = self.execute(*_convert_params(sql, params))
    # First entry of each description tuple is the column name.
    columns = [description[0] for description in cursor.description]

    if chunksize is not None:
        # The cursor is handed over still open; the iterator consumes it.
        return self._query_iterator(cursor, chunksize, columns,
                                    index_col=index_col,
                                    coerce_float=coerce_float,
                                    parse_dates=parse_dates)

    data = self._fetchall_as_list(cursor)
    cursor.close()

    return _wrap_result(data, columns, index_col=index_col,
                        coerce_float=coerce_float,
                        parse_dates=parse_dates)
1328
1420
def _fetchall_as_list (self , cur ):
1329
1421
result = cur .fetchall ()
1330
1422
if not isinstance (result , list ):
0 commit comments