15
15
import pandas .core .common as com
16
16
from pandas .compat import lzip , map , zip , raise_with_traceback , string_types
17
17
from pandas .core .api import DataFrame , Series
18
+ from pandas .core .common import notnull , isnull
18
19
from pandas .core .base import PandasObject
19
20
from pandas .tseries .tools import to_datetime
20
21
@@ -598,12 +599,6 @@ def create(self):
598
599
def insert_statement(self):
    """Build the INSERT construct for this object's table.

    Returns
    -------
    object
        Whatever ``self.table.insert()`` returns; the result is passed
        through unchanged.
    """
    statement = self.table.insert()
    return statement
600
601
601
def maybe_asscalar(self, i):
    """Convert a NumPy scalar to the matching built-in Python scalar.

    Parameters
    ----------
    i : object
        Value to convert. Anything exposing an ``item()`` method (NumPy
        scalars, size-1 arrays) is unwrapped; every other object is
        returned as-is.

    Returns
    -------
    object
        ``i.item()`` when that method exists, otherwise ``i`` itself.
    """
    # ``np.asscalar(i)`` was only a thin wrapper around ``i.item()`` and has
    # been removed from NumPy.  Looking it up on a current NumPy raises
    # AttributeError, which the handler below would swallow, so NumPy
    # scalars would silently be returned unconverted.  Calling ``item()``
    # directly keeps the original behaviour on every NumPy version.
    try:
        return i.item()
    except AttributeError:
        # Plain Python objects (str, int, None, ...) have no ``item()``.
        return i
606
-
607
602
def insert_data (self ):
608
603
if self .index is not None :
609
604
temp = self .frame .copy ()
@@ -615,16 +610,36 @@ def insert_data(self):
615
610
"duplicate name in index/columns: {0}" .format (err ))
616
611
else :
617
612
temp = self .frame
613
+
614
+ column_names = list (map (str , temp .columns ))
615
+ ncols = len (column_names )
616
+ data_list = [None ] * ncols
617
+ blocks = temp ._data .blocks
618
+
619
+ for i in range (len (blocks )):
620
+ b = blocks [i ]
621
+ if b .is_datetime :
622
+ # convert to microsecond resolution so this yields datetime.datetime
623
+ d = b .values .astype ('M8[us]' ).astype (object )
624
+ else :
625
+ d = np .array (b .values , dtype = object )
626
+
627
+ # replace NaN with None
628
+ if b ._can_hold_na :
629
+ mask = isnull (d )
630
+ d [mask ] = None
618
631
619
- return temp
632
+ for col_loc , col in zip (b .mgr_locs , d ):
633
+ data_list [col_loc ] = col
634
+
635
+ return column_names , data_list
620
636
621
637
def insert (self , chunksize = None ):
622
638
623
639
ins = self .insert_statement ()
624
- temp = self .insert_data ()
625
- keys = list (map (str , temp .columns ))
640
+ keys , data_list = self .insert_data ()
626
641
627
- nrows = len (temp )
642
+ nrows = len (self . frame )
628
643
if chunksize is None :
629
644
chunksize = nrows
630
645
chunks = int (nrows / chunksize ) + 1
@@ -636,12 +651,11 @@ def insert(self, chunksize=None):
636
651
end_i = min ((i + 1 ) * chunksize , nrows )
637
652
if start_i >= end_i :
638
653
break
639
- data_list = []
640
- for t in temp .iloc [start_i :end_i ].itertuples ():
641
- data = dict ((k , self .maybe_asscalar (v ))
642
- for k , v in zip (keys , t [1 :]))
643
- data_list .append (data )
644
- con .execute (ins , data_list )
654
+
655
+ chunk_list = [arr [start_i :end_i ] for arr in data_list ]
656
+ insert_list = [dict ((k , v ) for k , v in zip (keys , row ))
657
+ for row in zip (* chunk_list )]
658
+ con .execute (ins , insert_list )
645
659
646
660
def read (self , coerce_float = True , parse_dates = None , columns = None ):
647
661
@@ -758,12 +772,12 @@ def _harmonize_columns(self, parse_dates=None):
758
772
759
773
elif col_type is float :
760
774
# floats support NA, can always convert!
761
- self .frame [col_name ].astype (col_type , copy = False )
775
+ self .frame [col_name ] = df_col .astype (col_type , copy = False )
762
776
763
777
elif len (df_col ) == df_col .count ():
764
778
# No NA values, can convert ints and bools
765
- if col_type is int or col_type is bool :
766
- self .frame [col_name ].astype (col_type , copy = False )
779
+ if col_type is np . dtype ( 'int64' ) or col_type is bool :
780
+ self .frame [col_name ] = df_col .astype (col_type , copy = False )
767
781
768
782
# Handle date parsing
769
783
if col_name in parse_dates :
@@ -813,7 +827,7 @@ def _numpy_type(self, sqltype):
813
827
return float
814
828
if isinstance (sqltype , Integer ):
815
829
# TODO: Refine integer size.
816
- return int
830
+ return np . dtype ( 'int64' )
817
831
if isinstance (sqltype , DateTime ):
818
832
# Caution: np.datetime64 is also a subclass of np.number.
819
833
return datetime
@@ -1008,9 +1022,9 @@ def insert_statement(self):
1008
1022
def insert (self , chunksize = None ):
1009
1023
1010
1024
ins = self .insert_statement ()
1011
- temp = self .insert_data ()
1025
+ keys , data_list = self .insert_data ()
1012
1026
1013
- nrows = len (temp )
1027
+ nrows = len (self . frame )
1014
1028
if chunksize is None :
1015
1029
chunksize = nrows
1016
1030
chunks = int (nrows / chunksize ) + 1
@@ -1021,13 +1035,11 @@ def insert(self, chunksize=None):
1021
1035
end_i = min ((i + 1 ) * chunksize , nrows )
1022
1036
if start_i >= end_i :
1023
1037
break
1024
- data_list = []
1025
- for t in temp .iloc [start_i :end_i ].itertuples ():
1026
- data = tuple ((self .maybe_asscalar (v ) for v in t [1 :]))
1027
- data_list .append (data )
1028
-
1038
+ chunk_list = [arr [start_i :end_i ] for arr in data_list ]
1039
+ insert_list = [tuple ((v for v in row ))
1040
+ for row in zip (* chunk_list )]
1029
1041
cur = self .pd_sql .con .cursor ()
1030
- cur .executemany (ins , data_list )
1042
+ cur .executemany (ins , insert_list )
1031
1043
cur .close ()
1032
1044
1033
1045
def _create_table_setup (self ):
0 commit comments