Skip to content

Commit dc59749

Browse files
Merge pull request #6591 from jorisvandenbossche/sql-itertuples
BUG: replace iterrows with itertuples in sql insert (GH6509)
2 parents fe509a8 + 1127d70 commit dc59749

File tree

4 files changed

+34
-11
lines changed

4 files changed

+34
-11
lines changed

doc/source/release.rst

+2
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,8 @@ Bug Fixes
227227
- Series.quantile raising on an ``object`` dtype (:issue:`6555`)
228228
- Bug in ``.xs`` with a ``nan`` in level when dropped (:issue:`6574`)
229229
- Bug in fillna with method = 'bfill/ffill' and ``datetime64[ns]`` dtype (:issue:`6587`)
230+
- Bug in sql writing with mixed dtypes possibly leading to data loss (:issue:`6509`)
231+
230232

231233
pandas 0.13.1
232234
-------------

pandas/io/sql.py

+7 -6
Original file line number | Diff line number | Diff line change
@@ -423,16 +423,17 @@ def insert(self):
423423
ins = self.insert_statement()
424424
data_list = []
425425
# to avoid if check for every row
426+
keys = self.frame.columns
426427
if self.index is not None:
427-
for t in self.frame.iterrows():
428+
for t in self.frame.itertuples():
428429
data = dict((k, self.maybe_asscalar(v))
429-
for k, v in t[1].iteritems())
430+
for k, v in zip(keys, t[1:]))
430431
data[self.index] = self.maybe_asscalar(t[0])
431432
data_list.append(data)
432433
else:
433-
for t in self.frame.iterrows():
434+
for t in self.frame.itertuples():
434435
data = dict((k, self.maybe_asscalar(v))
435-
for k, v in t[1].iteritems())
436+
for k, v in zip(keys, t[1:]))
436437
data_list.append(data)
437438
self.pd_sql.execute(ins, data_list)
438439

@@ -758,8 +759,8 @@ def insert_statement(self):
758759
def insert(self):
759760
ins = self.insert_statement()
760761
cur = self.pd_sql.con.cursor()
761-
for r in self.frame.iterrows():
762-
data = [self.maybe_asscalar(v) for v in r[1].values]
762+
for r in self.frame.itertuples():
763+
data = [self.maybe_asscalar(v) for v in r[1:]]
763764
if self.index is not None:
764765
data.insert(0, self.maybe_asscalar(r[0]))
765766
cur.execute(ins, tuple(data))

pandas/io/tests/test_sql.py

+13 -1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
import nose
88
import numpy as np
99

10-
from pandas import DataFrame
10+
from pandas import DataFrame, Series
1111
from pandas.compat import range, lrange, iteritems
1212
#from pandas.core.datetools import format as date_format
1313

@@ -554,6 +554,18 @@ def test_date_parsing(self):
554554
self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
555555
"IntDateCol loaded with incorrect type")
556556

557+
def test_mixed_dtype_insert(self):
558+
# see GH6509
559+
s1 = Series(2**25 + 1,dtype=np.int32)
560+
s2 = Series(0.0,dtype=np.float32)
561+
df = DataFrame({'s1': s1, 's2': s2})
562+
563+
# write and read again
564+
df.to_sql("test_read_write", self.conn, index=False)
565+
df2 = sql.read_table("test_read_write", self.conn)
566+
567+
tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True)
568+
557569

558570
class TestSQLAlchemy(_TestSQLAlchemy):
559571
"""

pandas/util/testing.py

+12 -4
Original file line number | Diff line number | Diff line change
@@ -499,12 +499,18 @@ def is_sorted(seq):
499499
def assert_series_equal(left, right, check_dtype=True,
500500
check_index_type=False,
501501
check_series_type=False,
502-
check_less_precise=False):
502+
check_less_precise=False,
503+
check_exact=False):
503504
if check_series_type:
504505
assert_isinstance(left, type(right))
505506
if check_dtype:
506507
assert_attr_equal('dtype', left, right)
507-
assert_almost_equal(left.values, right.values, check_less_precise)
508+
if check_exact:
509+
if not np.array_equal(left.values, right.values):
510+
raise AssertionError('{0} is not equal to {1}.'.format(left.values,
511+
right.values))
512+
else:
513+
assert_almost_equal(left.values, right.values, check_less_precise)
508514
if check_less_precise:
509515
assert_almost_equal(
510516
left.index.values, right.index.values, check_less_precise)
@@ -522,7 +528,8 @@ def assert_frame_equal(left, right, check_dtype=True,
522528
check_frame_type=False,
523529
check_less_precise=False,
524530
check_names=True,
525-
by_blocks=False):
531+
by_blocks=False,
532+
check_exact=False):
526533
if check_frame_type:
527534
assert_isinstance(left, type(right))
528535
assert_isinstance(left, DataFrame)
@@ -555,7 +562,8 @@ def assert_frame_equal(left, right, check_dtype=True,
555562
assert_series_equal(lcol, rcol,
556563
check_dtype=check_dtype,
557564
check_index_type=check_index_type,
558-
check_less_precise=check_less_precise)
565+
check_less_precise=check_less_precise,
566+
check_exact=check_exact)
559567

560568
if check_index_type:
561569
assert_isinstance(left.index, type(right.index))

0 commit comments

Comments
 (0)