From 0c14a1aedc5bcc24123d3a41d7c9342a9ecb2da8 Mon Sep 17 00:00:00 2001 From: davidshinn Date: Fri, 19 Jul 2013 23:18:54 -0400 Subject: [PATCH 1/7] ENH/TST: Raise error if invalid value passed to if_exists argument The if_exists argument in io.sql.write_frame needed data validation because the logic of the function implicitly used 'append' if the argument value was any string that was not either 'fail' or 'replace'. I added a new unit test to support the requirement. --- pandas/io/sql.py | 4 ++++ pandas/io/tests/test_sql.py | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 11b139b620175..588ffa4b93933 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -196,6 +196,10 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): if_exists='append' else: if_exists='fail' + + if if_exists not in ('fail', 'replace', 'append'): + raise ValueError, "'%s' is not valid for if_exists" % if_exists + exists = table_exists(name, con, flavor) if if_exists == 'fail' and exists: raise ValueError, "Table '%s' already exists." % name diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 5b23bf173ec4e..0035bd2b79236 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -240,6 +240,24 @@ def test_onecolumn_of_integer(self): result = sql.read_frame("select * from mono_df",con_x) tm.assert_frame_equal(result,mono_df) + def test_if_exists(self): + df_if_exists_1 = DataFrame({'col1': [1, 2], 'col2': ['A', 'B']}) + df_if_exists_2 = DataFrame({'col1': [3, 4, 5], 'col2': ['C', 'D', 'E']}) + table_name = 'table_if_exists' + + # test if invalid value for if_exists raises appropriate error + self.assertRaises(ValueError, + sql.write_frame, + frame=df_if_exists_1, + con=self.db, + name=table_name, + flavor='sqlite', + if_exists='notvalidvalue') + if sql.table_exists(table_name, self.db, flavor='sqlite'): + cur = self.db.cursor() + cur.execute("DROP TABLE %s" % table_name) + cur.close() + class TestMySQL(unittest.TestCase): From 28f9b74de206f1a7b5ff1d6e19da7db52badb623 Mon Sep 17 00:00:00 2001 From: davidshinn Date: Fri, 19 Jul 2013 23:44:41 -0400 Subject: [PATCH 2/7] BUG: Fix if_exists='replace' functionality in io.sql.write_frame This should resolve issues #2971 and #4110 --- pandas/io/sql.py | 4 ++-- pandas/io/tests/test_sql.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 588ffa4b93933..ec04a175b2669 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -207,13 +207,13 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): #create or drop-recreate if necessary create = None if exists and if_exists == 'replace': - create = "DROP TABLE %s" % name + create = "DROP TABLE %s;" % name + get_schema(frame, name, flavor) elif not exists: create = get_schema(frame, name, flavor) if create is not None: cur = con.cursor() - cur.execute(create) + cur.executescript(create) cur.close() cur = con.cursor() diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 0035bd2b79236..9240081858bc1 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -244,6 +244,7 @@ def test_if_exists(self): df_if_exists_1 = DataFrame({'col1': [1, 2], 'col2': ['A', 'B']}) df_if_exists_2 = DataFrame({'col1': [3, 4, 5], 'col2': ['C', 'D', 'E']}) table_name = 'table_if_exists' + sql_select = "SELECT * FROM %s" % table_name # test if invalid value for if_exists raises appropriate error self.assertRaises(ValueError, @@ -258,6 +259,19 @@ def test_if_exists(self): cur.execute("DROP TABLE %s" % table_name) cur.close() + # test if_exists='replace' + sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name, + flavor='sqlite', if_exists='replace') + sql.write_frame(frame=df_if_exists_2, con=self.db, name=table_name, + flavor='sqlite', if_exists='replace') + self.assertEqual(sql.tquery(sql_select, con=self.db), + [(3, 'C'), (4, 'D'), (5, 'E')]) + if sql.table_exists(table_name, self.db, flavor='sqlite'): + cur = self.db.cursor() + cur.execute("DROP TABLE %s" % table_name) + cur.close() + + class TestMySQL(unittest.TestCase): From 1bc927d38878b55ee3b4a68e54337d2210c3f2c1 Mon Sep 17 00:00:00 2001 From: davidshinn Date: Fri, 19 Jul 2013 23:50:38 -0400 Subject: [PATCH 3/7] CLN: Refactor in between test clean ups to be more DRY --- pandas/io/tests/test_sql.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 9240081858bc1..b2a585e801275 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -246,6 +246,16 @@ def test_if_exists(self): table_name = 'table_if_exists' sql_select = "SELECT * FROM %s" % table_name + def clean_up(test_table_to_drop): + """ + Drops tables created from individual tests + so no dependencies arise from sequential tests + """ + if sql.table_exists(test_table_to_drop, self.db, flavor='sqlite'): + cur = self.db.cursor() + cur.execute("DROP TABLE %s" % test_table_to_drop) + cur.close() + # test if invalid value for if_exists raises appropriate error self.assertRaises(ValueError, sql.write_frame, @@ -254,10 +264,7 @@ def test_if_exists(self): name=table_name, flavor='sqlite', if_exists='notvalidvalue') - if sql.table_exists(table_name, self.db, flavor='sqlite'): - cur = self.db.cursor() - cur.execute("DROP TABLE %s" % table_name) - cur.close() + clean_up(table_name) # test if_exists='replace' sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name, @@ -266,10 +273,7 @@ def test_if_exists(self): flavor='sqlite', if_exists='replace') self.assertEqual(sql.tquery(sql_select, con=self.db), [(3, 'C'), (4, 'D'), (5, 'E')]) - if sql.table_exists(table_name, self.db, flavor='sqlite'): - cur = self.db.cursor() - cur.execute("DROP TABLE %s" % table_name) - cur.close() + clean_up(table_name) From 5a2d51db897a72f6b7f516990faacae4671b239e Mon Sep 17 00:00:00 2001 From: davidshinn Date: Sat, 20 Jul 2013 00:01:03 -0400 Subject: [PATCH 4/7] TST: Complete test coverage for if_exists uses in io.sql.write_frame --- pandas/io/tests/test_sql.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index b2a585e801275..8ff24b1db21d8 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -266,15 +266,38 @@ def clean_up(test_table_to_drop): if_exists='notvalidvalue') clean_up(table_name) + # test if_exists='fail' + sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name, + flavor='sqlite', if_exists='fail') + self.assertRaises(ValueError, + sql.write_frame, + frame=df_if_exists_1, + con=self.db, + name=table_name, + flavor='sqlite', + if_exists='fail') + # test if_exists='replace' sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name, flavor='sqlite', if_exists='replace') + self.assertEqual(sql.tquery(sql_select, con=self.db), + [(1, 'A'), (2, 'B')]) sql.write_frame(frame=df_if_exists_2, con=self.db, name=table_name, flavor='sqlite', if_exists='replace') self.assertEqual(sql.tquery(sql_select, con=self.db), [(3, 'C'), (4, 'D'), (5, 'E')]) clean_up(table_name) + # test if_exists='append' + sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name, + flavor='sqlite', if_exists='fail') + self.assertEqual(sql.tquery(sql_select, con=self.db), + [(1, 'A'), (2, 'B')]) + sql.write_frame(frame=df_if_exists_2, con=self.db, name=table_name, + flavor='sqlite', if_exists='append') + self.assertEqual(sql.tquery(sql_select, con=self.db), + [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) + clean_up(table_name) class TestMySQL(unittest.TestCase): From e2f7c3a207541caa744f5d402508d85eafa2389f Mon Sep 17 00:00:00 2001 From: davidshinn Date: Sat, 20 Jul 2013 00:21:56 -0400 Subject: [PATCH 5/7] CLN: Refactor to make interaction between exists and if_exists clearer This refactor results in the function logic being clearer, since if_exists is only relevant when exists is True, the program flow is better served to have if_exists control flow only when exists is True --- pandas/io/sql.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index ec04a175b2669..5cb9f7a73ce4b 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -201,14 +201,15 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): raise ValueError, "'%s' is not valid for if_exists" % if_exists exists = table_exists(name, con, flavor) - if if_exists == 'fail' and exists: - raise ValueError, "Table '%s' already exists." % name - #create or drop-recreate if necessary + # creation/replacement dependent on the table existing and if_exist criteria create = None - if exists and if_exists == 'replace': - create = "DROP TABLE %s;" % name + get_schema(frame, name, flavor) - elif not exists: + if exists: + if if_exists == 'fail': + raise ValueError, "Table '%s' already exists." % name + elif if_exists == 'replace': + create = "DROP TABLE %s;" % name + get_schema(frame, name, flavor) + else: create = get_schema(frame, name, flavor) if create is not None: From 5eec80206e4e13e7a46767adccdc84d5b10abf02 Mon Sep 17 00:00:00 2001 From: davidshinn Date: Sat, 20 Jul 2013 22:49:25 -0400 Subject: [PATCH 6/7] BUG: Fix regression introduced by c28f11a0041a9f3b25f33b0539e42fa802b1d8d4 sqlite3 convenience function executescript not available in other database flavors. --- pandas/io/sql.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 5cb9f7a73ce4b..a7588cc741352 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -208,13 +208,16 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): if if_exists == 'fail': raise ValueError, "Table '%s' already exists." % name elif if_exists == 'replace': - create = "DROP TABLE %s;" % name + get_schema(frame, name, flavor) + cur = con.cursor() + cur.execute("DROP TABLE %s;" % name) + cur.close() + create = get_schema(frame, name, flavor) else: create = get_schema(frame, name, flavor) if create is not None: cur = con.cursor() - cur.executescript(create) + cur.execute(create) cur.close() cur = con.cursor() From 1f983174471b408a1049e8a5c37ff5023ded411f Mon Sep 17 00:00:00 2001 From: davidshinn Date: Sat, 27 Jul 2013 22:47:10 -0400 Subject: [PATCH 7/7] TST: Adding if_exist test for mysql flavor --- pandas/io/tests/test_sql.py | 60 +++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 8ff24b1db21d8..0d4cd9b52023d 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -542,6 +542,66 @@ def test_keyword_as_column_names(self): sql.write_frame(df, con = self.db, name = 'testkeywords', if_exists='replace', flavor='mysql') + def test_if_exists(self): + _skip_if_no_MySQLdb() + df_if_exists_1 = DataFrame({'col1': [1, 2], 'col2': ['A', 'B']}) + df_if_exists_2 = DataFrame({'col1': [3, 4, 5], 'col2': ['C', 'D', 'E']}) + table_name = 'table_if_exists' + sql_select = "SELECT * FROM %s" % table_name + + def clean_up(test_table_to_drop): + """ + Drops tables created from individual tests + so no dependencies arise from sequential tests + """ + if sql.table_exists(test_table_to_drop, self.db, flavor='mysql'): + cur = self.db.cursor() + cur.execute("DROP TABLE %s" % test_table_to_drop) + cur.close() + + # test if invalid value for if_exists raises appropriate error + self.assertRaises(ValueError, + sql.write_frame, + frame=df_if_exists_1, + con=self.db, + name=table_name, + flavor='mysql', + if_exists='notvalidvalue') + clean_up(table_name) + + # test if_exists='fail' + sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name, + flavor='mysql', if_exists='fail') + self.assertRaises(ValueError, + sql.write_frame, + frame=df_if_exists_1, + con=self.db, + name=table_name, + flavor='mysql', + if_exists='fail') + + # test if_exists='replace' + sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name, + flavor='mysql', if_exists='replace') + self.assertEqual(sql.tquery(sql_select, con=self.db), + [(1, 'A'), (2, 'B')]) + sql.write_frame(frame=df_if_exists_2, con=self.db, name=table_name, + flavor='mysql', if_exists='replace') + self.assertEqual(sql.tquery(sql_select, con=self.db), + [(3, 'C'), (4, 'D'), (5, 'E')]) + clean_up(table_name) + + # test if_exists='append' + sql.write_frame(frame=df_if_exists_1, con=self.db, name=table_name, + flavor='mysql', if_exists='fail') + self.assertEqual(sql.tquery(sql_select, con=self.db), + [(1, 'A'), (2, 'B')]) + sql.write_frame(frame=df_if_exists_2, con=self.db, name=table_name, + flavor='mysql', if_exists='append') + self.assertEqual(sql.tquery(sql_select, con=self.db), + [(1, 'A'), (2, 'B'), (3, 'C'), (4, 'D'), (5, 'E')]) + clean_up(table_name) + if __name__ == '__main__': # unittest.main()