From ed726cd2ed094a29bf9b1038a4536e0e0efabfe3 Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 21 May 2016 21:53:14 +0530 Subject: [PATCH 1/6] ENH: Allow to_sql to recognize single sql type #11886 --- doc/source/whatsnew/v0.18.2.txt | 2 +- pandas/io/sql.py | 20 +++++++++++++++----- pandas/io/tests/test_sql.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index dfb5ebc9379b1..6dc4db97d287a 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -89,7 +89,7 @@ Other enhancements - ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`) - +- ``DataFrame.to_sql`` now allows a single value as the SQL type for all columns (:issue:`11886`). .. _whatsnew_0182.api: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 324988360c9fe..eef1b12eb47f8 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -18,6 +18,7 @@ string_types, text_type) from pandas.core.api import DataFrame, Series from pandas.core.common import isnull +from pandas.core.generic import is_dictlike from pandas.core.base import PandasObject from pandas.types.api import DatetimeTZDtype from pandas.tseries.tools import to_datetime @@ -550,9 +551,10 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail', chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should be a SQLAlchemy type, or a string for sqlite3 fallback connection. + If all columns are of the same type, one single value can be used.
""" if if_exists not in ('fail', 'replace', 'append'): @@ -1231,11 +1233,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True, chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should - be a SQLAlchemy type. + be a SQLAlchemy type. If all columns are of the same type, one + single value can be used. """ + if dtype and not is_dictlike(dtype): + dtype = {col_name : dtype for col_name in frame} + if dtype is not None: from sqlalchemy.types import to_instance, TypeEngine for col, my_type in dtype.items(): @@ -1644,11 +1650,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True, chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should - be a string. + be a string. If all columns are of the same type, one single value + can be used. 
""" + if dtype and not is_dictlike(dtype): + dtype = {col_name : dtype for col_name in frame} + if dtype is not None: for col, my_type in dtype.items(): if not isinstance(my_type, str): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 9a995c17f0445..9aa3fa7330c4b 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1552,6 +1552,21 @@ def test_dtype(self): self.assertTrue(isinstance(sqltype, sqlalchemy.String)) self.assertEqual(sqltype.length, 10) + def test_to_sql_save_indexgle_dtype(self): + self.drop('single_dtype_test') + cols = ['A','B'] + data = [('a','b'), + ('c','d')] + df = DataFrame(data,columns=cols) + df.to_sql('single_dtype_test',self.conn,dtype=sqlalchemy.TEXT) + meta = sqlalchemy.schema.MetaData(bind=self.conn) + meta.reflect() + sqltypea = meta.tables['single_dtype_test'].columns['A'].type + sqltypeb = meta.tables['single_dtype_test'].columns['B'].type + self.assertTrue(isinstance(sqltypea, sqlalchemy.TEXT)) + self.assertTrue(isinstance(sqltypeb, sqlalchemy.TEXT)) + self.drop_table('single_dtype_test') + def test_notnull_dtype(self): cols = {'Bool': Series([True, None]), 'Date': Series([datetime(2012, 5, 1), None]), @@ -2025,6 +2040,19 @@ def test_dtype(self): self.assertRaises(ValueError, df.to_sql, 'error', self.conn, dtype={'B': bool}) + def test_to_sql_single_dtype(self): + if self.flavor == 'mysql': + raise nose.SkipTest('Not applicable to MySQL legacy') + self.drop_table('single_dtype_test') + cols = ['A','B'] + data = [('a','b'), + ('c','d')] + df = DataFrame(data,columns=cols) + df.to_sql('single_dtype_test',self.conn,dtype='STRING') + self.assertEqual(self._get_sqlite_column_type('single_dtype_test','A'),'STRING') + self.assertEqual(self._get_sqlite_column_type('single_dtype_test','B'),'STRING') + self.drop_table('single_dtype_test') + def test_notnull_dtype(self): if self.flavor == 'mysql': raise nose.SkipTest('Not applicable to MySQL legacy') From 
1a73316b343a47e62eb1b148c59fba8ee8799786 Mon Sep 17 00:00:00 2001 From: Camilo Cota Date: Sun, 22 May 2016 15:44:12 -0400 Subject: [PATCH 2/6] ENH: support decimal option in PythonParser #12933 closes #12933 Author: Camilo Cota Closes #13189 from camilocot/12933 and squashes the following commits: 465272e [Camilo Cota] Benchmark decimal option in read_csv for c engine 9f42d0c [Camilo Cota] double backticks around decimal and engine='python' dc8ca62 [Camilo Cota] fix test_empty_decimal_marker comment 49613fe [Camilo Cota] Assert read_csv error message in test_empty_decimal_marker d821052 [Camilo Cota] fix test_empty_decimal_marker comment f71509d [Camilo Cota] Include descritive what's new line 803356e [Camilo Cota] set nonnum regex in init method 1472d80 [Camilo Cota] Include the issue number in what's new b560fda [Camilo Cota] Fix what's new dc7acd1 [Camilo Cota] ENH: support decimal option in PythonParser #12933 --- pandas/io/tests/parser/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 2be0c4edb8f5d..0a138bce247dd 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1285,6 +1285,7 @@ def test_euro_decimal_format(self): self.assertEqual(df2['Number1'].dtype, float) self.assertEqual(df2['Number2'].dtype, float) self.assertEqual(df2['Number3'].dtype, float) +<<<<<<< HEAD def test_read_duplicate_names(self): # See gh-7160 @@ -1323,3 +1324,5 @@ def test_inf_parsing(self): # TODO: remove condition when 'na_filter' is supported for Python df = self.read_csv(StringIO(data), index_col=0, na_filter=False) tm.assert_almost_equal(df['A'].values, expected.values) +======= +>>>>>>> ENH: support decimal option in PythonParser #12933 From 20f0c213d6151596710708124aa4a5d08fc70fdb Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 21 May 2016 21:53:14 +0530 Subject: [PATCH 3/6] ENH: Allow to_sql to recognize single sql type #11886 PEP #3 --- 
doc/source/whatsnew/v0.18.2.txt | 2 ++ pandas/io/sql.py | 13 +++++++------ pandas/io/tests/test_sql.py | 32 ++++++++++++++++---------------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 6dc4db97d287a..f71ee1e1369bb 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -74,6 +74,8 @@ Other enhancements pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30) +- ``DataFrame.to_sql`` now allows a single value as the SQL type for all columns (:issue:`11886`). + - The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`) - ``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index eef1b12eb47f8..1e9771b140ff2 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1233,14 +1233,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True, chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : single SQL type or dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default + None Optional specifying the datatype for columns. The SQL type should - be a SQLAlchemy type. If all columns are of the same type, one + be a SQLAlchemy type. If all columns are of the same type, one single value can be used.
""" if dtype and not is_dictlike(dtype): - dtype = {col_name : dtype for col_name in frame} + dtype = {col_name: dtype for col_name in frame} if dtype is not None: from sqlalchemy.types import to_instance, TypeEngine @@ -1650,15 +1651,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True, chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : single SQL type or dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default + None Optional specifying the datatype for columns. The SQL type should be a string. If all columns are of the same type, one single value can be used. """ if dtype and not is_dictlike(dtype): - dtype = {col_name : dtype for col_name in frame} - + dtype = {col_name: dtype for col_name in frame} if dtype is not None: for col, my_type in dtype.items(): if not isinstance(my_type, str): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 9aa3fa7330c4b..621c34ff75ce8 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1552,20 +1552,18 @@ def test_dtype(self): self.assertTrue(isinstance(sqltype, sqlalchemy.String)) self.assertEqual(sqltype.length, 10) - def test_to_sql_save_indexgle_dtype(self): - self.drop('single_dtype_test') - cols = ['A','B'] - data = [('a','b'), - ('c','d')] - df = DataFrame(data,columns=cols) - df.to_sql('single_dtype_test',self.conn,dtype=sqlalchemy.TEXT) + def test_to_sql_single_dtype(self): + cols = ['A', 'B'] + data = [('a', 'b'), + ('c', 'd')] + df = DataFrame(data, columns=cols) + df.to_sql('single_dtype_test', self.conn, dtype=sqlalchemy.TEXT) meta = sqlalchemy.schema.MetaData(bind=self.conn) meta.reflect() sqltypea = meta.tables['single_dtype_test'].columns['A'].type sqltypeb = meta.tables['single_dtype_test'].columns['B'].type self.assertTrue(isinstance(sqltypea, sqlalchemy.TEXT)) 
self.assertTrue(isinstance(sqltypeb, sqlalchemy.TEXT)) - self.drop_table('single_dtype_test') def test_notnull_dtype(self): cols = {'Bool': Series([True, None]), @@ -2044,15 +2042,17 @@ def test_to_sql_single_dtype(self): if self.flavor == 'mysql': raise nose.SkipTest('Not applicable to MySQL legacy') self.drop_table('single_dtype_test') - cols = ['A','B'] - data = [('a','b'), - ('c','d')] - df = DataFrame(data,columns=cols) - df.to_sql('single_dtype_test',self.conn,dtype='STRING') - self.assertEqual(self._get_sqlite_column_type('single_dtype_test','A'),'STRING') - self.assertEqual(self._get_sqlite_column_type('single_dtype_test','B'),'STRING') + cols = ['A', 'B'] + data = [('a', 'b'), + ('c', 'd')] + df = DataFrame(data, columns=cols) + df.to_sql('single_dtype_test', self.conn, dtype='STRING') + self.assertEqual( + self._get_sqlite_column_type('single_dtype_test', 'A'), 'STRING') + self.assertEqual( + self._get_sqlite_column_type('single_dtype_test', 'B'), 'STRING') self.drop_table('single_dtype_test') - + def test_notnull_dtype(self): if self.flavor == 'mysql': raise nose.SkipTest('Not applicable to MySQL legacy') From a9798f9402c45cbd81a2f813db5df4ca5810fa5f Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 28 May 2016 08:28:07 +0530 Subject: [PATCH 4/6] parser --- pandas/io/tests/parser/common.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 0a138bce247dd..6248e83a494e2 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1285,7 +1285,6 @@ def test_euro_decimal_format(self): self.assertEqual(df2['Number1'].dtype, float) self.assertEqual(df2['Number2'].dtype, float) self.assertEqual(df2['Number3'].dtype, float) -<<<<<<< HEAD def test_read_duplicate_names(self): # See gh-7160 @@ -1303,17 +1302,17 @@ def test_read_duplicate_names(self): def test_inf_parsing(self): data = """\ -,A -a,inf -b,-inf -c,+Inf 
-d,-Inf -e,INF -f,-INF -g,+INf -h,-INf -i,inF -j,-inF""" + ,A + a,inf + b,-inf + c,+Inf + d,-Inf + e,INF + f,-INF + g,+INf + h,-INf + i,inF + j,-inF""" inf = float('inf') expected = Series([inf, -inf] * 5) @@ -1323,6 +1322,4 @@ def test_inf_parsing(self): if self.engine == 'c': # TODO: remove condition when 'na_filter' is supported for Python df = self.read_csv(StringIO(data), index_col=0, na_filter=False) - tm.assert_almost_equal(df['A'].values, expected.values) -======= ->>>>>>> ENH: support decimal option in PythonParser #12933 + tm.assert_almost_equal(df['A'].values, expected.values) \ No newline at end of file From 639f1503ff210bc875d1695ee4820a948732d588 Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 28 May 2016 09:16:58 +0530 Subject: [PATCH 5/6] Lint --- pandas/io/tests/parser/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 6248e83a494e2..a292c0fe04f40 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1322,4 +1322,4 @@ def test_inf_parsing(self): if self.engine == 'c': # TODO: remove condition when 'na_filter' is supported for Python df = self.read_csv(StringIO(data), index_col=0, na_filter=False) - tm.assert_almost_equal(df['A'].values, expected.values) \ No newline at end of file + tm.assert_almost_equal(df['A'].values, expected.values) From 9d7c7f6b694890e7e76a7cbd4a728a448f1fed07 Mon Sep 17 00:00:00 2001 From: RahulHP Date: Sat, 21 May 2016 21:53:14 +0530 Subject: [PATCH 6/6] ENH: Allow to_sql to recognize single sql type #11886 ENH: support decimal option in PythonParser #12933 closes #12933 Author: Camilo Cota Closes #13189 from camilocot/12933 and squashes the following commits: 465272e [Camilo Cota] Benchmark decimal option in read_csv for c engine 9f42d0c [Camilo Cota] double backticks around decimal and engine='python' dc8ca62 [Camilo Cota] fix test_empty_decimal_marker comment 49613fe [Camilo Cota] Assert 
read_csv error message in test_empty_decimal_marker d821052 [Camilo Cota] fix test_empty_decimal_marker comment f71509d [Camilo Cota] Include descritive what's new line 803356e [Camilo Cota] set nonnum regex in init method 1472d80 [Camilo Cota] Include the issue number in what's new b560fda [Camilo Cota] Fix what's new dc7acd1 [Camilo Cota] ENH: support decimal option in PythonParser #12933 ENH: Allow to_sql to recognize single sql type #11886 PEP #3 --- doc/source/whatsnew/v0.18.2.txt | 4 +++- pandas/io/sql.py | 21 ++++++++++++++++----- pandas/io/tests/parser/common.py | 22 +++++++++++----------- pandas/io/tests/test_sql.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index dfb5ebc9379b1..f71ee1e1369bb 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -74,6 +74,8 @@ Other enhancements pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30) +- ``DataFrame.to_sql`` now allows a single value as the SQL type for all columns (:issue:`11886`). + - The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`) - ``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`) @@ -89,7 +91,7 @@ Other enhancements - ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`) - +- ``DataFrame.to_sql`` now allows a single value as the SQL type for all columns (:issue:`11886`). ..
_whatsnew_0182.api: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 324988360c9fe..1e9771b140ff2 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -18,6 +18,7 @@ string_types, text_type) from pandas.core.api import DataFrame, Series from pandas.core.common import isnull +from pandas.core.generic import is_dictlike from pandas.core.base import PandasObject from pandas.types.api import DatetimeTZDtype from pandas.tseries.tools import to_datetime @@ -550,9 +551,10 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail', chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should be a SQLAlchemy type, or a string for sqlite3 fallback connection. + If all columns are of the same type, one single value can be used. """ if if_exists not in ('fail', 'replace', 'append'): @@ -1231,11 +1233,16 @@ def to_sql(self, frame, name, if_exists='fail', index=True, chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default + None Optional specifying the datatype for columns. The SQL type should - be a SQLAlchemy type. + be a SQLAlchemy type. If all columns are of the same type, one + single value can be used.
""" + if dtype and not is_dictlike(dtype): + dtype = {col_name: dtype for col_name in frame} + if dtype is not None: from sqlalchemy.types import to_instance, TypeEngine for col, my_type in dtype.items(): @@ -1644,11 +1651,15 @@ def to_sql(self, frame, name, if_exists='fail', index=True, chunksize : int, default None If not None, then rows will be written in batches of this size at a time. If None, all rows will be written at once. - dtype : dict of column name to SQL type, default None + dtype : single SQL type or dict of column name to SQL type, default + None Optional specifying the datatype for columns. The SQL type should - be a string. + be a string. If all columns are of the same type, one single value + can be used. """ + if dtype and not is_dictlike(dtype): + dtype = {col_name: dtype for col_name in frame} if dtype is not None: for col, my_type in dtype.items(): if not isinstance(my_type, str): diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py index 2be0c4edb8f5d..a292c0fe04f40 100644 --- a/pandas/io/tests/parser/common.py +++ b/pandas/io/tests/parser/common.py @@ -1302,17 +1302,17 @@ def test_read_duplicate_names(self): def test_inf_parsing(self): data = """\ -,A -a,inf -b,-inf -c,+Inf -d,-Inf -e,INF -f,-INF -g,+INf -h,-INf -i,inF -j,-inF""" + ,A + a,inf + b,-inf + c,+Inf + d,-Inf + e,INF + f,-INF + g,+INf + h,-INf + i,inF + j,-inF""" inf = float('inf') expected = Series([inf, -inf] * 5) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 9a995c17f0445..621c34ff75ce8 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1552,6 +1552,19 @@ def test_dtype(self): self.assertTrue(isinstance(sqltype, sqlalchemy.String)) self.assertEqual(sqltype.length, 10) + def test_to_sql_single_dtype(self): + cols = ['A', 'B'] + data = [('a', 'b'), + ('c', 'd')] + df = DataFrame(data, columns=cols) + df.to_sql('single_dtype_test', self.conn, dtype=sqlalchemy.TEXT) + meta = 
sqlalchemy.schema.MetaData(bind=self.conn) + meta.reflect() + sqltypea = meta.tables['single_dtype_test'].columns['A'].type + sqltypeb = meta.tables['single_dtype_test'].columns['B'].type + self.assertTrue(isinstance(sqltypea, sqlalchemy.TEXT)) + self.assertTrue(isinstance(sqltypeb, sqlalchemy.TEXT)) + def test_notnull_dtype(self): cols = {'Bool': Series([True, None]), 'Date': Series([datetime(2012, 5, 1), None]), @@ -2025,6 +2038,21 @@ def test_dtype(self): self.assertRaises(ValueError, df.to_sql, 'error', self.conn, dtype={'B': bool}) + def test_to_sql_single_dtype(self): + if self.flavor == 'mysql': + raise nose.SkipTest('Not applicable to MySQL legacy') + self.drop_table('single_dtype_test') + cols = ['A', 'B'] + data = [('a', 'b'), + ('c', 'd')] + df = DataFrame(data, columns=cols) + df.to_sql('single_dtype_test', self.conn, dtype='STRING') + self.assertEqual( + self._get_sqlite_column_type('single_dtype_test', 'A'), 'STRING') + self.assertEqual( + self._get_sqlite_column_type('single_dtype_test', 'B'), 'STRING') + self.drop_table('single_dtype_test') + def test_notnull_dtype(self): if self.flavor == 'mysql': raise nose.SkipTest('Not applicable to MySQL legacy')