From f012f07861546204e9b45e5c001abe34609190c4 Mon Sep 17 00:00:00 2001 From: Vraj Mohan Date: Wed, 30 Jan 2019 07:26:49 -0800 Subject: [PATCH 1/6] ENH: Support index=True for io.sql.get_schema Closes pandas-dev/pandas#9084 - Decided to keep the default as `index=False` to keep the API consistent. `to_sql` has `index=True`. - Tempted to name the parameter `include_dataframe_index` as "index" has a different meaning in a SQL context. --- pandas/io/sql.py | 15 +++++++++------ pandas/tests/io/test_sql.py | 13 +++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index aaface5415384..0a2f874b4b812 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1223,8 +1223,9 @@ def drop_table(self, table_name, schema=None): self.get_table(table_name, schema).drop() self.meta.clear() - def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): - table = SQLTable(table_name, self, frame=frame, index=False, keys=keys, + def _create_sql_schema(self, frame, table_name, keys=None, dtype=None, + index=False): + table = SQLTable(table_name, self, frame=frame, index=index, keys=keys, dtype=dtype) return str(table.sql_schema()) @@ -1565,13 +1566,14 @@ def drop_table(self, name, schema=None): name=_get_valid_sqlite_name(name)) self.execute(drop_sql) - def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): - table = SQLiteTable(table_name, self, frame=frame, index=False, + def _create_sql_schema(self, frame, table_name, keys=None, dtype=None, + index=False): + table = SQLiteTable(table_name, self, frame=frame, index=index, keys=keys, dtype=dtype) return str(table.sql_schema()) -def get_schema(frame, name, keys=None, con=None, dtype=None): +def get_schema(frame, name, keys=None, con=None, dtype=None, index=False): """ Get the SQL db table schema for the given frame. 
@@ -1593,4 +1595,5 @@ def get_schema(frame, name, keys=None, con=None, dtype=None): """ pandas_sql = pandasSQL_builder(con=con) - return pandas_sql._create_sql_schema(frame, name, keys=keys, dtype=dtype) + return pandas_sql._create_sql_schema( + frame, name, keys=keys, dtype=dtype, index=index) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 75a6d8d009083..feca940166d65 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -823,6 +823,19 @@ def test_get_schema_keys(self): constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")' assert constraint_sentence in create_sql + def test_get_schema_with_index(self): + frame = DataFrame({ + 'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']), + 'two': pd.Series([1, 2, 3], index=['a', 'b', 'c']) + }) + frame.index.name = 'alphabet' + + create_sql = sql.get_schema(frame, 'test', con=self.conn) + assert 'alphabet' not in create_sql + + create_sql = sql.get_schema(frame, 'test', con=self.conn, index=True) + assert 'alphabet' in create_sql + def test_chunksize_read(self): df = DataFrame(np.random.randn(22, 5), columns=list('abcde')) df.to_sql('test_chunksize', self.conn, index=False) From 3515ab4465c834b25fe61fee4341c32706d817f8 Mon Sep 17 00:00:00 2001 From: Vraj Mohan Date: Wed, 30 Jan 2019 08:28:01 -0800 Subject: [PATCH 2/6] Update whatsnew --- doc/source/whatsnew/v0.24.1.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 222963a7ff71a..be875caf3daca 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -31,6 +31,12 @@ Fixed Regressions Enhancements ^^^^^^^^^^^^ +.. _whatsnew_0241.enhancements.get_schema: + +``get_schema`` Enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`get_schema` now accepts an `index` parameter (default: `False`) that includes the index in the generated schema. (:issue:`9084`) .. 
_whatsnew_0241.bug_fixes: From 09be25d4aa2457c35d41a4390510586bcadfca01 Mon Sep 17 00:00:00 2001 From: Vraj Mohan Date: Thu, 31 Jan 2019 05:57:10 -0800 Subject: [PATCH 3/6] Add parameter description to docstring --- pandas/io/sql.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0a2f874b4b812..4faf517049375 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1591,6 +1591,8 @@ def get_schema(frame, name, keys=None, con=None, dtype=None, index=False): dtype : dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should be a SQLAlchemy type, or a string for sqlite3 fallback connection. + index : boolean, default False + include DataFrame index as a column """ From da48c3a086b7c81648d30fcd18f77422abd1f9b3 Mon Sep 17 00:00:00 2001 From: Vraj Mohan Date: Thu, 31 Jan 2019 15:17:23 -0800 Subject: [PATCH 4/6] Use pytest.mark.parametrize to test all the cases --- pandas/tests/io/test_sql.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index feca940166d65..e37921441596b 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -823,18 +823,20 @@ def test_get_schema_keys(self): constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")' assert constraint_sentence in create_sql - def test_get_schema_with_index(self): + @pytest.mark.parametrize("index_arg, expected", [ + ({}, False), + ({"index": False}, False), + ({"index": True}, True), + ]) + def test_get_schema_with_index(self, index_arg, expected): frame = DataFrame({ 'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']), 'two': pd.Series([1, 2, 3], index=['a', 'b', 'c']) }) frame.index.name = 'alphabet' - create_sql = sql.get_schema(frame, 'test', con=self.conn) - assert 'alphabet' not in create_sql - - create_sql = sql.get_schema(frame, 'test', con=self.conn, index=True) - assert 'alphabet' in create_sql + 
create_sql = sql.get_schema(frame, 'test', con=self.conn, **index_arg) + assert ('alphabet' in create_sql) == expected def test_chunksize_read(self): df = DataFrame(np.random.randn(22, 5), columns=list('abcde')) From 57070526cf2c43f62fcba8d73aacec4664d328b8 Mon Sep 17 00:00:00 2001 From: Vraj Mohan Date: Thu, 31 Jan 2019 16:27:51 -0800 Subject: [PATCH 5/6] Fix language in docstring --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 4faf517049375..7e4cefddc2746 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1592,7 +1592,7 @@ def get_schema(frame, name, keys=None, con=None, dtype=None, index=False): Optional specifying the datatype for columns. The SQL type should be a SQLAlchemy type, or a string for sqlite3 fallback connection. index : boolean, default False - include DataFrame index as a column + Whether to include DataFrame index as a column """ From 722dc566dc77535d1c48a657dac4246206074829 Mon Sep 17 00:00:00 2001 From: Vraj Mohan Date: Thu, 31 Jan 2019 16:28:26 -0800 Subject: [PATCH 6/6] Move whatsnew entry to correct location --- doc/source/whatsnew/v0.24.1.rst | 7 ------- doc/source/whatsnew/v0.25.0.rst | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index be875caf3daca..0923b05d41479 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -31,13 +31,6 @@ Fixed Regressions Enhancements ^^^^^^^^^^^^ -.. _whatsnew_0241.enhancements.get_schema: - -``get_schema`` Enhancements -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:func:`get_schema` now accepts an `index` parameter (default: `False`) that includes the index in the generated schema. (:issue:`9084`) - .. 
_whatsnew_0241.bug_fixes: Bug Fixes diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 939fb8b9415bd..052f052420e41 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -165,7 +165,7 @@ MultiIndex I/O ^^^ -- +- :func:`get_schema` now accepts an ``index`` parameter (default: ``False``) that controls whether the DataFrame index is included as a column in the generated schema (:issue:`9084`) - -