diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst index 0037d4a4410c3..9502d319075ce 100644 --- a/doc/source/reference/io.rst +++ b/doc/source/reference/io.rst @@ -126,6 +126,7 @@ SQL read_sql_table read_sql_query read_sql + io.sql.get_schema Google BigQuery ~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 3c436b55c19c2..0558f296a2a08 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -730,6 +730,7 @@ I/O - :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other :meth:``read_*`` functions (:issue:`37909`) - :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`) - Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`) +- :func:`~pandas.io.sql.get_schema` now accepts an ``index`` parameter to include the index of the DataFrame in the schema (:issue:`9084`) Period ^^^^^^ diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 5678133d5a706..c766c04224cec 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1462,12 +1462,13 @@ def _create_sql_schema( keys: Optional[List[str]] = None, dtype: Optional[dict] = None, schema: Optional[str] = None, + index: Optional[bool] = False, ): table = SQLTable( table_name, self, frame=frame, - index=False, + index=index, keys=keys, dtype=dtype, schema=schema, @@ -1862,12 +1863,20 @@ def drop_table(self, name, schema=None): drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}" self.execute(drop_sql) - def _create_sql_schema(self, frame, table_name, keys=None, dtype=None, schema=None): + def _create_sql_schema( + self, + frame, + table_name, + keys=None, + dtype=None, + schema=None, + index=False, + ): table = SQLiteTable( table_name, self, frame=frame, - index=False, + index=index, keys=keys, dtype=dtype, schema=schema, @@ -1875,7 +1884,7 @@ def _create_sql_schema(self, frame, 
table_name, keys=None, dtype=None, schema=No return str(table.sql_schema()) -def get_schema(frame, name, keys=None, con=None, dtype=None, schema=None): +def get_schema(frame, name, keys=None, con=None, dtype=None, schema=None, index=False): """ Get the SQL db table schema for the given frame. @@ -1886,19 +1895,28 @@ def get_schema(frame, name, keys=None, con=None, dtype=None, schema=None): name of SQL table keys : string or sequence, default: None columns to use a primary key - con: an open SQL database connection object or a SQLAlchemy connectable + con : an open SQL database connection object or a SQLAlchemy connectable Using SQLAlchemy makes it possible to use any DB supported by that library, default: None If a DBAPI2 object, only sqlite3 is supported. dtype : dict of column name to SQL type, default None Optional specifying the datatype for columns. The SQL type should be a SQLAlchemy type, or a string for sqlite3 fallback connection. - schema: str, default: None + schema : str, default: None Optional specifying the schema to be used in creating the table. .. versionadded:: 1.2.0 + index : bool, default: False + Whether to include the index of the DataFrame in the SQL schema. + + .. versionadded:: 1.2.0 + + Returns + ------- + str + The SQL schema for the given frame. 
""" pandas_sql = pandasSQL_builder(con=con) return pandas_sql._create_sql_schema( - frame, name, keys=keys, dtype=dtype, schema=schema + frame, name, keys=keys, dtype=dtype, schema=schema, index=index ) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 0195b61d13798..86b1f7a2bea3c 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -894,6 +894,20 @@ def test_get_schema_keys(self): constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")' assert constraint_sentence in create_sql + def test_get_schema_with_index(self): + # GH 9084 + df = DataFrame({"one": [1, 2, 3], "two": [1, 2, 3]}, index=list("abc")) + + schema_without_index = sql.get_schema(df, "test", con=self.conn) + assert "index TEXT" not in schema_without_index + + schema_with_index = sql.get_schema(df, "test", index=True, con=self.conn) + assert '"index" TEXT' in schema_with_index + + df.index.name = "new_index" + schema_with_index_rename = sql.get_schema(df, "test", index=True, con=self.conn) + assert df.index.name in schema_with_index_rename + def test_chunksize_read(self): df = DataFrame(np.random.randn(22, 5), columns=list("abcde")) df.to_sql("test_chunksize", self.conn, index=False)