Skip to content

BUG: read_sql_table unable to read views #54185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,7 @@ I/O
- Bug in :func:`read_hdf` not properly closing store after a ``IndexError`` is raised (:issue:`52781`)
- Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`)
- Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`)
- Bug in :func:`read_sql_table` raising an exception when reading a view (:issue:`52969`)
- Bug in :func:`read_sql` when reading multiple timezone aware columns with the same column name (:issue:`44421`)
- Bug in :func:`read_xml` stripping whitespace in string data (:issue:`53811`)
- Bug when writing and reading empty Stata dta files where dtype information was lost (:issue:`46240`)
Expand Down
16 changes: 13 additions & 3 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -1655,7 +1655,7 @@ def read_table(
SQLDatabase.read_query

"""
self.meta.reflect(bind=self.con, only=[table_name])
self.meta.reflect(bind=self.con, only=[table_name], views=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

guessing this isnt too expensive performance wise that we will now start reflecting all tables by default (include views/mat views) just thinking if this will impact performance for current users.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I'm assuming it's negligible relative to the query or IO, especially since the old implementation was able to access views too

table = SQLTable(table_name, self, index=index_col, schema=schema)
if chunksize is not None:
self.returns_generator = True
Expand Down Expand Up @@ -1989,7 +1989,9 @@ def get_table(self, table_name: str, schema: str | None = None) -> Table:
def drop_table(self, table_name: str, schema: str | None = None) -> None:
schema = schema or self.meta.schema
if self.has_table(table_name, schema):
self.meta.reflect(bind=self.con, only=[table_name], schema=schema)
self.meta.reflect(
bind=self.con, only=[table_name], schema=schema, views=True
)
with self.run_transaction():
self.get_table(table_name, schema).drop(bind=self.con)
self.meta.clear()
Expand Down Expand Up @@ -2411,7 +2413,15 @@ def to_sql(

def has_table(self, name: str, schema: str | None = None) -> bool:
wld = "?"
query = f"SELECT name FROM sqlite_master WHERE type='table' AND name={wld};"
query = f"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is something like this equivalent here opposed to the sql?

self.meta.reflect(bind=self.con)
return name in metadata.tables.keys()

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I believe so https://www.sqlite.org/schematab.html

Since this is the sqlite3 backend unfortunately we can't meta.reflect

SELECT
name
FROM
sqlite_master
WHERE
type IN ('table', 'view')
AND name={wld};
"""

return len(self.execute(query, [name]).fetchall()) > 0

Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from io import StringIO
from pathlib import Path
import sqlite3
import uuid

import numpy as np
import pytest
Expand Down Expand Up @@ -932,6 +933,68 @@ def insert_on_conflict(table, conn, keys, data_iter):
pandasSQL.drop_table("test_insert_conflict")


@pytest.mark.db
@pytest.mark.parametrize("conn", postgresql_connectable)
def test_read_view_postgres(conn, request):
# GH 52969
conn = request.getfixturevalue(conn)

from sqlalchemy.engine import Engine
from sqlalchemy.sql import text

table_name = f"group_{uuid.uuid4().hex}"
view_name = f"group_view_{uuid.uuid4().hex}"

sql_stmt = text(
f"""
CREATE TABLE {table_name} (
group_id INTEGER,
name TEXT
);
INSERT INTO {table_name} VALUES
(1, 'name');
CREATE VIEW {view_name}
AS
SELECT * FROM {table_name};
"""
)
if isinstance(conn, Engine):
with conn.connect() as con:
with con.begin():
con.execute(sql_stmt)
else:
with conn.begin():
conn.execute(sql_stmt)
result = read_sql_table(view_name, conn)
expected = DataFrame({"group_id": [1], "name": "name"})
tm.assert_frame_equal(result, expected)


def test_read_view_sqlite(sqlite_buildin):
# GH 52969
create_table = """
CREATE TABLE groups (
group_id INTEGER,
name TEXT
);
"""
insert_into = """
INSERT INTO groups VALUES
(1, 'name');
"""
create_view = """
CREATE VIEW group_view
AS
SELECT * FROM groups;
"""
sqlite_buildin.execute(create_table)
sqlite_buildin.execute(insert_into)
sqlite_buildin.execute(create_view)
result = pd.read_sql("SELECT * FROM group_view", sqlite_buildin)
expected = DataFrame({"group_id": [1], "name": "name"})
tm.assert_frame_equal(result, expected)


def test_execute_typeerror(sqlite_iris_engine):
with pytest.raises(TypeError, match="pandas.io.sql.execute requires a connection"):
with tm.assert_produces_warning(
Expand Down