Version 0.24.0 breaks read_sql compatibility with read_sql_query #24988

Shellcat-Zero · 2019-01-28T23:47:56Z

The following read_sql_query() works:

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('mysql+pymysql://'+name+':'+pw+'@'+server+'/?charset=utf8')
sql = 'select * from MyDatabase.my_temp_table'
df = pd.read_sql_query(sql,engine)

The same statements now fail in read_sql():

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('mysql+pymysql://'+name+':'+pw+'@'+server+'/?charset=utf8')
sql = 'select * from MyDatabase.my_temp_table'
df = pd.read_sql(sql,engine)
---------------------------------------------------------------------------
InternalError                             Traceback (most recent call last)
~/miniconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1235                     self.dialect.do_execute(
-> 1236                         cursor, statement, parameters, context
   1237                     )

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
    535     def do_execute(self, cursor, statement, parameters, context=None):
--> 536         cursor.execute(statement, parameters)
    537

~/miniconda3/lib/python3.6/site-packages/pymysql/cursors.py in execute(self, query, args)
    169
--> 170         result = self._query(query)
    171         self._executed = query

~/miniconda3/lib/python3.6/site-packages/pymysql/cursors.py in _query(self, q)
    327         self._clear_result()
--> 328         conn.query(q)
    329         self._do_get_result()

~/miniconda3/lib/python3.6/site-packages/pymysql/connections.py in query(self, sql, unbuffered)
    515         self._execute_command(COMMAND.COM_QUERY, sql)
--> 516         self._affected_rows = self._read_query_result(unbuffered=unbuffered)
    517         return self._affected_rows

~/miniconda3/lib/python3.6/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered)
    726             result = MySQLResult(self)
--> 727             result.read()
    728         self._result = result

~/miniconda3/lib/python3.6/site-packages/pymysql/connections.py in read(self)
   1065         try:
-> 1066             first_packet = self.connection._read_packet()
   1067

~/miniconda3/lib/python3.6/site-packages/pymysql/connections.py in _read_packet(self, packet_type)
    682         packet = packet_type(buff, self.encoding)
--> 683         packet.check_error()
    684         return packet

~/miniconda3/lib/python3.6/site-packages/pymysql/protocol.py in check_error(self)
    219             if DEBUG: print("errno =", errno)
--> 220             err.raise_mysql_exception(self._data)
    221

~/miniconda3/lib/python3.6/site-packages/pymysql/err.py in raise_mysql_exception(data)
    108     errorclass = error_map.get(errno, InternalError)
--> 109     raise errorclass(errno, errval)

InternalError: (1046, 'No database selected')

The above exception was the direct cause of the following exception:

InternalError                             Traceback (most recent call last)
<ipython-input-5-ebe084ae005c> in <module>
----> 1 df = pd.read_sql(sql,engine)

~/miniconda3/lib/python3.6/site-packages/pandas/io/sql.py in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
    381
    382     try:
--> 383         _is_table_name = pandas_sql.has_table(sql)
    384     except (ImportError, AttributeError):
    385         _is_table_name = False

~/miniconda3/lib/python3.6/site-packages/pandas/io/sql.py in has_table(self, name, schema)
   1198             self.connectable.dialect.has_table,
   1199             name,
-> 1200             schema or self.meta.schema,
   1201         )
   1202

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in run_callable(self, callable_, *args, **kwargs)
   2123         """
   2124         with self.contextual_connect() as conn:
-> 2125             return conn.run_callable(callable_, *args, **kwargs)
   2126
   2127     def execute(self, statement, *multiparams, **params):

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in run_callable(self, callable_, *args, **kwargs)
   1594
   1595         """
-> 1596         return callable_(self, *args, **kwargs)
   1597
   1598     def _run_visitor(self, visitorcallable, element, **kwargs):

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/dialects/mysql/base.py in has_table(self, connection, table_name, schema)
   2254                 rs = connection.execution_options(
   2255                     skip_user_error_events=True
-> 2256                 ).execute(st)
   2257                 have = rs.fetchone() is not None
   2258                 rs.close()

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, object_, *multiparams, **params)
    972         """
    973         if isinstance(object_, util.string_types[0]):
--> 974             return self._execute_text(object_, multiparams, params)
    975         try:
    976             meth = object_._execute_on_connection

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_text(self, statement, multiparams, params)
   1145             parameters,
   1146             statement,
-> 1147             parameters,
   1148         )
   1149         if self._has_events or self.engine._has_events:

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1238         except BaseException as e:
   1239             self._handle_dbapi_exception(
-> 1240                 e, statement, parameters, cursor, context
   1241             )
   1242

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   1456                 util.raise_from_cause(newraise, exc_info)
   1457             elif should_wrap:
-> 1458                 util.raise_from_cause(sqlalchemy_exception, exc_info)
   1459             else:
   1460                 util.reraise(*exc_info)
~/miniconda3/lib/python3.6/site-packages/sqlalchemy/util/compat.py in raise_from_cause(exception, exc_info)
    294     exc_type, exc_value, exc_tb = exc_info
    295     cause = exc_value if exc_value is not exception else None
--> 296     reraise(type(exception), exception, tb=exc_tb, cause=cause)
    297
    298

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/util/compat.py in reraise(tp, value, tb, cause)
    274             value.__cause__ = cause
    275         if value.__traceback__ is not tb:
--> 276             raise value.with_traceback(tb)
    277         raise value
    278

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1234                 if not evt_handled:
   1235                     self.dialect.do_execute(
-> 1236                         cursor, statement, parameters, context
   1237                     )
   1238         except BaseException as e:

~/miniconda3/lib/python3.6/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
    534
    535     def do_execute(self, cursor, statement, parameters, context=None):
--> 536         cursor.execute(statement, parameters)
    537
    538     def do_execute_no_params(self, cursor, statement, context=None):

~/miniconda3/lib/python3.6/site-packages/pymysql/cursors.py in execute(self, query, args)
    168         query = self.mogrify(query, args)
    169
--> 170         result = self._query(query)
    171         self._executed = query
    172         return result

~/miniconda3/lib/python3.6/site-packages/pymysql/cursors.py in _query(self, q)
    326         self._last_executed = q
    327         self._clear_result()
--> 328         conn.query(q)
    329         self._do_get_result()
    330         return self.rowcount

~/miniconda3/lib/python3.6/site-packages/pymysql/connections.py in query(self, sql, unbuffered)
    514                 sql = sql.encode(self.encoding, 'surrogateescape')
    515         self._execute_command(COMMAND.COM_QUERY, sql)
--> 516         self._affected_rows = self._read_query_result(unbuffered=unbuffered)
    517         return self._affected_rows
    518

~/miniconda3/lib/python3.6/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered)
    725         else:
    726             result = MySQLResult(self)
--> 727             result.read()
    728         self._result = result
    729         if result.server_status is not None:

~/miniconda3/lib/python3.6/site-packages/pymysql/connections.py in read(self)
   1064     def read(self):
   1065         try:
-> 1066             first_packet = self.connection._read_packet()
   1067
   1068             if first_packet.is_ok_packet():
~/miniconda3/lib/python3.6/site-packages/pymysql/connections.py in _read_packet(self, packet_type)
    681
    682         packet = packet_type(buff, self.encoding)
--> 683         packet.check_error()
    684         return packet
    685

~/miniconda3/lib/python3.6/site-packages/pymysql/protocol.py in check_error(self)
    218             errno = self.read_uint16()
    219             if DEBUG: print("errno =", errno)
--> 220             err.raise_mysql_exception(self._data)
    221
    222     def dump(self):

~/miniconda3/lib/python3.6/site-packages/pymysql/err.py in raise_mysql_exception(data)
    107         errval = data[3:].decode('utf-8', 'replace')
    108     errorclass = error_map.get(errno, InternalError)
--> 109     raise errorclass(errno, errval)

InternalError: (pymysql.err.InternalError) (1046, 'No database selected') [SQL: 'DESCRIBE `select * from MyDatabase.my_temp_table`'] (Background on this error at: http://sqlalche.me/e/2j85)

Problem description

Reverting to Pandas version 0.23.4 fixes the issue.

Output of `pd.show_versions()`

[paste the output of `pd.show_versions()` here below this line]
INSTALLED VERSIONS

commit: None
python: 3.6.5.final.0
python-bits: 64
OS: Linux
OS-release: 4.15.0-1021-aws
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: C.UTF-8
LOCALE: en_US.UTF-8

pandas: 0.24.0
pytest: None
pip: 19.0.1
setuptools: 39.2.0
Cython: None
numpy: 1.16.0
scipy: None
pyarrow: None
xarray: None
IPython: 7.2.0
sphinx: None
patsy: None
dateutil: 2.7.5
pytz: 2018.9
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: None
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml.etree: None
bs4: None
html5lib: None
sqlalchemy: 1.2.17
pymysql: 0.9.3
psycopg2: None
jinja2: None
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
gcsfs: None

The text was updated successfully, but these errors were encountered:

TomAugspurger · 2019-01-28T23:55:38Z

Can you step through the traceback and check where things break? It may be difficult for someone other than yourself to debug.

Shellcat-Zero · 2019-01-29T00:09:21Z

@TomAugspurger I'll see what I can do. It will likely be a few days or more before I can delve deeper into this problem.

fordmustang111 · 2019-01-29T06:34:27Z

Hello,

Not sure if this will help or not but I am having the same issue. Everything was working flawlessly until I updated to 0.24.0. The error is below:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/pandas/compat/__init__.py", line 51, in <module>
    import __builtin__ as builtins
ModuleNotFoundError: No module named '__builtin__'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "address.py", line 1, in <module>
    import pandas
  File "/usr/local/lib/python3.6/dist-packages/pandas/__init__.py", line 23, in <module>
    from pandas.compat.numpy import *
  File "/usr/local/lib/python3.6/dist-packages/pandas/compat/__init__.py", line 64, in <module>
    import http.client as httplib
  File "/usr/lib/python3.6/http/client.py", line 71, in <module>
    import email.parser
  File "/usr/lib/python3.6/email/parser.py", line 12, in <module>
    from email.feedparser import FeedParser, BytesFeedParser
  File "/usr/lib/python3.6/email/feedparser.py", line 27, in <module>
    from email._policybase import compat32
  File "/usr/lib/python3.6/email/_policybase.py", line 9, in <module>
    from email.utils import _has_surrogates
  File "/usr/lib/python3.6/email/utils.py", line 33, in <module>
    from email._parseaddr import quote
  File "/usr/lib/python3.6/email/_parseaddr.py", line 16, in <module>
    import time, calendar
  File "/mnt/ntfs/backup_scripts/calendar.py", line 60, in <module>
    results = pandas.read_sql_query(query, conn)
AttributeError: module 'pandas' has no attribute 'read_sql_query'
Error in sys.excepthook:
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/pandas/compat/__init__.py", line 51, in <module>
    import __builtin__ as builtins
ModuleNotFoundError: No module named '__builtin__'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3/dist-packages/apport_python_hook.py", line 63, in apport_excepthook
    from apport.fileutils import likely_packaged, get_recent_crashes
  File "/usr/lib/python3/dist-packages/apport/__init__.py", line 5, in <module>
    from apport.report import Report
  File "/usr/lib/python3/dist-packages/apport/report.py", line 21, in <module>
    from urllib.request import urlopen
  File "/usr/lib/python3.6/urllib/request.py", line 88, in <module>
    import http.client
  File "/usr/lib/python3.6/http/client.py", line 71, in <module>
    import email.parser
  File "/usr/lib/python3.6/email/parser.py", line 12, in <module>
    from email.feedparser import FeedParser, BytesFeedParser
  File "/usr/lib/python3.6/email/feedparser.py", line 27, in <module>
    from email._policybase import compat32
  File "/usr/lib/python3.6/email/_policybase.py", line 9, in <module>
    from email.utils import _has_surrogates
  File "/usr/lib/python3.6/email/utils.py", line 33, in <module>
    from email._parseaddr import quote
  File "/usr/lib/python3.6/email/_parseaddr.py", line 16, in <module>
    import time, calendar
  File "/mnt/ntfs/backup_scripts/calendar.py", line 1, in <module>
    import pandas
  File "/usr/local/lib/python3.6/dist-packages/pandas/__init__.py", line 23, in <module>
    from pandas.compat.numpy import *
  File "/usr/local/lib/python3.6/dist-packages/pandas/compat/__init__.py", line 64, in <module>
    import http.client as httplib
AttributeError: module 'http' has no attribute 'client'

Original exception was:
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/pandas/compat/__init__.py", line 51, in <module>
    import __builtin__ as builtins
ModuleNotFoundError: No module named '__builtin__'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "address.py", line 1, in <module>
    import pandas
  File "/usr/local/lib/python3.6/dist-packages/pandas/__init__.py", line 23, in <module>
    from pandas.compat.numpy import *
  File "/usr/local/lib/python3.6/dist-packages/pandas/compat/__init__.py", line 64, in <module>
    import http.client as httplib
  File "/usr/lib/python3.6/http/client.py", line 71, in <module>
    import email.parser
  File "/usr/lib/python3.6/email/parser.py", line 12, in <module>
    from email.feedparser import FeedParser, BytesFeedParser
  File "/usr/lib/python3.6/email/feedparser.py", line 27, in <module>
    from email._policybase import compat32
  File "/usr/lib/python3.6/email/_policybase.py", line 9, in <module>
    from email.utils import _has_surrogates
  File "/usr/lib/python3.6/email/utils.py", line 33, in <module>
    from email._parseaddr import quote
  File "/usr/lib/python3.6/email/_parseaddr.py", line 16, in <module>
    import time, calendar
  File "/mnt/ntfs/backup_scripts/calendar.py", line 60, in <module>
    results = pandas.read_sql_query(query, conn)
AttributeError: module 'pandas' has no attribute 'read_sql_query'

TomAugspurger · 2019-01-29T13:08:26Z

@fordmustang111 that looks like a different error. It seems like your install of pandas didn't go correctly. Can you reproduce in a new environment? Do you have a module named pandas.py somewhere on your python path?

jorisvandenbossche · 2019-01-29T13:40:59Z

On the original issue, from looking at that error traceback and at the diff of the sql code between 0.23.4 and 0.24.0, I assume it is related to this change: https://github.com/pandas-dev/pandas/pull/23004/files#diff-b41f9fd042c423682f8e4c4d808dbe64R385

pymysql seems to be raising an InternalError, while we are only catching ImportError and AttributeError

@Shellcat-Zero Thanks for the report!

It should be an easy fix we can include in 0.24.1, main question is if we can also test it (I think we should be able to as we have pymysql tests)

TomAugspurger · 2019-01-29T15:55:32Z

@Shellcat-Zero we're releasing 0.24.1 soonish (as early as tomorrow perhaps), so if you're able to construct a test quickly that'd be great.

susodapop · 2019-01-29T18:27:07Z

I bumped into this today.

Python 3.7.0
MSSQL via Python-TDS
SQLAlchemy==1.2.7
PANDAS==0.24

Sample query:

SELECT TOP 1 * FROM oe_hdr

I called it in Python like this:

engine = sqlalchemy.create_engine( $(my-connection-string) )
query = sqlalchemy.text("SELECT TOP 1 * FROM oe_hdr")
df = pandas.read_sql(query, engine)

The problem I determined is that read_sql was not recognizing my SQLAlchemy text() construct as a query and therefore assumed that it's contents were instead a table name. It was emitting this SQL to the server:

SELECT [INFORMATION_SCHEMA].[COLUMNS].[TABLE_SCHEMA],
	 [INFORMATION_SCHEMA].[COLUMNS].[TABLE_NAME],
	 [INFORMATION_SCHEMA].[COLUMNS].[COLUMN_NAME],
	 [INFORMATION_SCHEMA].[COLUMNS].[IS_NULLABLE],
	 [INFORMATION_SCHEMA].[COLUMNS].[DATA_TYPE],
	 [INFORMATION_SCHEMA].[COLUMNS].[ORDINAL_POSITION],
	 [INFORMATION_SCHEMA].[COLUMNS].[CHARACTER_MAXIMUM_LENGTH],
	 [INFORMATION_SCHEMA].[COLUMNS].[NUMERIC_PRECISION],
	 [INFORMATION_SCHEMA].[COLUMNS].[NUMERIC_SCALE],
	 [INFORMATION_SCHEMA].[COLUMNS].[COLUMN_DEFAULT],
	 [INFORMATION_SCHEMA].[COLUMNS].[COLLATION_NAME] 
FROM
	[INFORMATION_SCHEMA].[COLUMNS] 
WHERE
	[INFORMATION_SCHEMA].[COLUMNS].[TABLE_NAME] = SELECT TOP 1 FROM oe_hdr

Notice that it includes the entire contents of my query as the TABLE_NAME in this schema query.

Shellcat-Zero · 2019-01-29T21:20:55Z

@TomAugspurger When you say "construct a test", is that something different from what I provided in my initial report? If not, I most definitely have tests ready to run for you.

TomAugspurger · 2019-01-29T21:23:39Z

A unittest similar to the others in pandas/tests/io/test_sql.py that reproduces your error, and is fixed by @jorisvandenbossche's suggestion in
#24988 (comment)

jorisvandenbossche · 2019-01-30T09:45:31Z

I did a quick try to create a test case that is failing with current master, but didn't succeed (https://github.com/pandas-dev/pandas/compare/master...jorisvandenbossche:read-sql-regression?expand=1 is still green on travis, don't have mysql locally)

So I would propose to at least already fix the regression without a test?

jorisvandenbossche · 2019-01-30T09:56:15Z

I opened #25024 with just the fix.

@Shellcat-Zero @susodapop could you try if the fix in #25024 resolved the problem? (it's a 1-line change that you could edit in the installed pandas source, not ideal but should work for giving feedback)

When you say "construct a test", is that something different from what I provided in my initial report? If not, I most definitely have tests ready to run for you.

Something like your initial report is fine, but, the main problem is that we need to get a failing test (without the fix) on Travis CI. That's what I tried in https://github.com/pandas-dev/pandas/compare/master...jorisvandenbossche:read-sql-regression?expand=1 but without luck. So there seems to be something different about the query you are doing.

jorisvandenbossche · 2019-01-31T16:05:35Z

@Shellcat-Zero @susodapop would any body be able to confirm the fix? That would be very welcome! (I can't reproduce it locally)

susodapop · 2019-02-01T15:05:20Z

Hi @jorisvandenbossche -- I have confirmed that this fixes the problem on my end. I updated my local version of Pandas with the single change and now read_sql behaves properly. Sorry for the delay!

jorisvandenbossche · 2019-02-01T15:10:37Z

Thanks for confirming!

TomAugspurger added the IO SQL to_sql, read_sql, read_sql_query label Jan 28, 2019

jorisvandenbossche added this to the 0.24.1 milestone Jan 29, 2019

jorisvandenbossche added the Regression Functionality that used to work in a prior pandas version label Jan 29, 2019

jorisvandenbossche mentioned this issue Jan 30, 2019

REGR: fix read_sql delegation for queries on MySQL/pymysql #25024

Merged

jorisvandenbossche closed this as completed in #25024 Jan 31, 2019

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Version 0.24.0 breaks read_sql compatibility with read_sql_query #24988

Version 0.24.0 breaks read_sql compatibility with read_sql_query #24988

Shellcat-Zero commented Jan 28, 2019 •

edited

Loading

[paste the output of `pd.show_versions()` here below this line]
INSTALLED VERSIONS

TomAugspurger commented Jan 28, 2019

Shellcat-Zero commented Jan 29, 2019

fordmustang111 commented Jan 29, 2019 •

edited by TomAugspurger

Loading

TomAugspurger commented Jan 29, 2019

jorisvandenbossche commented Jan 29, 2019

TomAugspurger commented Jan 29, 2019

susodapop commented Jan 29, 2019

Shellcat-Zero commented Jan 29, 2019

TomAugspurger commented Jan 29, 2019

jorisvandenbossche commented Jan 30, 2019

jorisvandenbossche commented Jan 30, 2019

jorisvandenbossche commented Jan 31, 2019

susodapop commented Feb 1, 2019

jorisvandenbossche commented Feb 1, 2019 via email

Version 0.24.0 breaks read_sql compatibility with read_sql_query #24988

Version 0.24.0 breaks read_sql compatibility with read_sql_query #24988

Comments

Shellcat-Zero commented Jan 28, 2019 • edited Loading

Problem description

Output of pd.show_versions()

[paste the output of pd.show_versions() here below this line] INSTALLED VERSIONS

TomAugspurger commented Jan 28, 2019

Shellcat-Zero commented Jan 29, 2019

fordmustang111 commented Jan 29, 2019 • edited by TomAugspurger Loading

TomAugspurger commented Jan 29, 2019

jorisvandenbossche commented Jan 29, 2019

TomAugspurger commented Jan 29, 2019

susodapop commented Jan 29, 2019

Shellcat-Zero commented Jan 29, 2019

TomAugspurger commented Jan 29, 2019

jorisvandenbossche commented Jan 30, 2019

jorisvandenbossche commented Jan 30, 2019

jorisvandenbossche commented Jan 31, 2019

susodapop commented Feb 1, 2019

jorisvandenbossche commented Feb 1, 2019 via email

Shellcat-Zero commented Jan 28, 2019 •

edited

Loading

Output of `pd.show_versions()`

[paste the output of `pd.show_versions()` here below this line]
INSTALLED VERSIONS

fordmustang111 commented Jan 29, 2019 •

edited by TomAugspurger

Loading