diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index a26da75d921ef..b5a18a1fd2674 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -25,6 +25,7 @@
     "s3fs": "0.4.0",
     "scipy": "1.2.0",
     "sqlalchemy": "1.2.8",
+    "sql_metadata": None,
     "tables": "3.5.1",
     "tabulate": "0.8.7",
     "xarray": "0.12.3",
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2736560def2cb..b2f6893eb0752 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -437,6 +437,7 @@
 3 bar 8
 """
 
+# DataFrame helper functions
 
 # -----------------------------------------------------------------------
 # DataFrame class
@@ -566,7 +567,6 @@ def __init__(
         dtype: Dtype | None = None,
         copy: bool | None = None,
     ):
-
         if copy is None:
             if isinstance(data, dict) or data is None:
                 # retain pre-GH#38939 default behavior
@@ -669,6 +669,26 @@ def __init__(
         # For data is list-like, or Iterable (will consume into list)
         elif is_list_like(data):
             if not isinstance(data, (abc.Sequence, ExtensionArray)):
+                # If data is a sqlalchemy query, take the column names from its
+                # SQL text, unless the caller already passed ``columns``.
+                # NOTE(review): matching on the class name ``Query`` from the
+                # sqlalchemy package is an assumption about the intended type;
+                # confirm before merging.
+                klass = type(data)
+                if (
+                    columns is None
+                    and klass.__name__ == "Query"
+                    and klass.__module__.split(".")[0] == "sqlalchemy"
+                ):
+                    sql_mt = import_optional_dependency("sql_metadata")
+                    # Keep only the text after the first "." in each name;
+                    # find() returns -1 when there is none, so the slice then
+                    # starts at 0 and leaves the name unchanged.
+                    columns = [
+                        name[name.find(".") + 1 :]
+                        for name in sql_mt.get_query_columns(str(data))
+                    ]
+
                 data = list(data)
             if len(data) > 0:
                 if is_dataclass(data[0]):