Skip to content

CLN: Add typing for dtype argument in io/sql.py #38680

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 29, 2020
Merged
48 changes: 33 additions & 15 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ def read_sql_query(
Data type for data or columns. E.g. np.float64 or
{‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}

.. versionadded:: 1.3.0

Returns
-------
DataFrame or Iterator[DataFrame]
Expand Down Expand Up @@ -609,7 +611,7 @@ def to_sql(
index: bool = True,
index_label=None,
chunksize: Optional[int] = None,
dtype=None,
dtype: Optional[DtypeArg] = None,
method: Optional[str] = None,
) -> None:
"""
Expand Down Expand Up @@ -768,7 +770,7 @@ def __init__(
index_label=None,
schema=None,
keys=None,
dtype=None,
dtype: Optional[DtypeArg] = None,
):
self.name = name
self.pd_sql = pandas_sql_engine
Expand Down Expand Up @@ -1108,9 +1110,9 @@ def _harmonize_columns(self, parse_dates=None):

def _sqlalchemy_type(self, col):

dtype = self.dtype or {}
if col.name in dtype:
return self.dtype[col.name]
dtype: DtypeArg = self.dtype or {}
if isinstance(dtype, dict) and col.name in dtype:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shoudl is is_dict_like

return dtype[col.name]

# Infer type of column, while ignoring missing values.
# Needed for inserting typed data containing NULLs, GH 8778.
Expand Down Expand Up @@ -1209,7 +1211,18 @@ def read_sql(self, *args, **kwargs):
"connectable or sqlite connection"
)

def to_sql(self, *args, **kwargs):
def to_sql(
self,
frame,
name,
if_exists="fail",
index=True,
index_label=None,
schema=None,
chunksize=None,
dtype: Optional[DtypeArg] = None,
method=None,
):
raise ValueError(
"PandasSQL must be created with an SQLAlchemy "
"connectable or sqlite connection"
Expand Down Expand Up @@ -1436,7 +1449,7 @@ def to_sql(
index_label=None,
schema=None,
chunksize=None,
dtype=None,
dtype: Optional[DtypeArg] = None,
method=None,
):
"""
Expand Down Expand Up @@ -1483,7 +1496,7 @@ def to_sql(
if dtype and not is_dict_like(dtype):
dtype = {col_name: dtype for col_name in frame}

if dtype is not None:
if dtype is not None and isinstance(dtype, dict):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

interesting you have to do this as L1496 explicity converts this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so is_dict_like will pass thrue a Series for example which will fail in other places which we are not likely testing. I would change L1499 to

if dtype is not None:
     if not is_dict_like(...):
       ..
    else:
        dtype = dict(dtype)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

interesting you have to do this as L1496 explicity converts this.

Yes, the problem is that we define the type of dtype already in the function at L1452. After that you cannot change the type of dtype, even not by overwriting the variable. isinstance checks are the only way to narrow it down. So the provided solution (see below) will not work, since we are overwriting the dtype variable

        if dtype is not None:
            if not is_dict_like(dtype):
                dtype = {col_name: dtype for col_name in frame}
            else:
                dtype = dict(dtype) # This line gives a mypy error

Mypy error

error: Argument 1 to "dict" has incompatible type "Union[ExtensionDtype, Any, str, Type[object], Dict[Optional[Hashable], Union[ExtensionDtype, str, Any, Type[str], Type[float], Type[int], Type[complex], Type[bool], Type[object]]]]"; expected "Mapping[Any, Any]"  [arg-type]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isinstance checks are the only way to narrow it down.

also can use cast or assert. In this case, is_dict_like function will not narrow types, so ok to use a cast following the is_* call. so something like

            if is_dict_like(dtype):
                dtype = cast(dict, dtype)
                ...
            else:
                ...

can always replace dict with Mapping, or union if more than dict is accepted for dict-like parameter.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @simonjayhawkins did not know the cast function also helps in narrowing the type down. With the cast function it works.

from sqlalchemy.types import TypeEngine, to_instance

for col, my_type in dtype.items():
Expand Down Expand Up @@ -1569,7 +1582,7 @@ def _create_sql_schema(
frame: DataFrame,
table_name: str,
keys: Optional[List[str]] = None,
dtype: Optional[dict] = None,
dtype: Optional[DtypeArg] = None,
schema: Optional[str] = None,
):
table = SQLTable(
Expand Down Expand Up @@ -1740,8 +1753,8 @@ def _create_table_setup(self):
return create_stmts

def _sql_type_name(self, col):
dtype = self.dtype or {}
if col.name in dtype:
dtype: DtypeArg = self.dtype or {}
if isinstance(dtype, dict) and col.name in dtype:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use is_dict_like

return dtype[col.name]

# Infer type of column, while ignoring missing values.
Expand Down Expand Up @@ -1901,7 +1914,7 @@ def to_sql(
index_label=None,
schema=None,
chunksize=None,
dtype=None,
dtype: Optional[DtypeArg] = None,
method=None,
):
"""
Expand Down Expand Up @@ -1947,7 +1960,7 @@ def to_sql(
if dtype and not is_dict_like(dtype):
dtype = {col_name: dtype for col_name in frame}

if dtype is not None:
if dtype is not None and isinstance(dtype, dict):
for col, my_type in dtype.items():
if not isinstance(my_type, str):
raise ValueError(f"{col} ({my_type}) not a string")
Expand Down Expand Up @@ -1986,7 +1999,7 @@ def _create_sql_schema(
frame,
table_name: str,
keys=None,
dtype=None,
dtype: Optional[DtypeArg] = None,
schema: Optional[str] = None,
):
table = SQLiteTable(
Expand All @@ -2002,7 +2015,12 @@ def _create_sql_schema(


def get_schema(
frame, name: str, keys=None, con=None, dtype=None, schema: Optional[str] = None
frame,
name: str,
keys=None,
con=None,
dtype: Optional[DtypeArg] = None,
schema: Optional[str] = None,
):
"""
Get the SQL db table schema for the given frame.
Expand Down