Skip to content

Commit 08230db

Browse files
ENH: Map pandas integer to optimal SQLAlchemy integer type (GH35076) (#38548)
* BUG: Map pandas integer to optimal SQLAlchemy integer type (GH35076)
* Add conditional for default 32/64 bit integer size
* Check precision of int dtype
* Add whatsnew entry
* Update whatsnew
1 parent 12cea49 commit 08230db

File tree

3 files changed

+47
-1
lines changed

3 files changed

+47
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ Other enhancements
4444
- Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`)
4545
- Improved consistency of error message when passing an invalid ``win_type`` argument in :class:`Window` (:issue:`15969`)
4646
- :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`)
47+
- Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`)
4748

4849
.. ---------------------------------------------------------------------------
4950

pandas/io/sql.py

+7-1
Original file line number | Diff line number | Diff line change
@@ -1124,6 +1124,7 @@ def _sqlalchemy_type(self, col):
11241124
DateTime,
11251125
Float,
11261126
Integer,
1127+
SmallInteger,
11271128
Text,
11281129
Time,
11291130
)
@@ -1154,8 +1155,13 @@ def _sqlalchemy_type(self, col):
11541155
else:
11551156
return Float(precision=53)
11561157
elif col_type == "integer":
1157-
if col.dtype == "int32":
1158+
# GH35076 Map pandas integer to optimal SQLAlchemy integer type
1159+
if col.dtype.name.lower() in ("int8", "uint8", "int16"):
1160+
return SmallInteger
1161+
elif col.dtype.name.lower() in ("uint16", "int32"):
11581162
return Integer
1163+
elif col.dtype.name.lower() == "uint64":
1164+
raise ValueError("Unsigned 64 bit integer datatype is not supported")
11591165
else:
11601166
return BigInteger
11611167
elif col_type == "boolean":

pandas/tests/io/test_sql.py

+39
Original file line number | Diff line number | Diff line change
@@ -1160,6 +1160,45 @@ def test_sqlalchemy_type_mapping(self):
11601160
# GH 9086: TIMESTAMP is the suggested type for datetimes with timezones
11611161
assert isinstance(table.table.c["time"].type, sqltypes.TIMESTAMP)
11621162

1163+
@pytest.mark.parametrize(
1164+
"integer, expected",
1165+
[
1166+
("int8", "SMALLINT"),
1167+
("Int8", "SMALLINT"),
1168+
("uint8", "SMALLINT"),
1169+
("UInt8", "SMALLINT"),
1170+
("int16", "SMALLINT"),
1171+
("Int16", "SMALLINT"),
1172+
("uint16", "INTEGER"),
1173+
("UInt16", "INTEGER"),
1174+
("int32", "INTEGER"),
1175+
("Int32", "INTEGER"),
1176+
("uint32", "BIGINT"),
1177+
("UInt32", "BIGINT"),
1178+
("int64", "BIGINT"),
1179+
("Int64", "BIGINT"),
1180+
(int, "BIGINT" if np.dtype(int).name == "int64" else "INTEGER"),
1181+
],
1182+
)
1183+
def test_sqlalchemy_integer_mapping(self, integer, expected):
1184+
# GH35076 Map pandas integer to optimal SQLAlchemy integer type
1185+
df = DataFrame([0, 1], columns=["a"], dtype=integer)
1186+
db = sql.SQLDatabase(self.conn)
1187+
table = sql.SQLTable("test_type", db, frame=df)
1188+
1189+
result = str(table.table.c.a.type)
1190+
assert result == expected
1191+
1192+
@pytest.mark.parametrize("integer", ["uint64", "UInt64"])
1193+
def test_sqlalchemy_integer_overload_mapping(self, integer):
1194+
# GH35076 Map pandas integer to optimal SQLAlchemy integer type
1195+
df = DataFrame([0, 1], columns=["a"], dtype=integer)
1196+
db = sql.SQLDatabase(self.conn)
1197+
with pytest.raises(
1198+
ValueError, match="Unsigned 64 bit integer datatype is not supported"
1199+
):
1200+
sql.SQLTable("test_type", db, frame=df)
1201+
11631202
def test_database_uri_string(self):
11641203

11651204
# Test read_sql and .to_sql method with a database URI (GH10654)

0 commit comments

Comments
 (0)