Skip to content

Commit fddbe55

Browse files
committed
Merge branch 'master' into nui-regression
2 parents e66128c + d481c13 commit fddbe55

File tree

11 files changed, with 186 additions (+186) and 66 deletions (-66).

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ Other enhancements
4444
- Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`)
4545
- Improved consistency of error message when passing an invalid ``win_type`` argument in :class:`Window` (:issue:`15969`)
4646
- :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`)
47+
- Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`)
4748

4849
.. ---------------------------------------------------------------------------
4950

pandas/core/internals/blocks.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1063,15 +1063,12 @@ def putmask(self, mask, new, axis: int = 0) -> List["Block"]:
10631063
# We only get here for non-Extension Blocks, so _try_coerce_args
10641064
# is only relevant for DatetimeBlock and TimedeltaBlock
10651065
if self.dtype.kind in ["m", "M"]:
1066-
blk = self
1067-
if not inplace:
1068-
blk = self.copy()
1069-
arr = blk.array_values()
1066+
arr = self.array_values()
10701067
arr = cast("NDArrayBackedExtensionArray", arr)
10711068
if transpose:
10721069
arr = arr.T
10731070
arr.putmask(mask, new)
1074-
return [blk]
1071+
return [self]
10751072

10761073
if lib.is_scalar(new):
10771074
new = convert_scalar_for_putitemlike(new, self.values.dtype)

pandas/io/sql.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1124,6 +1124,7 @@ def _sqlalchemy_type(self, col):
11241124
DateTime,
11251125
Float,
11261126
Integer,
1127+
SmallInteger,
11271128
Text,
11281129
Time,
11291130
)
@@ -1154,8 +1155,13 @@ def _sqlalchemy_type(self, col):
11541155
else:
11551156
return Float(precision=53)
11561157
elif col_type == "integer":
1157-
if col.dtype == "int32":
1158+
# GH35076 Map pandas integer to optimal SQLAlchemy integer type
1159+
if col.dtype.name.lower() in ("int8", "uint8", "int16"):
1160+
return SmallInteger
1161+
elif col.dtype.name.lower() in ("uint16", "int32"):
11581162
return Integer
1163+
elif col.dtype.name.lower() == "uint64":
1164+
raise ValueError("Unsigned 64 bit integer datatype is not supported")
11591165
else:
11601166
return BigInteger
11611167
elif col_type == "boolean":

pandas/plotting/_matplotlib/core.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1232,8 +1232,8 @@ def _get_stacked_values(cls, ax: "Axes", stacking_id, values, label):
12321232

12331233
raise ValueError(
12341234
"When stacked is True, each column must be either "
1235-
"all positive or negative."
1236-
f"{label} contains both positive and negative values"
1235+
"all positive or all negative. "
1236+
f"Column '{label}' contains both positive and negative values"
12371237
)
12381238

12391239
@classmethod
@@ -1555,7 +1555,7 @@ class PiePlot(MPLPlot):
15551555
def __init__(self, data, kind=None, **kwargs):
15561556
data = data.fillna(value=0)
15571557
if (data < 0).any().any():
1558-
raise ValueError(f"{kind} doesn't allow negative values")
1558+
raise ValueError(f"{self._kind} plot doesn't allow negative values")
15591559
MPLPlot.__init__(self, data, kind=kind, **kwargs)
15601560

15611561
def _args_adjust(self):

pandas/tests/io/test_sql.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1160,6 +1160,45 @@ def test_sqlalchemy_type_mapping(self):
11601160
# GH 9086: TIMESTAMP is the suggested type for datetimes with timezones
11611161
assert isinstance(table.table.c["time"].type, sqltypes.TIMESTAMP)
11621162

1163+
@pytest.mark.parametrize(
1164+
"integer, expected",
1165+
[
1166+
("int8", "SMALLINT"),
1167+
("Int8", "SMALLINT"),
1168+
("uint8", "SMALLINT"),
1169+
("UInt8", "SMALLINT"),
1170+
("int16", "SMALLINT"),
1171+
("Int16", "SMALLINT"),
1172+
("uint16", "INTEGER"),
1173+
("UInt16", "INTEGER"),
1174+
("int32", "INTEGER"),
1175+
("Int32", "INTEGER"),
1176+
("uint32", "BIGINT"),
1177+
("UInt32", "BIGINT"),
1178+
("int64", "BIGINT"),
1179+
("Int64", "BIGINT"),
1180+
(int, "BIGINT" if np.dtype(int).name == "int64" else "INTEGER"),
1181+
],
1182+
)
1183+
def test_sqlalchemy_integer_mapping(self, integer, expected):
1184+
# GH35076 Map pandas integer to optimal SQLAlchemy integer type
1185+
df = DataFrame([0, 1], columns=["a"], dtype=integer)
1186+
db = sql.SQLDatabase(self.conn)
1187+
table = sql.SQLTable("test_type", db, frame=df)
1188+
1189+
result = str(table.table.c.a.type)
1190+
assert result == expected
1191+
1192+
@pytest.mark.parametrize("integer", ["uint64", "UInt64"])
1193+
def test_sqlalchemy_integer_overload_mapping(self, integer):
1194+
# GH35076 Map pandas integer to optimal SQLAlchemy integer type
1195+
df = DataFrame([0, 1], columns=["a"], dtype=integer)
1196+
db = sql.SQLDatabase(self.conn)
1197+
with pytest.raises(
1198+
ValueError, match="Unsigned 64 bit integer datatype is not supported"
1199+
):
1200+
sql.SQLTable("test_type", db, frame=df)
1201+
11631202
def test_database_uri_string(self):
11641203

11651204
# Test read_sql and .to_sql method with a database URI (GH10654)

pandas/tests/plotting/frame/test_frame.py

Lines changed: 41 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
""" Test cases for DataFrame.plot """
2-
32
from datetime import date, datetime
43
import itertools
4+
import re
55
import string
66
import warnings
77

@@ -358,10 +358,10 @@ def test_negative_log(self):
358358
index=list(string.ascii_letters[:6]),
359359
columns=["x", "y", "z", "four"],
360360
)
361-
362-
with pytest.raises(ValueError):
361+
msg = "Log-y scales are not supported in area plot"
362+
with pytest.raises(ValueError, match=msg):
363363
df.plot.area(logy=True)
364-
with pytest.raises(ValueError):
364+
with pytest.raises(ValueError, match=msg):
365365
df.plot.area(loglog=True)
366366

367367
def _compare_stacked_y_cood(self, normal_lines, stacked_lines):
@@ -406,7 +406,12 @@ def test_line_area_stacked(self):
406406
self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:])
407407

408408
_check_plot_works(mixed_df.plot, stacked=False)
409-
with pytest.raises(ValueError):
409+
msg = (
410+
"When stacked is True, each column must be either all positive or "
411+
"all negative. Column 'w' contains both positive and negative "
412+
"values"
413+
)
414+
with pytest.raises(ValueError, match=msg):
410415
mixed_df.plot(stacked=True)
411416

412417
# Use an index with strictly positive values, preventing
@@ -650,9 +655,11 @@ def test_plot_scatter(self):
650655
_check_plot_works(df.plot.scatter, x="x", y="y")
651656
_check_plot_works(df.plot.scatter, x=1, y=2)
652657

653-
with pytest.raises(TypeError):
658+
msg = re.escape("scatter() missing 1 required positional argument: 'y'")
659+
with pytest.raises(TypeError, match=msg):
654660
df.plot.scatter(x="x")
655-
with pytest.raises(TypeError):
661+
msg = re.escape("scatter() missing 1 required positional argument: 'x'")
662+
with pytest.raises(TypeError, match=msg):
656663
df.plot.scatter(y="y")
657664

658665
# GH 6951
@@ -850,8 +857,9 @@ def test_boxplot_return_type(self):
850857
index=list(string.ascii_letters[:6]),
851858
columns=["one", "two", "three", "four"],
852859
)
853-
with pytest.raises(ValueError):
854-
df.plot.box(return_type="NOTATYPE")
860+
msg = "return_type must be {None, 'axes', 'dict', 'both'}"
861+
with pytest.raises(ValueError, match=msg):
862+
df.plot.box(return_type="not_a_type")
855863

856864
result = df.plot.box(return_type="dict")
857865
self._check_box_return_type(result, "dict")
@@ -1309,44 +1317,47 @@ def test_partially_invalid_plot_data(self):
13091317
df = DataFrame(np.random.randn(10, 2), dtype=object)
13101318
df[np.random.rand(df.shape[0]) > 0.5] = "a"
13111319
for kind in plotting.PlotAccessor._common_kinds:
1312-
13131320
msg = "no numeric data to plot"
13141321
with pytest.raises(TypeError, match=msg):
13151322
df.plot(kind=kind)
13161323

13171324
with tm.RNGContext(42):
13181325
# area plot doesn't support positive/negative mixed data
1319-
kinds = ["area"]
13201326
df = DataFrame(np.random.rand(10, 2), dtype=object)
13211327
df[np.random.rand(df.shape[0]) > 0.5] = "a"
1322-
for kind in kinds:
1323-
with pytest.raises(TypeError):
1324-
df.plot(kind=kind)
1328+
with pytest.raises(TypeError, match="no numeric data to plot"):
1329+
df.plot(kind="area")
13251330

13261331
def test_invalid_kind(self):
13271332
df = DataFrame(np.random.randn(10, 2))
1328-
with pytest.raises(ValueError):
1329-
df.plot(kind="aasdf")
1333+
msg = "invalid_plot_kind is not a valid plot kind"
1334+
with pytest.raises(ValueError, match=msg):
1335+
df.plot(kind="invalid_plot_kind")
13301336

13311337
@pytest.mark.parametrize(
13321338
"x,y,lbl",
13331339
[
13341340
(["B", "C"], "A", "a"),
13351341
(["A"], ["B", "C"], ["b", "c"]),
1336-
("A", ["B", "C"], "badlabel"),
13371342
],
13381343
)
13391344
def test_invalid_xy_args(self, x, y, lbl):
13401345
# GH 18671, 19699 allows y to be list-like but not x
13411346
df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
1342-
with pytest.raises(ValueError):
1347+
with pytest.raises(ValueError, match="x must be a label or position"):
13431348
df.plot(x=x, y=y, label=lbl)
13441349

1350+
def test_bad_label(self):
1351+
df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
1352+
msg = "label should be list-like and same length as y"
1353+
with pytest.raises(ValueError, match=msg):
1354+
df.plot(x="A", y=["B", "C"], label="bad_label")
1355+
13451356
@pytest.mark.parametrize("x,y", [("A", "B"), (["A"], "B")])
13461357
def test_invalid_xy_args_dup_cols(self, x, y):
13471358
# GH 18671, 19699 allows y to be list-like but not x
13481359
df = DataFrame([[1, 3, 5], [2, 4, 6]], columns=list("AAB"))
1349-
with pytest.raises(ValueError):
1360+
with pytest.raises(ValueError, match="x must be a label or position"):
13501361
df.plot(x=x, y=y)
13511362

13521363
@pytest.mark.parametrize(
@@ -1416,7 +1427,8 @@ def test_pie_df(self):
14161427
columns=["X", "Y", "Z"],
14171428
index=["a", "b", "c", "d", "e"],
14181429
)
1419-
with pytest.raises(ValueError):
1430+
msg = "pie requires either y column or 'subplots=True'"
1431+
with pytest.raises(ValueError, match=msg):
14201432
df.plot.pie()
14211433

14221434
ax = _check_plot_works(df.plot.pie, y="Y")
@@ -1520,11 +1532,11 @@ def test_errorbar_plot(self):
15201532
ax = _check_plot_works(s_df.plot, y="y", x="x", yerr=yerr)
15211533
self._check_has_errorbars(ax, xerr=0, yerr=1)
15221534

1523-
with pytest.raises(ValueError):
1535+
with tm.external_error_raised(ValueError):
15241536
df.plot(yerr=np.random.randn(11))
15251537

15261538
df_err = DataFrame({"x": ["zzz"] * 12, "y": ["zzz"] * 12})
1527-
with pytest.raises((ValueError, TypeError)):
1539+
with tm.external_error_raised(TypeError):
15281540
df.plot(yerr=df_err)
15291541

15301542
@pytest.mark.parametrize("kind", ["line", "bar", "barh"])
@@ -1647,7 +1659,10 @@ def test_errorbar_asymmetrical(self):
16471659
expected_0_0 = err[0, :, 0] * np.array([-1, 1])
16481660
tm.assert_almost_equal(yerr_0_0, expected_0_0)
16491661

1650-
with pytest.raises(ValueError):
1662+
msg = re.escape(
1663+
"Asymmetrical error bars should be provided with the shape (3, 2, 5)"
1664+
)
1665+
with pytest.raises(ValueError, match=msg):
16511666
df.plot(yerr=err.T)
16521667

16531668
tm.close()
@@ -1837,9 +1852,10 @@ def test_memory_leak(self):
18371852
tm.close()
18381853
# force a garbage collection
18391854
gc.collect()
1855+
msg = "weakly-referenced object no longer exists"
18401856
for key in results:
18411857
# check that every plot was collected
1842-
with pytest.raises(ReferenceError):
1858+
with pytest.raises(ReferenceError, match=msg):
18431859
# need to actually access something to get an error
18441860
results[key].lines
18451861

@@ -2095,7 +2111,7 @@ def test_plot_no_rows(self):
20952111

20962112
def test_plot_no_numeric_data(self):
20972113
df = DataFrame(["a", "b", "c"])
2098-
with pytest.raises(TypeError):
2114+
with pytest.raises(TypeError, match="no numeric data to plot"):
20992115
df.plot()
21002116

21012117
def test_missing_markers_legend(self):

pandas/tests/plotting/frame/test_frame_color.py

Lines changed: 46 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
""" Test cases for DataFrame.plot """
2-
2+
import re
33
import warnings
44

55
import numpy as np
@@ -63,7 +63,7 @@ def test_rgb_tuple_color(self, color):
6363

6464
def test_color_empty_string(self):
6565
df = DataFrame(np.random.randn(10, 2))
66-
with pytest.raises(ValueError):
66+
with pytest.raises(ValueError, match="Invalid color argument:"):
6767
df.plot(color="")
6868

6969
def test_color_and_style_arguments(self):
@@ -79,7 +79,12 @@ def test_color_and_style_arguments(self):
7979
assert color == ["red", "black"]
8080
# passing both 'color' and 'style' arguments should not be allowed
8181
# if there is a color symbol in the style strings:
82-
with pytest.raises(ValueError):
82+
msg = (
83+
"Cannot pass 'style' string with a color symbol and 'color' keyword "
84+
"argument. Please use one or the other or pass 'style' without a color "
85+
"symbol"
86+
)
87+
with pytest.raises(ValueError, match=msg):
8388
df.plot(color=["red", "black"], style=["k-", "r--"])
8489

8590
@pytest.mark.parametrize(
@@ -217,7 +222,7 @@ def test_scatter_with_c_column_name_with_colors(self, cmap):
217222

218223
def test_scatter_colors(self):
219224
df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
220-
with pytest.raises(TypeError):
225+
with pytest.raises(TypeError, match="Specify exactly one of `c` and `color`"):
221226
df.plot.scatter(x="a", y="b", c="c", color="green")
222227

223228
default_colors = self._unpack_cycler(self.plt.rcParams)
@@ -585,7 +590,11 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None):
585590
bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict")
586591
_check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456")
587592

588-
with pytest.raises(ValueError):
593+
msg = re.escape(
594+
"color dict contains invalid key 'xxxx'. The key must be either "
595+
"['boxes', 'whiskers', 'medians', 'caps']"
596+
)
597+
with pytest.raises(ValueError, match=msg):
589598
# Color contains invalid key results in ValueError
590599
df.plot.box(color={"boxes": "red", "xxxx": "blue"})
591600

@@ -641,6 +650,36 @@ def test_colors_of_columns_with_same_name(self):
641650

642651
def test_invalid_colormap(self):
643652
df = DataFrame(np.random.randn(3, 2), columns=["A", "B"])
644-
645-
with pytest.raises(ValueError):
653+
msg = (
654+
"'invalid_colormap' is not a valid value for name; supported values are "
655+
"'Accent', 'Accent_r', 'Blues', 'Blues_r', 'BrBG', 'BrBG_r', 'BuGn', "
656+
"'BuGn_r', 'BuPu', 'BuPu_r', 'CMRmap', 'CMRmap_r', 'Dark2', 'Dark2_r', "
657+
"'GnBu', 'GnBu_r', 'Greens', 'Greens_r', 'Greys', 'Greys_r', 'OrRd', "
658+
"'OrRd_r', 'Oranges', 'Oranges_r', 'PRGn', 'PRGn_r', 'Paired', "
659+
"'Paired_r', 'Pastel1', 'Pastel1_r', 'Pastel2', 'Pastel2_r', 'PiYG', "
660+
"'PiYG_r', 'PuBu', 'PuBuGn', 'PuBuGn_r', 'PuBu_r', 'PuOr', 'PuOr_r', "
661+
"'PuRd', 'PuRd_r', 'Purples', 'Purples_r', 'RdBu', 'RdBu_r', 'RdGy', "
662+
"'RdGy_r', 'RdPu', 'RdPu_r', 'RdYlBu', 'RdYlBu_r', 'RdYlGn', "
663+
"'RdYlGn_r', 'Reds', 'Reds_r', 'Set1', 'Set1_r', 'Set2', 'Set2_r', "
664+
"'Set3', 'Set3_r', 'Spectral', 'Spectral_r', 'Wistia', 'Wistia_r', "
665+
"'YlGn', 'YlGnBu', 'YlGnBu_r', 'YlGn_r', 'YlOrBr', 'YlOrBr_r', "
666+
"'YlOrRd', 'YlOrRd_r', 'afmhot', 'afmhot_r', 'autumn', 'autumn_r', "
667+
"'binary', 'binary_r', 'bone', 'bone_r', 'brg', 'brg_r', 'bwr', "
668+
"'bwr_r', 'cividis', 'cividis_r', 'cool', 'cool_r', 'coolwarm', "
669+
"'coolwarm_r', 'copper', 'copper_r', 'cubehelix', 'cubehelix_r', "
670+
"'flag', 'flag_r', 'gist_earth', 'gist_earth_r', 'gist_gray', "
671+
"'gist_gray_r', 'gist_heat', 'gist_heat_r', 'gist_ncar', "
672+
"'gist_ncar_r', 'gist_rainbow', 'gist_rainbow_r', 'gist_stern', "
673+
"'gist_stern_r', 'gist_yarg', 'gist_yarg_r', 'gnuplot', 'gnuplot2', "
674+
"'gnuplot2_r', 'gnuplot_r', 'gray', 'gray_r', 'hot', 'hot_r', 'hsv', "
675+
"'hsv_r', 'inferno', 'inferno_r', 'jet', 'jet_r', 'magma', 'magma_r', "
676+
"'nipy_spectral', 'nipy_spectral_r', 'ocean', 'ocean_r', 'pink', "
677+
"'pink_r', 'plasma', 'plasma_r', 'prism', 'prism_r', 'rainbow', "
678+
"'rainbow_r', 'seismic', 'seismic_r', 'spring', 'spring_r', 'summer', "
679+
"'summer_r', 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', "
680+
"'tab20b_r', 'tab20c', 'tab20c_r', 'terrain', 'terrain_r', 'turbo', "
681+
"'turbo_r', 'twilight', 'twilight_r', 'twilight_shifted', "
682+
"'twilight_shifted_r', 'viridis', 'viridis_r', 'winter', 'winter_r'"
683+
)
684+
with pytest.raises(ValueError, match=msg):
646685
df.plot(colormap="invalid_colormap")

pandas/tests/plotting/test_backend.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,11 @@ def test_setting_backend_without_plot_raises():
9595

9696
@td.skip_if_mpl
9797
def test_no_matplotlib_ok():
98-
with pytest.raises(ImportError):
98+
msg = (
99+
'matplotlib is required for plotting when the default backend "matplotlib" is '
100+
"selected."
101+
)
102+
with pytest.raises(ImportError, match=msg):
99103
pandas.plotting._core._get_plot_backend("matplotlib")
100104

101105

0 commit comments

Comments
 (0)