Skip to content

Commit 6cea61d

Browse files
committed
Don't store object or unicode numpy arrays in figure. Coerce to lists
1 parent 340aed3 commit 6cea61d

File tree

10 files changed

+132
-85
lines changed

10 files changed

+132
-85
lines changed

Diff for: packages/python/plotly/_plotly_utils/basevalidators.py

+25-44
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def to_scalar_or_list(v):
5353
return v
5454

5555

56-
def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
56+
def copy_to_readonly_numpy_array_or_list(v, kind=None, force_numeric=False):
5757
"""
5858
Convert an array-like value into a read-only numpy array
5959
@@ -89,7 +89,7 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
8989

9090
# u: unsigned int, i: signed int, f: float
9191
numeric_kinds = {"u", "i", "f"}
92-
kind_default_dtypes = {"u": "uint32", "i": "int32", "f": "float64", "O": "object"}
92+
kind_default_dtypes = {"u": "uint32", "i": "int32", "f": "float64", "O": "object", "U": "U"}
9393

9494
# Handle pandas Series and Index objects
9595
if pd and isinstance(v, (pd.Series, pd.Index)):
@@ -113,18 +113,12 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
113113
if not isinstance(v, np.ndarray):
114114
# v has its own logic on how to convert itself into a numpy array
115115
if is_numpy_convertable(v):
116-
return copy_to_readonly_numpy_array(
116+
return copy_to_readonly_numpy_array_or_list(
117117
np.array(v), kind=kind, force_numeric=force_numeric
118118
)
119119
else:
120120
# v is not a homogeneous array
121-
v_list = [to_scalar_or_list(e) for e in v]
122-
123-
# Lookup dtype for requested kind, if any
124-
dtype = kind_default_dtypes.get(first_kind, None)
125-
126-
# construct new array from list
127-
new_v = np.array(v_list, order="C", dtype=dtype)
121+
return [to_scalar_or_list(e) for e in v]
128122
elif v.dtype.kind in numeric_kinds:
129123
# v is a homogeneous numeric array
130124
if kind and v.dtype.kind not in kind:
@@ -135,6 +129,12 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
135129
else:
136130
# Either no kind was requested or requested kind is satisfied
137131
new_v = np.ascontiguousarray(v.copy())
132+
elif v.dtype.kind == "O":
133+
if kind:
134+
dtype = kind_default_dtypes.get(first_kind, None)
135+
return np.array(v, dtype=dtype)
136+
else:
137+
return v.tolist()
138138
else:
139139
# v is a non-numeric homogeneous array
140140
new_v = v.copy()
@@ -149,12 +149,12 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
149149
if "U" not in kind:
150150
# Force non-numeric arrays to have object type
151151
# --------------------------------------------
152-
# Here we make sure that non-numeric arrays have the object
153-
# datatype. This works around cases like np.array([1, 2, '3']) where
152+
# Here we make sure that non-numeric arrays become lists
153+
# This works around cases like np.array([1, 2, '3']) where
154154
# numpy converts the integers to strings and returns array of dtype
155155
# '<U21'
156156
if new_v.dtype.kind not in ["u", "i", "f", "O", "M"]:
157-
new_v = np.array(v, dtype="object")
157+
return v.tolist()
158158

159159
# Set new array to be read-only
160160
# -----------------------------
@@ -191,7 +191,7 @@ def is_homogeneous_array(v):
191191
if v_numpy.shape == ():
192192
return False
193193
else:
194-
return True
194+
return True # v_numpy.dtype.kind in ["u", "i", "f", "M", "U"]
195195
return False
196196

197197

@@ -393,7 +393,7 @@ def validate_coerce(self, v):
393393
# Pass None through
394394
pass
395395
elif is_homogeneous_array(v):
396-
v = copy_to_readonly_numpy_array(v)
396+
v = copy_to_readonly_numpy_array_or_list(v)
397397
elif is_simple_array(v):
398398
v = to_scalar_or_list(v)
399399
else:
@@ -598,7 +598,7 @@ def validate_coerce(self, v):
598598
self.raise_invalid_elements(invalid_els[:10])
599599

600600
if is_homogeneous_array(v):
601-
v = copy_to_readonly_numpy_array(v)
601+
v = copy_to_readonly_numpy_array_or_list(v)
602602
else:
603603
v = to_scalar_or_list(v)
604604
else:
@@ -754,7 +754,7 @@ def validate_coerce(self, v):
754754
elif self.array_ok and is_homogeneous_array(v):
755755
np = get_module("numpy")
756756
try:
757-
v_array = copy_to_readonly_numpy_array(v, force_numeric=True)
757+
v_array = copy_to_readonly_numpy_array_or_list(v, force_numeric=True)
758758
except (ValueError, TypeError, OverflowError):
759759
self.raise_invalid_val(v)
760760

@@ -881,7 +881,7 @@ def validate_coerce(self, v):
881881
pass
882882
elif self.array_ok and is_homogeneous_array(v):
883883
np = get_module("numpy")
884-
v_array = copy_to_readonly_numpy_array(
884+
v_array = copy_to_readonly_numpy_array_or_list(
885885
v, kind=("i", "u"), force_numeric=True
886886
)
887887

@@ -1042,26 +1042,7 @@ def validate_coerce(self, v):
10421042
if invalid_els:
10431043
self.raise_invalid_elements(invalid_els)
10441044

1045-
if is_homogeneous_array(v):
1046-
np = get_module("numpy")
1047-
1048-
# If not strict, let numpy cast elements to strings
1049-
v = copy_to_readonly_numpy_array(v, kind="U")
1050-
1051-
# Check no_blank
1052-
if self.no_blank:
1053-
invalid_els = v[v == ""][:10].tolist()
1054-
if invalid_els:
1055-
self.raise_invalid_elements(invalid_els)
1056-
1057-
# Check values
1058-
if self.values:
1059-
invalid_inds = np.logical_not(np.isin(v, self.values))
1060-
invalid_els = v[invalid_inds][:10].tolist()
1061-
if invalid_els:
1062-
self.raise_invalid_elements(invalid_els)
1063-
1064-
elif is_simple_array(v):
1045+
if is_simple_array(v) or is_homogeneous_array(v):
10651046
if not self.strict:
10661047
v = [StringValidator.to_str_or_unicode_or_none(e) for e in v]
10671048

@@ -1338,8 +1319,8 @@ def validate_coerce(self, v, should_raise=True):
13381319
# Pass None through
13391320
pass
13401321
elif self.array_ok and is_homogeneous_array(v):
1341-
v = copy_to_readonly_numpy_array(v)
1342-
if self.numbers_allowed() and v.dtype.kind in ["u", "i", "f"]:
1322+
v = copy_to_readonly_numpy_array_or_list(v)
1323+
if not isinstance(v, list) and self.numbers_allowed() and v.dtype.kind in ["u", "i", "f"]:
13431324
# Numbers are allowed and we have an array of numbers.
13441325
# All good
13451326
pass
@@ -1353,9 +1334,9 @@ def validate_coerce(self, v, should_raise=True):
13531334

13541335
# ### Check that elements have valid colors types ###
13551336
elif self.numbers_allowed() or invalid_els:
1356-
v = copy_to_readonly_numpy_array(validated_v, kind="O")
1337+
v = copy_to_readonly_numpy_array_or_list(validated_v, kind="O")
13571338
else:
1358-
v = copy_to_readonly_numpy_array(validated_v, kind="U")
1339+
v = copy_to_readonly_numpy_array_or_list(validated_v, kind="U")
13591340
elif self.array_ok and is_simple_array(v):
13601341
validated_v = [self.validate_coerce(e, should_raise=False) for e in v]
13611342

@@ -1870,7 +1851,7 @@ def validate_coerce(self, v):
18701851
self.raise_invalid_elements(invalid_els)
18711852

18721853
if is_homogeneous_array(v):
1873-
v = copy_to_readonly_numpy_array(validated_v, kind="U")
1854+
v = copy_to_readonly_numpy_array_or_list(validated_v, kind="U")
18741855
else:
18751856
v = to_scalar_or_list(v)
18761857
else:
@@ -1918,7 +1899,7 @@ def validate_coerce(self, v):
19181899
# Pass None through
19191900
pass
19201901
elif self.array_ok and is_homogeneous_array(v):
1921-
v = copy_to_readonly_numpy_array(v, kind="O")
1902+
v = copy_to_readonly_numpy_array_or_list(v, kind="O")
19221903
elif self.array_ok and is_simple_array(v):
19231904
v = to_scalar_or_list(v)
19241905
return v

Diff for: packages/python/plotly/_plotly_utils/tests/validators/test_dataarray_validator.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,25 @@ def test_validator_acceptance_simple(val, validator):
3333

3434
@pytest.mark.parametrize(
3535
"val",
36-
[np.array([2, 3, 4]), pd.Series(["a", "b", "c"]), np.array([[1, 2, 3], [4, 5, 6]])],
36+
[np.array([2, 3, 4]), np.array([[1, 2, 3], [4, 5, 6]])],
3737
)
3838
def test_validator_acceptance_homogeneous(val, validator):
3939
coerce_val = validator.validate_coerce(val)
4040
assert isinstance(coerce_val, np.ndarray)
4141
assert np.array_equal(validator.present(coerce_val), val)
4242

4343

44+
# Accept object array as list
45+
@pytest.mark.parametrize(
46+
"val",
47+
[["A", "B", "C"], np.array(["A", "B", "C"], dtype="object"), pd.Series(["a", "b", "c"])]
48+
)
49+
def test_validator_accept_object_array_as_list(val, validator):
50+
coerce_val = validator.validate_coerce(val)
51+
assert isinstance(coerce_val, list)
52+
assert coerce_val == list(val)
53+
54+
4455
# ### Rejection ###
4556
@pytest.mark.parametrize("val", ["Hello", 23, set(), {}])
4657
def test_rejection(val, validator):

Diff for: packages/python/plotly/_plotly_utils/tests/validators/test_enumerated_validator.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def test_rejection_by_element_aok(val, validator_aok):
126126
[],
127127
["bar12"],
128128
("foo", "bar012", "baz"),
129-
np.array([]),
129+
np.array([], dtype="object"),
130130
np.array(["bar12"]),
131131
np.array(["foo", "bar012", "baz"]),
132132
],
@@ -135,7 +135,7 @@ def test_acceptance_aok(val, validator_aok_re):
135135
# Values should be accepted and returned unchanged
136136
coerce_val = validator_aok_re.validate_coerce(val)
137137
if isinstance(val, (np.ndarray, pd.Series)):
138-
assert np.array_equal(coerce_val, np.array(val, dtype=coerce_val.dtype))
138+
assert coerce_val == list(np.array(val))
139139
elif isinstance(val, (list, tuple)):
140140
assert validator_aok_re.present(coerce_val) == tuple(val)
141141
else:

Diff for: packages/python/plotly/_plotly_utils/tests/validators/test_pandas_series_input.py

+8-20
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,10 @@ def test_color_validator_object(color_validator, color_object_pandas):
149149
res = color_validator.validate_coerce(color_object_pandas)
150150

151151
# Check type
152-
assert isinstance(res, np.ndarray)
153-
154-
# Check dtype
155-
assert res.dtype == "object"
152+
assert isinstance(res, list)
156153

157154
# Check values
158-
np.testing.assert_array_equal(res, color_object_pandas)
155+
assert res == color_object_pandas.tolist()
159156

160157

161158
def test_color_validator_categorical(color_validator, color_categorical_pandas):
@@ -164,13 +161,10 @@ def test_color_validator_categorical(color_validator, color_categorical_pandas):
164161

165162
# Check type
166163
assert color_categorical_pandas.dtype == "category"
167-
assert isinstance(res, np.ndarray)
168-
169-
# Check dtype
170-
assert res.dtype == "object"
164+
assert isinstance(res, list)
171165

172166
# Check values
173-
np.testing.assert_array_equal(res, np.array(color_categorical_pandas))
167+
assert res == color_categorical_pandas.tolist()
174168

175169

176170
def test_data_array_validator_dates_series(
@@ -180,13 +174,10 @@ def test_data_array_validator_dates_series(
180174
res = data_array_validator.validate_coerce(datetime_pandas)
181175

182176
# Check type
183-
assert isinstance(res, np.ndarray)
184-
185-
# Check dtype
186-
assert res.dtype == "object"
177+
assert isinstance(res, list)
187178

188179
# Check values
189-
np.testing.assert_array_equal(res, dates_array)
180+
assert res == dates_array.tolist()
190181

191182

192183
def test_data_array_validator_dates_dataframe(
@@ -197,10 +188,7 @@ def test_data_array_validator_dates_dataframe(
197188
res = data_array_validator.validate_coerce(df)
198189

199190
# Check type
200-
assert isinstance(res, np.ndarray)
201-
202-
# Check dtype
203-
assert res.dtype == "object"
191+
assert isinstance(res, list)
204192

205193
# Check values
206-
np.testing.assert_array_equal(res, dates_array.reshape(len(dates_array), 1))
194+
assert res == dates_array.reshape(len(dates_array), 1).tolist()

Diff for: packages/python/plotly/_plotly_utils/tests/validators/test_string_validator.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,7 @@ def test_acceptance_aok_scalars(val, validator_aok):
138138
def test_acceptance_aok_list(val, validator_aok):
139139
coerce_val = validator_aok.validate_coerce(val)
140140
if isinstance(val, np.ndarray):
141-
assert isinstance(coerce_val, np.ndarray)
142-
assert np.array_equal(coerce_val, np.array(val, dtype=coerce_val.dtype))
141+
assert coerce_val == val.tolist()
143142
elif isinstance(val, list):
144143
assert validator_aok.present(val) == tuple(val)
145144
else:
@@ -178,9 +177,7 @@ def test_rejection_aok_values(val, validator_aok_values):
178177
)
179178
def test_acceptance_no_blanks_aok(val, validator_no_blanks_aok):
180179
coerce_val = validator_no_blanks_aok.validate_coerce(val)
181-
if isinstance(val, np.ndarray):
182-
assert np.array_equal(coerce_val, np.array(val, dtype=coerce_val.dtype))
183-
elif isinstance(val, list):
180+
if isinstance(val, (list, np.ndarray)):
184181
assert validator_no_blanks_aok.present(coerce_val) == tuple(val)
185182
else:
186183
assert coerce_val == val

Diff for: packages/python/plotly/_plotly_utils/tests/validators/test_xarray_input.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,7 @@ def test_color_validator_object(color_validator, color_object_xarray):
126126
res = color_validator.validate_coerce(color_object_xarray)
127127

128128
# Check type
129-
assert isinstance(res, np.ndarray)
130-
131-
# Check dtype
132-
assert res.dtype == "object"
129+
assert isinstance(res, list)
133130

134131
# Check values
135-
np.testing.assert_array_equal(res, color_object_xarray)
132+
assert res == list(color_object_xarray)

Diff for: packages/python/plotly/plotly/tests/test_core/test_px/test_px.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ def test_custom_data_scatter():
3737
hover_data=["petal_length", "petal_width"],
3838
custom_data=["species_id", "species"],
3939
)
40-
assert np.all(fig.data[0].customdata[:, 0] == iris.species_id)
41-
assert fig.data[0].customdata.shape[1] == 4
40+
assert [e[0] for e in fig.data[0].customdata] == iris.species_id.to_list()
41+
assert len(fig.data[0].customdata[0]) == 4
4242
# Hover and custom data, with repeated arguments
4343
fig = px.scatter(
4444
iris,
@@ -47,8 +47,8 @@ def test_custom_data_scatter():
4747
hover_data=["petal_length", "petal_width", "species_id"],
4848
custom_data=["species_id", "species"],
4949
)
50-
assert np.all(fig.data[0].customdata[:, 0] == iris.species_id)
51-
assert fig.data[0].customdata.shape[1] == 4
50+
assert [e[0] for e in fig.data[0].customdata] == iris.species_id.tolist()
51+
assert len(fig.data[0].customdata[0]) == 4
5252
assert (
5353
fig.data[0].hovertemplate
5454
== "sepal_width=%{x}<br>sepal_length=%{y}<br>petal_length=%{customdata[2]}<br>petal_width=%{customdata[3]}<br>species_id=%{customdata[0]}<extra></extra>"

Diff for: packages/python/plotly/plotly/tests/test_core/test_px/test_px_functions.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -229,9 +229,9 @@ def test_sunburst_treemap_with_path_color():
229229
df["hover"] = [el.lower() for el in vendors]
230230
fig = px.sunburst(df, path=path, color="calls", hover_data=["hover"])
231231
custom = fig.data[0].customdata
232-
assert np.all(custom[:8, 0] == df["hover"])
233-
assert np.all(custom[8:, 0] == "(?)")
234-
assert np.all(custom[:8, 1] == df["calls"])
232+
assert [el[0] for el in custom[:8]] == df["hover"].tolist()
233+
assert [el[0] for el in custom[8:]] == ["(?)"] * 7
234+
assert [el[1] for el in custom[:8]] == df["calls"].tolist()
235235

236236
# Discrete color
237237
fig = px.sunburst(df, path=path, color="vendors")

Diff for: packages/python/plotly/plotly/tests/test_core/test_px/test_px_input.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def test_repeated_name():
126126
hover_data=["petal_length", "petal_width", "species_id"],
127127
custom_data=["species_id", "species"],
128128
)
129-
assert fig.data[0].customdata.shape[1] == 4
129+
assert len(fig.data[0].customdata[0]) == 4
130130

131131

132132
def test_arrayattrable_numpy():

0 commit comments

Comments
 (0)