Skip to content

Pandas datetime and numpy numeric array fixes #1163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 51 additions & 30 deletions _plotly_utils/basevalidators.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,18 @@ def to_scalar_or_list(v):
return v


def copy_to_readonly_numpy_array(v, dtype=None, force_numeric=False):
def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
"""
Convert an array-like value into a read-only numpy array

Parameters
----------
v : array like
Array like value (list, tuple, numpy array, pandas series, etc.)
dtype : str
If specified, the numpy dtype that the array should be forced to
have. If not specified then let numpy infer the datatype
kind : str or tuple of str
If specified, the numpy dtype kind (or kinds) that the array should
have, or be converted to if possible.
If not specified then let numpy infer the datatype
force_numeric : bool
If true, raise an exception if the resulting numpy array does not
have a numeric dtype (i.e. dtype.kind not in ['u', 'i', 'f'])
Expand All @@ -74,30 +75,53 @@ def copy_to_readonly_numpy_array(v, dtype=None, force_numeric=False):

assert np is not None

# Copy to numpy array and handle dtype param
# ------------------------------------------
# If dtype was not specified then it will be passed to the numpy array
# constructor as None and the data type will be inferred automatically
# ### Process kind ###
if not kind:
kind = ()
elif isinstance(kind, string_types):
kind = (kind,)

first_kind = kind[0] if kind else None

# TODO: support datetime dtype here and in widget serialization
# u: unsigned int, i: signed int, f: float
numeric_kinds = ['u', 'i', 'f']
numeric_kinds = {'u', 'i', 'f'}
kind_default_dtypes = {
'u': 'uint32', 'i': 'int32', 'f': 'float64', 'O': 'object'}

# Unwrap data types that have a `values` property that might be a numpy
# array. If this values property is a numeric numpy array then we
# can take the fast path below
# Handle pandas Series and Index objects
if pd and isinstance(v, (pd.Series, pd.Index)):
v = v.values
if v.dtype.kind in numeric_kinds:
# Get the numeric numpy array so we use fast path below
v = v.values
elif v.dtype.kind == 'M':
# Convert datetime Series/Index to numpy array of datetimes
if isinstance(v, pd.Series):
v = v.dt.to_pydatetime()
else:
# DatetimeIndex
v = v.to_pydatetime()

if not isinstance(v, np.ndarray):
# v is not a homogeneous array
v_list = [to_scalar_or_list(e) for e in v]

# Lookup dtype for requested kind, if any
dtype = kind_default_dtypes.get(first_kind, None)

# construct new array from list
new_v = np.array(v_list, order='C', dtype=dtype)
elif v.dtype.kind in numeric_kinds:
if dtype:
# v is a homogeneous numeric array
if kind and v.dtype.kind not in kind:
# Kind(s) were specified and this array doesn't match
# Convert to the default dtype for the first kind
dtype = kind_default_dtypes.get(first_kind, None)
new_v = np.ascontiguousarray(v.astype(dtype))
else:
# Either no kind was requested or requested kind is satisfied
new_v = np.ascontiguousarray(v.copy())
else:
# v is a non-numeric homogeneous array
new_v = v.copy()

# Handle force numeric param
Expand All @@ -106,7 +130,7 @@ def copy_to_readonly_numpy_array(v, dtype=None, force_numeric=False):
raise ValueError('Input value is not numeric and'
'force_numeric parameter set to True')

if dtype != 'unicode':
if 'U' not in kind:
# Force non-numeric arrays to have object type
# --------------------------------------------
# Here we make sure that non-numeric arrays have the object
Expand All @@ -116,12 +140,6 @@ def copy_to_readonly_numpy_array(v, dtype=None, force_numeric=False):
if new_v.dtype.kind not in ['u', 'i', 'f', 'O']:
new_v = np.array(v, dtype='object')

# Convert int64 arrays to int32
# -----------------------------
# JavaScript doesn't support int64 typed arrays
if new_v.dtype == 'int64':
new_v = new_v.astype('int32')

# Set new array to be read-only
# -----------------------------
new_v.flags['WRITEABLE'] = False
Expand Down Expand Up @@ -749,10 +767,13 @@ def validate_coerce(self, v):
# Pass None through
pass
elif self.array_ok and is_homogeneous_array(v):
if v.dtype.kind not in ['i', 'u']:
self.raise_invalid_val(v)

v_array = copy_to_readonly_numpy_array(v, dtype='int32')
v_array = copy_to_readonly_numpy_array(v,
kind=('i', 'u'),
force_numeric=True)

if v_array.dtype.kind not in ['i', 'u']:
self.raise_invalid_val(v)

# Check min/max
if self.has_min_max:
Expand Down Expand Up @@ -875,7 +896,7 @@ def validate_coerce(self, v):

if is_homogeneous_array(v):
# If not strict, let numpy cast elements to strings
v = copy_to_readonly_numpy_array(v, dtype='unicode')
v = copy_to_readonly_numpy_array(v, kind='U')

# Check no_blank
if self.no_blank:
Expand Down Expand Up @@ -1057,10 +1078,10 @@ def validate_coerce(self, v, should_raise=True):
# ### Check that elements have valid colors types ###
elif self.numbers_allowed() or invalid_els:
v = copy_to_readonly_numpy_array(
validated_v, dtype='object')
validated_v, kind='O')
else:
v = copy_to_readonly_numpy_array(
validated_v, dtype='unicode')
validated_v, kind='U')
elif self.array_ok and is_simple_array(v):
validated_v = [
self.validate_coerce(e, should_raise=False)
Expand Down Expand Up @@ -1509,7 +1530,7 @@ def validate_coerce(self, v):
self.raise_invalid_elements(invalid_els)

if is_homogeneous_array(v):
v = copy_to_readonly_numpy_array(validated_v, dtype='unicode')
v = copy_to_readonly_numpy_array(validated_v, kind='U')
else:
v = to_scalar_or_list(v)
else:
Expand Down Expand Up @@ -1559,7 +1580,7 @@ def validate_coerce(self, v):
# Pass None through
pass
elif self.array_ok and is_homogeneous_array(v):
v = copy_to_readonly_numpy_array(v, dtype='object')
v = copy_to_readonly_numpy_array(v, kind='O')
elif self.array_ok and is_simple_array(v):
v = to_scalar_or_list(v)
return v
Expand Down
2 changes: 1 addition & 1 deletion _plotly_utils/tests/validators/test_integer_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def test_acceptance_aok_list(val, validator_aok):
def test_coercion_aok_list(val, expected, validator_aok):
v = validator_aok.validate_coerce(val)
if isinstance(val, (np.ndarray, pd.Series, pd.Index)):
assert v.dtype == np.int32
assert v.dtype == val.dtype
assert np.array_equal(validator_aok.present(v),
np.array(expected, dtype=np.int32))
else:
Expand Down
179 changes: 179 additions & 0 deletions _plotly_utils/tests/validators/test_pandas_series_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import pytest
import numpy as np
import pandas as pd
from datetime import datetime
from _plotly_utils.basevalidators import (NumberValidator,
IntegerValidator,
DataArrayValidator,
ColorValidator)


@pytest.fixture
def data_array_validator(request):
    """Provide a DataArrayValidator built with 'prop' and 'parent'."""
    validator = DataArrayValidator('prop', 'parent')
    return validator


@pytest.fixture
def integer_validator(request):
    """Provide an IntegerValidator that accepts arrays (array_ok=True)."""
    validator = IntegerValidator('prop', 'parent', array_ok=True)
    return validator


@pytest.fixture
def number_validator(request):
    """Provide a NumberValidator that accepts arrays (array_ok=True)."""
    validator = NumberValidator('prop', 'parent', array_ok=True)
    return validator


@pytest.fixture
def color_validator(request):
    """Provide a ColorValidator with array_ok=True and an empty
    colorscale_path."""
    validator = ColorValidator(
        'prop', 'parent', array_ok=True, colorscale_path='')
    return validator


@pytest.fixture(
    params=[
        # signed integers
        'int8', 'int16', 'int32', 'int64',
        # unsigned integers
        'uint8', 'uint16', 'uint32', 'uint64',
        # floating point
        'float16', 'float32', 'float64',
    ])
def numeric_dtype(request):
    """Parametrized fixture yielding one numeric dtype name per run."""
    dtype_name = request.param
    return dtype_name


@pytest.fixture(params=[pd.Series, pd.Index])
def pandas_type(request):
    """Parametrized fixture yielding the pandas container class to build
    (pd.Series, then pd.Index)."""
    container_cls = request.param
    return container_cls


@pytest.fixture
def numeric_pandas(request, pandas_type, numeric_dtype):
    """Build a pandas Series/Index from np.arange(10), requesting the
    parametrized numeric dtype."""
    values = np.arange(10)
    return pandas_type(values, dtype=numeric_dtype)


@pytest.fixture
def color_object_pandas(request, pandas_type):
    """Build an object-dtype pandas Series/Index of nine color names
    ('blue', 'green', 'red' repeated three times)."""
    color_names = ['blue', 'green', 'red'] * 3
    return pandas_type(color_names, dtype='object')


@pytest.fixture
def color_categorical_pandas(request, pandas_type):
    """Build a pandas Series/Index from a Categorical of nine color names
    ('blue', 'green', 'red' repeated three times)."""
    color_names = ['blue', 'green', 'red'] * 3
    categorical = pd.Categorical(color_names)
    return pandas_type(categorical)


@pytest.fixture
def dates_array(request):
    """Return a numpy array of five datetimes: the 10th of each month
    from October 2013 through February 2014."""
    year_months = [(2013, 10), (2013, 11), (2013, 12), (2014, 1), (2014, 2)]
    return np.array([datetime(year=year, month=month, day=10)
                     for year, month in year_months])


@pytest.fixture
def datetime_pandas(request, pandas_type, dates_array):
    """Wrap the dates_array fixture in the parametrized pandas container
    (Series or Index)."""
    wrapped = pandas_type(dates_array)
    return wrapped


def test_numeric_validator_numeric_pandas(number_validator, numeric_pandas):
    """The number validator should return numeric pandas input as a numpy
    array with the input's dtype and values."""
    coerced = number_validator.validate_coerce(numeric_pandas)

    # Result must be a plain numpy array
    assert isinstance(coerced, np.ndarray)

    # dtype is carried over from the pandas input
    assert coerced.dtype == numeric_pandas.dtype

    # Element values match the input
    np.testing.assert_array_equal(coerced, numeric_pandas)


def test_integer_validator_numeric_pandas(integer_validator, numeric_pandas):
    """The integer validator should keep integer dtypes as-is and convert
    other numeric dtypes to int32, preserving the values."""
    coerced = integer_validator.validate_coerce(numeric_pandas)

    # Result must be a plain numpy array
    assert isinstance(coerced, np.ndarray)

    # Signed/unsigned integer input keeps its dtype; anything else (floats)
    # is expected to come back as the default integer type, int32
    input_is_integer = numeric_pandas.dtype.kind in ('u', 'i')
    expected_dtype = numeric_pandas.dtype if input_is_integer else 'int32'
    assert coerced.dtype == expected_dtype

    # Element values match the input
    np.testing.assert_array_equal(coerced, numeric_pandas)


def test_data_array_validator(data_array_validator, numeric_pandas):
    """The data-array validator should return numeric pandas input as a
    numpy array with the input's dtype and values."""
    coerced = data_array_validator.validate_coerce(numeric_pandas)

    # Result must be a plain numpy array
    assert isinstance(coerced, np.ndarray)

    # dtype is carried over from the pandas input
    assert coerced.dtype == numeric_pandas.dtype

    # Element values match the input
    np.testing.assert_array_equal(coerced, numeric_pandas)


def test_color_validator_numeric(color_validator, numeric_pandas):
    """The color validator should return numeric pandas input as a numpy
    array with the input's dtype and values."""
    coerced = color_validator.validate_coerce(numeric_pandas)

    # Result must be a plain numpy array
    assert isinstance(coerced, np.ndarray)

    # dtype is carried over from the pandas input
    assert coerced.dtype == numeric_pandas.dtype

    # Element values match the input
    np.testing.assert_array_equal(coerced, numeric_pandas)


def test_color_validator_object(color_validator, color_object_pandas):
    """The color validator should return object-dtype color names from
    pandas as an object-dtype numpy array with the same values."""
    coerced = color_validator.validate_coerce(color_object_pandas)

    # Result must be a plain numpy array
    assert isinstance(coerced, np.ndarray)

    # Color strings are stored with object dtype
    assert coerced.dtype == 'object'

    # Element values match the input
    np.testing.assert_array_equal(coerced, color_object_pandas)


def test_color_validator_categorical(color_validator,
                                     color_categorical_pandas):
    """The color validator should return categorical pandas color names
    as an object-dtype numpy array with the same values."""
    coerced = color_validator.validate_coerce(color_categorical_pandas)

    # Sanity-check the fixture really is categorical, then the result type
    assert color_categorical_pandas.dtype == 'category'
    assert isinstance(coerced, np.ndarray)

    # Categorical values are stored with object dtype
    assert coerced.dtype == 'object'

    # Element values match the categorical's values
    expected = np.array(color_categorical_pandas)
    np.testing.assert_array_equal(coerced, expected)


def test_data_array_validator_dates(data_array_validator,
                                    datetime_pandas,
                                    dates_array):
    """The data-array validator should return datetime pandas input as an
    object-dtype numpy array equal to the original dates array."""
    coerced = data_array_validator.validate_coerce(datetime_pandas)

    # Result must be a plain numpy array
    assert isinstance(coerced, np.ndarray)

    # Datetimes are stored with object dtype
    assert coerced.dtype == 'object'

    # Element values match the source datetimes
    np.testing.assert_array_equal(coerced, dates_array)
4 changes: 2 additions & 2 deletions plotly/basedatatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2981,7 +2981,7 @@ def _set_compound_prop(self, prop, val):
# ------------------
if not self._in_batch_mode:
if not new_dict_val:
if prop in self._props:
if self._props and prop in self._props:
self._props.pop(prop)
else:
self._init_props()
Expand Down Expand Up @@ -3055,7 +3055,7 @@ def _set_array_prop(self, prop, val):
# ------------------
if not self._in_batch_mode:
if not new_dict_vals:
if prop in self._props:
if self._props and prop in self._props:
self._props.pop(prop)
else:
self._init_props()
Expand Down
Loading