Skip to content

Parametrized test_json_table_schema module #19128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 8, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 125 additions & 149 deletions pandas/tests/io/json/test_json_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,82 +88,82 @@ def test_multiindex(self):

class TestTableSchemaType(object):

def test_as_json_table_type_int_data(self):
@pytest.mark.parametrize('int_type', [
np.int, np.int16, np.int32, np.int64])
def test_as_json_table_type_int_data(self, int_type):
int_data = [1, 2, 3]
int_types = [np.int, np.int16, np.int32, np.int64]
for t in int_types:
assert as_json_table_type(np.array(
int_data, dtype=t)) == 'integer'
assert as_json_table_type(np.array(
int_data, dtype=int_type)) == 'integer'

def test_as_json_table_type_float_data(self):
@pytest.mark.parametrize('float_type', [
np.float, np.float16, np.float32, np.float64])
def test_as_json_table_type_float_data(self, float_type):
float_data = [1., 2., 3.]
float_types = [np.float, np.float16, np.float32, np.float64]
for t in float_types:
assert as_json_table_type(np.array(
float_data, dtype=t)) == 'number'
assert as_json_table_type(np.array(
float_data, dtype=float_type)) == 'number'

def test_as_json_table_type_bool_data(self):
@pytest.mark.parametrize('bool_type', [bool, np.bool])
def test_as_json_table_type_bool_data(self, bool_type):
bool_data = [True, False]
bool_types = [bool, np.bool]
for t in bool_types:
assert as_json_table_type(np.array(
bool_data, dtype=t)) == 'boolean'

def test_as_json_table_type_date_data(self):
date_data = [pd.to_datetime(['2016']),
pd.to_datetime(['2016'], utc=True),
pd.Series(pd.to_datetime(['2016'])),
pd.Series(pd.to_datetime(['2016'], utc=True)),
pd.period_range('2016', freq='A', periods=3)]
for arr in date_data:
assert as_json_table_type(arr) == 'datetime'

def test_as_json_table_type_string_data(self):
strings = [pd.Series(['a', 'b']), pd.Index(['a', 'b'])]
for t in strings:
assert as_json_table_type(t) == 'string'

def test_as_json_table_type_categorical_data(self):
assert as_json_table_type(pd.Categorical(['a'])) == 'any'
assert as_json_table_type(pd.Categorical([1])) == 'any'
assert as_json_table_type(pd.Series(pd.Categorical([1]))) == 'any'
assert as_json_table_type(pd.CategoricalIndex([1])) == 'any'
assert as_json_table_type(pd.Categorical([1])) == 'any'
assert as_json_table_type(np.array(
bool_data, dtype=bool_type)) == 'boolean'

@pytest.mark.parametrize('date_data', [
pd.to_datetime(['2016']),
pd.to_datetime(['2016'], utc=True),
pd.Series(pd.to_datetime(['2016'])),
pd.Series(pd.to_datetime(['2016'], utc=True)),
pd.period_range('2016', freq='A', periods=3)
])
def test_as_json_table_type_date_data(self, date_data):
assert as_json_table_type(date_data) == 'datetime'

@pytest.mark.parametrize('str_data', [
pd.Series(['a', 'b']), pd.Index(['a', 'b'])])
def test_as_json_table_type_string_data(self, str_data):
assert as_json_table_type(str_data) == 'string'

@pytest.mark.parametrize('cat_data', [
pd.Categorical(['a']),
pd.Categorical([1]),
pd.Series(pd.Categorical([1])),
pd.CategoricalIndex([1]),
pd.Categorical([1])])
def test_as_json_table_type_categorical_data(self, cat_data):
assert as_json_table_type(cat_data) == 'any'

# ------
# dtypes
# ------
def test_as_json_table_type_int_dtypes(self):
integers = [np.int, np.int16, np.int32, np.int64]
for t in integers:
assert as_json_table_type(t) == 'integer'

def test_as_json_table_type_float_dtypes(self):
floats = [np.float, np.float16, np.float32, np.float64]
for t in floats:
assert as_json_table_type(t) == 'number'

def test_as_json_table_type_bool_dtypes(self):
bools = [bool, np.bool]
for t in bools:
assert as_json_table_type(t) == 'boolean'

def test_as_json_table_type_date_dtypes(self):
@pytest.mark.parametrize('int_dtype', [
np.int, np.int16, np.int32, np.int64])
def test_as_json_table_type_int_dtypes(self, int_dtype):
assert as_json_table_type(int_dtype) == 'integer'

@pytest.mark.parametrize('float_dtype', [
np.float, np.float16, np.float32, np.float64])
def test_as_json_table_type_float_dtypes(self, float_dtype):
assert as_json_table_type(float_dtype) == 'number'

@pytest.mark.parametrize('bool_dtype', [bool, np.bool])
def test_as_json_table_type_bool_dtypes(self, bool_dtype):
assert as_json_table_type(bool_dtype) == 'boolean'

@pytest.mark.parametrize('date_dtype', [
np.datetime64, np.dtype("<M8[ns]"), PeriodDtype(),
DatetimeTZDtype('ns', 'US/Central')])
def test_as_json_table_type_date_dtypes(self, date_dtype):
# TODO: datetime.date? datetime.time?
dates = [np.datetime64, np.dtype("<M8[ns]"), PeriodDtype(),
DatetimeTZDtype('ns', 'US/Central')]
for t in dates:
assert as_json_table_type(t) == 'datetime'
assert as_json_table_type(date_dtype) == 'datetime'

def test_as_json_table_type_timedelta_dtypes(self):
durations = [np.timedelta64, np.dtype("<m8[ns]")]
for t in durations:
assert as_json_table_type(t) == 'duration'
@pytest.mark.parametrize('td_dtype', [
np.timedelta64, np.dtype("<m8[ns]")])
def test_as_json_table_type_timedelta_dtypes(self, td_dtype):
assert as_json_table_type(td_dtype) == 'duration'

def test_as_json_table_type_string_dtypes(self):
strings = [object] # TODO
for t in strings:
assert as_json_table_type(t) == 'string'
@pytest.mark.parametrize('str_dtype', [object]) # TODO
def test_as_json_table_type_string_dtypes(self, str_dtype):
assert as_json_table_type(str_dtype) == 'string'

def test_as_json_table_type_categorical_dtypes(self):
# TODO: I think before is_categorical_dtype(Categorical)
Expand Down Expand Up @@ -336,61 +336,55 @@ def test_date_format_raises(self):
self.df.to_json(orient='table', date_format='iso')
self.df.to_json(orient='table')

def test_convert_pandas_type_to_json_field_int(self):
@pytest.mark.parametrize('kind', [pd.Series, pd.Index])
def test_convert_pandas_type_to_json_field_int(self, kind):
data = [1, 2, 3]
kinds = [pd.Series(data, name='name'), pd.Index(data, name='name')]
for kind in kinds:
result = convert_pandas_type_to_json_field(kind)
expected = {"name": "name", "type": 'integer'}
assert result == expected
result = convert_pandas_type_to_json_field(kind(data, name='name'))
expected = {"name": "name", "type": "integer"}
assert result == expected

def test_convert_pandas_type_to_json_field_float(self):
@pytest.mark.parametrize('kind', [pd.Series, pd.Index])
def test_convert_pandas_type_to_json_field_float(self, kind):
data = [1., 2., 3.]
kinds = [pd.Series(data, name='name'), pd.Index(data, name='name')]
for kind in kinds:
result = convert_pandas_type_to_json_field(kind)
expected = {"name": "name", "type": 'number'}
assert result == expected
result = convert_pandas_type_to_json_field(kind(data, name='name'))
expected = {"name": "name", "type": "number"}
assert result == expected

def test_convert_pandas_type_to_json_field_datetime(self):
@pytest.mark.parametrize('dt_args,extra_exp', [
({}, {}), ({'utc': True}, {'tz': 'UTC'})])
@pytest.mark.parametrize('wrapper', [None, pd.Series])
def test_convert_pandas_type_to_json_field_datetime(self, dt_args,
extra_exp, wrapper):
data = [1., 2., 3.]
kinds = [pd.Series(pd.to_datetime(data), name='values'),
pd.to_datetime(data)]
for kind in kinds:
result = convert_pandas_type_to_json_field(kind)
expected = {"name": "values", "type": 'datetime'}
assert result == expected

kinds = [pd.Series(pd.to_datetime(data, utc=True), name='values'),
pd.to_datetime(data, utc=True)]
for kind in kinds:
result = convert_pandas_type_to_json_field(kind)
expected = {"name": "values", "type": 'datetime', "tz": "UTC"}
assert result == expected
data = pd.to_datetime(data, **dt_args)
if wrapper is pd.Series:
data = pd.Series(data, name='values')
result = convert_pandas_type_to_json_field(data)
expected = {"name": "values", "type": 'datetime'}
expected.update(extra_exp)
assert result == expected

def test_convert_pandas_type_to_json_period_range(self):
arr = pd.period_range('2016', freq='A-DEC', periods=4)
result = convert_pandas_type_to_json_field(arr)
expected = {"name": "values", "type": 'datetime', "freq": "A-DEC"}
assert result == expected

def test_convert_pandas_type_to_json_field_categorical(self):
@pytest.mark.parametrize('kind', [pd.Categorical, pd.CategoricalIndex])
@pytest.mark.parametrize('ordered', [True, False])
def test_convert_pandas_type_to_json_field_categorical(self, kind,
ordered):
data = ['a', 'b', 'c']
ordereds = [True, False]

for ordered in ordereds:
arr = pd.Series(pd.Categorical(data, ordered=ordered), name='cats')
result = convert_pandas_type_to_json_field(arr)
expected = {"name": "cats", "type": "any",
"constraints": {"enum": data},
"ordered": ordered}
assert result == expected

arr = pd.CategoricalIndex(data, ordered=ordered, name='cats')
result = convert_pandas_type_to_json_field(arr)
expected = {"name": "cats", "type": "any",
"constraints": {"enum": data},
"ordered": ordered}
assert result == expected
if kind is pd.Categorical:
arr = pd.Series(kind(data, ordered=ordered), name='cats')
elif kind is pd.CategoricalIndex:
arr = kind(data, ordered=ordered, name='cats')

result = convert_pandas_type_to_json_field(arr)
expected = {"name": "cats", "type": "any",
"constraints": {"enum": data},
"ordered": ordered}
assert result == expected

@pytest.mark.parametrize("inp,exp", [
({'type': 'integer'}, 'int64'),
Expand Down Expand Up @@ -440,35 +434,22 @@ def test_categorical(self):
OrderedDict([('idx', 2), ('values', 'a')])])])
assert result == expected

def test_set_default_names_unset(self):
data = pd.Series(1, pd.Index([1]))
result = set_default_names(data)
assert result.index.name == 'index'

def test_set_default_names_set(self):
data = pd.Series(1, pd.Index([1], name='myname'))
result = set_default_names(data)
assert result.index.name == 'myname'

def test_set_default_names_mi_unset(self):
data = pd.Series(
1, pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')]))
result = set_default_names(data)
assert result.index.names == ['level_0', 'level_1']

def test_set_default_names_mi_set(self):
data = pd.Series(
1, pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
names=['n1', 'n2']))
result = set_default_names(data)
assert result.index.names == ['n1', 'n2']

def test_set_default_names_mi_partion(self):
data = pd.Series(
1, pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
names=['n1', None]))
@pytest.mark.parametrize('idx,nm,prop', [
(pd.Index([1]), 'index', 'name'),
(pd.Index([1], name='myname'), 'myname', 'name'),
(pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')]),
['level_0', 'level_1'], 'names'),
(pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
names=['n1', 'n2']),
['n1', 'n2'], 'names'),
(pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
names=['n1', None]),
['n1', 'level_1'], 'names')
])
def test_set_names_unset(self, idx, nm, prop):
data = pd.Series(1, idx)
result = set_default_names(data)
assert result.index.names == ['n1', 'level_1']
assert getattr(result.index, prop) == nm

def test_timestamp_in_columns(self):
df = pd.DataFrame([[1, 2]], columns=[pd.Timestamp('2016'),
Expand All @@ -478,20 +459,15 @@ def test_timestamp_in_columns(self):
assert js['schema']['fields'][1]['name'] == 1451606400000
assert js['schema']['fields'][2]['name'] == 10000

def test_overlapping_names(self):
cases = [
pd.Series([1], index=pd.Index([1], name='a'), name='a'),
pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
pd.DataFrame({"A": [1]}, index=pd.MultiIndex.from_arrays([
['a'], [1]
], names=["A", "a"])),
]

for data in cases:
with pytest.raises(ValueError) as excinfo:
data.to_json(orient='table')

assert 'Overlapping' in str(excinfo.value)
@pytest.mark.parametrize('case', [
pd.Series([1], index=pd.Index([1], name='a'), name='a'),
pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
pd.DataFrame({"A": [1]}, index=pd.MultiIndex.from_arrays([
['a'], [1]], names=["A", "a"]))
])
def test_overlapping_names(self, case):
with tm.assert_raises_regex(ValueError, 'Overlapping'):
case.to_json(orient='table')

def test_mi_falsey_name(self):
# GH 16203
Expand Down