Skip to content

CLN: Removed levels attribute from Categorical #13612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ Removal of prior version deprecations/changes

- ``DataFrame.to_csv()`` has dropped the ``engine`` parameter, as was deprecated in 0.17.1 (:issue:`11274`, :issue:`13419`)
- ``DataFrame.to_dict()`` has dropped the ``outtype`` parameter in favor of ``orient`` (:issue:`13627`, :issue:`8486`)
- ``pd.Categorical`` has dropped the ``levels`` attribute in favour of ``categories`` (:issue:`8376`)


.. _whatsnew_0190.performance:
Expand Down
30 changes: 2 additions & 28 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ class Categorical(PandasObject):
__array_priority__ = 1000
_typ = 'categorical'

def __init__(self, values, categories=None, ordered=False, name=None,
fastpath=False, levels=None):
def __init__(self, values, categories=None, ordered=False,
name=None, fastpath=False):

if fastpath:
# fast path
Expand All @@ -245,17 +245,6 @@ def __init__(self, values, categories=None, ordered=False, name=None,
"name=\"something\")'")
warn(msg, UserWarning, stacklevel=2)

# TODO: Remove after deprecation period in 2017/ after 0.18
if levels is not None:
warn("Creating a 'Categorical' with 'levels' is deprecated, use "
"'categories' instead", FutureWarning, stacklevel=2)
if categories is None:
categories = levels
else:
raise ValueError("Cannot pass in both 'categories' and "
"(deprecated) 'levels', use only "
"'categories'", stacklevel=2)

# sanitize input
if is_categorical_dtype(values):

Expand Down Expand Up @@ -580,21 +569,6 @@ def _get_categories(self):
categories = property(fget=_get_categories, fset=_set_categories,
doc=_categories_doc)

def _set_levels(self, levels):
""" set new levels (deprecated, use "categories") """
warn("Assigning to 'levels' is deprecated, use 'categories'",
FutureWarning, stacklevel=2)
self.categories = levels

def _get_levels(self):
""" Gets the levels (deprecated, use "categories") """
warn("Accessing 'levels' is deprecated, use 'categories'",
FutureWarning, stacklevel=2)
return self.categories

# TODO: Remove after deprecation period in 2017/ after 0.18
levels = property(fget=_get_levels, fset=_set_levels)

_ordered = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iirc there is some unpick code that we should think about

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO it would be ok to keep the old compat code in the pickle codepaths...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with @JanSchulz : I don't think we should remove that code-path right away.


def _set_ordered(self, value):
Expand Down
38 changes: 38 additions & 0 deletions pandas/io/tests/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,44 @@ def python_unpickler(path):
result = python_unpickler(path)
self.compare_element(result, expected, typ)

def test_pickle_v0_14_1(self):

# we have the name warning
# 10482
with tm.assert_produces_warning(UserWarning):
cat = pd.Categorical(values=['a', 'b', 'c'],
categories=['a', 'b', 'c', 'd'],
name='foobar', ordered=False)
pickle_path = os.path.join(tm.get_data_path(),
'categorical_0_14_1.pickle')
# This code was executed once on v0.14.1 to generate the pickle:
#
# cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
# name='foobar')
# with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
#
tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))

def test_pickle_v0_15_2(self):
# ordered -> _ordered
# GH 9347

# we have the name warning
# 10482
with tm.assert_produces_warning(UserWarning):
cat = pd.Categorical(values=['a', 'b', 'c'],
categories=['a', 'b', 'c', 'd'],
name='foobar', ordered=False)
pickle_path = os.path.join(tm.get_data_path(),
'categorical_0_15_2.pickle')
# This code was executed once on v0.15.2 to generate the pickle:
#
# cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
# name='foobar')
# with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
#
tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))


if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down
50 changes: 0 additions & 50 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1559,18 +1559,6 @@ def test_deprecated_labels(self):
res = cat.labels
self.assert_numpy_array_equal(res, exp)

def test_deprecated_levels(self):
# TODO: levels is deprecated and should be removed in 0.18 or 2017,
# whatever is earlier
cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
exp = cat.categories
with tm.assert_produces_warning(FutureWarning):
res = cat.levels
self.assert_index_equal(res, exp)
with tm.assert_produces_warning(FutureWarning):
res = pd.Categorical([1, 2, 3, np.nan], levels=[1, 2, 3])
self.assert_index_equal(res.categories, exp)

def test_removed_names_produces_warning(self):
Copy link
Contributor

@jreback jreback Jul 12, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, all of the legacy pickles need to be an EARLIER version of pandas that HAS the .levels in the categorical.

This is not testing anything.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in this case create, let's do a different way. create a pickle in 0.18.1 with the levels. save and create create a specific test to read back and validate.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Whoops, I'll generate new pickles then.

  2. Wait, but isn't that what the test_pickle tests are for? I'm confused here...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes that IS what test_pickles does. However a Categorical that has a levels attribute does not exist in old pickles.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you look at the comments in test_categorical.py (which I now understand why they were included), you'll see that in fact the old pickles do have the levels attribute passed in, as far back as test_pickle_v0_14_1 - is a test in 0.18.1 for levels necessary then?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JanSchulz : I agree. @jreback , there is no need to generate pickles with the levels parameter. We already have tests for them that pass in this PR with no issues. I think this is ready to be merged then since the potential backwards compatibility issue isn't actually a problem.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gfyoung your comments indicate you don't understand my point. We don't have tests that have a levels attribute IN A PICKLE. Thus the tests don't test the correct thing, and you cannot assure back-compat.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we do: https://github.com/pydata/pandas/blob/master/pandas/tests/data/categorical_0_14_1.pickle contains a _pickle string. Or do I misunderstand something?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh @JanSchulz yes I remember now. The issue is that we don't do this with anything else, and therefore they are in the wrong place (they really should have been in the generate_data scripts). ok @gfyoung can you move those 2 test to pandas/io/read_pickle (and the associated data files). Then they will be in the correct place. We do all pickle testing in a centralized place (except for round-tripping, which doesn't require data files).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


# 10482
Expand Down Expand Up @@ -4431,44 +4419,6 @@ def test_dt_accessor_api_for_categorical(self):
invalid.dt
self.assertFalse(hasattr(invalid, 'str'))

def test_pickle_v0_14_1(self):

# we have the name warning
# 10482
with tm.assert_produces_warning(UserWarning):
cat = pd.Categorical(values=['a', 'b', 'c'],
categories=['a', 'b', 'c', 'd'],
name='foobar', ordered=False)
pickle_path = os.path.join(tm.get_data_path(),
'categorical_0_14_1.pickle')
# This code was executed once on v0.14.1 to generate the pickle:
#
# cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
# name='foobar')
# with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
#
self.assert_categorical_equal(cat, pd.read_pickle(pickle_path))

def test_pickle_v0_15_2(self):
# ordered -> _ordered
# GH 9347

# we have the name warning
# 10482
with tm.assert_produces_warning(UserWarning):
cat = pd.Categorical(values=['a', 'b', 'c'],
categories=['a', 'b', 'c', 'd'],
name='foobar', ordered=False)
pickle_path = os.path.join(tm.get_data_path(),
'categorical_0_15_2.pickle')
# This code was executed once on v0.15.2 to generate the pickle:
#
# cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
# name='foobar')
# with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
#
self.assert_categorical_equal(cat, pd.read_pickle(pickle_path))

def test_concat_categorical(self):
# See GH 10177
df1 = pd.DataFrame(
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ def pxd(name):
'tests/data/legacy_msgpack/*/*.msgpack',
'tests/data/*.csv*',
'tests/data/*.dta',
'tests/data/*.pickle',
'tests/data/*.txt',
'tests/data/*.xls',
'tests/data/*.xlsx',
Expand All @@ -605,8 +606,7 @@ def pxd(name):
'tests/data/html_encoding/*.html',
'tests/json/data/*.json'],
'pandas.tools': ['tests/data/*.csv'],
'pandas.tests': ['data/*.pickle',
'data/*.csv'],
'pandas.tests': ['data/*.csv'],
'pandas.tests.formats': ['data/*.csv'],
'pandas.tests.indexes': ['data/*.pickle'],
'pandas.tseries.tests': ['data/*.pickle',
Expand Down