Skip to content

Commit 73af219

Browse files
TomAugspurger authored and alanbato committed
API: Have MultiIndex constructors always return a MI (pandas-dev#17236)
* API: Have MultiIndex constructors return MI This removes the special case for MultiIndex constructors returning an Index if all the levels are length-1. Now this will return a MultiIndex with a single level. This is a backwards-incompatible change, with no clear method for deprecation, so we're making a clean break. Closes pandas-dev#17178 * fixup! API: Have MultiIndex constructors return MI * Update for comments
1 parent ac46bd9 commit 73af219

File tree

12 files changed

+170
-45
lines changed

12 files changed

+170
-45
lines changed

doc/source/whatsnew/v0.21.0.txt

+24
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,30 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical
274274

275275
The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement.
276276

277+
.. _whatsnew_0210.api.multiindex_single:
278+
279+
MultiIndex Constructor with a Single Level
280+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
281+
282+
The ``MultiIndex`` constructors no longer squeeze a MultiIndex with all
283+
length-one levels down to a regular ``Index``. This affects all the
284+
``MultiIndex`` constructors. (:issue:`17178`)
285+
286+
Previous behavior:
287+
288+
.. code-block:: ipython
289+
290+
In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)])
291+
Out[2]: Index(['a', 'b'], dtype='object')
292+
293+
Length 1 levels are no longer special-cased. They behave exactly as if you had
294+
length 2+ levels, so a :class:`MultiIndex` is always returned from all of the
295+
``MultiIndex`` constructors:
296+
297+
.. ipython:: python
298+
299+
pd.MultiIndex.from_tuples([('a',), ('b',)])
300+
277301
.. _whatsnew_0210.api:
278302

279303
Other API Changes

pandas/core/frame.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@
6767
_dict_compat,
6868
standardize_mapping)
6969
from pandas.core.generic import NDFrame, _shared_docs
70-
from pandas.core.index import Index, MultiIndex, _ensure_index
70+
from pandas.core.index import (Index, MultiIndex, _ensure_index,
71+
_ensure_index_from_sequences)
7172
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
7273
check_bool_indexer)
7374
from pandas.core.internals import (BlockManager,
@@ -1155,9 +1156,9 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
11551156
else:
11561157
try:
11571158
to_remove = [arr_columns.get_loc(field) for field in index]
1158-
1159-
result_index = MultiIndex.from_arrays(
1160-
[arrays[i] for i in to_remove], names=index)
1159+
index_data = [arrays[i] for i in to_remove]
1160+
result_index = _ensure_index_from_sequences(index_data,
1161+
names=index)
11611162

11621163
exclude.update(index)
11631164
except Exception:
@@ -3000,7 +3001,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
30003001
to_remove.append(col)
30013002
arrays.append(level)
30023003

3003-
index = MultiIndex.from_arrays(arrays, names=names)
3004+
index = _ensure_index_from_sequences(arrays, names)
30043005

30053006
if verify_integrity and not index.is_unique:
30063007
duplicates = index.get_duplicates()

pandas/core/indexes/api.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
from pandas.core.indexes.base import (Index, _new_Index, # noqa
2-
_ensure_index, _get_na_value,
3-
InvalidIndexError)
1+
from pandas.core.indexes.base import (Index,
2+
_new_Index,
3+
_ensure_index,
4+
_ensure_index_from_sequences,
5+
_get_na_value,
6+
InvalidIndexError) # noqa
47
from pandas.core.indexes.category import CategoricalIndex # noqa
58
from pandas.core.indexes.multi import MultiIndex # noqa
69
from pandas.core.indexes.interval import IntervalIndex # noqa
@@ -22,7 +25,8 @@
2225
'InvalidIndexError', 'TimedeltaIndex',
2326
'PeriodIndex', 'DatetimeIndex',
2427
'_new_Index', 'NaT',
25-
'_ensure_index', '_get_na_value', '_get_combined_index',
28+
'_ensure_index', '_ensure_index_from_sequences', '_get_na_value',
29+
'_get_combined_index',
2630
'_get_objs_combined_axis', '_union_indexes',
2731
'_get_consensus_names',
2832
'_all_indexes_same']

pandas/core/indexes/base.py

+69
Original file line numberDiff line numberDiff line change
@@ -4012,7 +4012,76 @@ def invalid_op(self, other=None):
40124012
Index._add_comparison_methods()
40134013

40144014

4015+
def _ensure_index_from_sequences(sequences, names=None):
4016+
"""Construct an index from sequences of data.
4017+
4018+
A single sequence returns an Index. Multiple sequences return a
4019+
MultiIndex.
4020+
4021+
Parameters
4022+
----------
4023+
sequences : sequence of sequences
4024+
names : sequence of str
4025+
4026+
Returns
4027+
-------
4028+
index : Index or MultiIndex
4029+
4030+
Examples
4031+
--------
4032+
>>> _ensure_index_from_sequences([[1, 2, 3]], names=['name'])
4033+
Int64Index([1, 2, 3], dtype='int64', name='name')
4034+
4035+
>>> _ensure_index_from_sequences([['a', 'a'], ['a', 'b']],
4036+
names=['L1', 'L2'])
4037+
MultiIndex(levels=[['a'], ['a', 'b']],
4038+
labels=[[0, 0], [0, 1]],
4039+
names=['L1', 'L2'])
4040+
4041+
See Also
4042+
--------
4043+
_ensure_index
4044+
"""
4045+
from .multi import MultiIndex
4046+
4047+
if len(sequences) == 1:
4048+
if names is not None:
4049+
names = names[0]
4050+
return Index(sequences[0], name=names)
4051+
else:
4052+
return MultiIndex.from_arrays(sequences, names=names)
4053+
4054+
40154055
def _ensure_index(index_like, copy=False):
4056+
"""
4057+
Ensure that we have an index from some index-like object
4058+
4059+
Parameters
4060+
----------
4061+
index_like : sequence
4062+
An Index or other sequence
4063+
copy : bool
4064+
4065+
Returns
4066+
-------
4067+
index : Index or MultiIndex
4068+
4069+
Examples
4070+
--------
4071+
>>> _ensure_index(['a', 'b'])
4072+
Index(['a', 'b'], dtype='object')
4073+
4074+
>>> _ensure_index([('a', 'a'), ('b', 'c')])
4075+
Index([('a', 'a'), ('b', 'c')], dtype='object')
4076+
4077+
>>> _ensure_index([['a', 'a'], ['b', 'c']])
4078+
MultiIndex(levels=[['a'], ['b', 'c']],
4079+
labels=[[0, 0], [0, 1]])
4080+
4081+
See Also
4082+
--------
4083+
_ensure_index_from_sequences
4084+
"""
40164085
if isinstance(index_like, Index):
40174086
if copy:
40184087
index_like = index_like.copy()

pandas/core/indexes/multi.py

-10
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
9191
raise ValueError('Length of levels and labels must be the same.')
9292
if len(levels) == 0:
9393
raise ValueError('Must pass non-zero number of levels/labels')
94-
if len(levels) == 1:
95-
if names:
96-
name = names[0]
97-
else:
98-
name = None
99-
return Index(levels[0], name=name, copy=True).take(labels[0])
10094

10195
result = object.__new__(MultiIndex)
10296

@@ -1084,10 +1078,6 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
10841078
MultiIndex.from_product : Make a MultiIndex from cartesian product
10851079
of iterables
10861080
"""
1087-
if len(arrays) == 1:
1088-
name = None if names is None else names[0]
1089-
return Index(arrays[0], name=name)
1090-
10911081
# Check if lengths of all arrays are equal or not,
10921082
# raise ValueError, if not
10931083
for i in range(1, len(arrays)):

pandas/core/reshape/reshape.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
from pandas.core.frame import _shared_docs
3333
from pandas.util._decorators import Appender
34-
from pandas.core.index import MultiIndex, _get_na_value
34+
from pandas.core.index import Index, MultiIndex, _get_na_value
3535

3636

3737
class _Unstacker(object):
@@ -311,10 +311,14 @@ def _unstack_multiple(data, clocs):
311311
recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels,
312312
xnull=False)
313313

314-
dummy_index = MultiIndex(levels=rlevels + [obs_ids],
315-
labels=rlabels + [comp_ids],
316-
names=rnames + ['__placeholder__'],
317-
verify_integrity=False)
314+
if rlocs == []:
315+
# Everything is in clocs, so the dummy df has a regular index
316+
dummy_index = Index(obs_ids, name='__placeholder__')
317+
else:
318+
dummy_index = MultiIndex(levels=rlevels + [obs_ids],
319+
labels=rlabels + [comp_ids],
320+
names=rnames + ['__placeholder__'],
321+
verify_integrity=False)
318322

319323
if isinstance(data, Series):
320324
dummy = data.copy()
@@ -446,7 +450,12 @@ def _slow_pivot(index, columns, values):
446450

447451
def unstack(obj, level, fill_value=None):
448452
if isinstance(level, (tuple, list)):
449-
return _unstack_multiple(obj, level)
453+
if len(level) != 1:
454+
# _unstack_multiple only handles MultiIndexes,
455+
# and isn't needed for a single level
456+
return _unstack_multiple(obj, level)
457+
else:
458+
level = level[0]
450459

451460
if isinstance(obj, DataFrame):
452461
if isinstance(obj.index, MultiIndex):

pandas/core/sparse/scipy_sparse.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,11 @@ def robust_get_level_values(i):
7171
labels_to_i = Series(labels_to_i)
7272
if len(subset) > 1:
7373
labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index)
74-
labels_to_i.index.names = [index.names[i] for i in subset]
74+
labels_to_i.index.names = [index.names[i] for i in subset]
75+
else:
76+
labels_to_i.index = Index(x[0] for x in labels_to_i.index)
77+
labels_to_i.index.name = index.names[subset[0]]
78+
7579
labels_to_i.name = 'value'
7680
return (labels_to_i)
7781

pandas/core/strings.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1452,7 +1452,12 @@ def cons_row(x):
14521452

14531453
if expand:
14541454
result = list(result)
1455-
return MultiIndex.from_tuples(result, names=name)
1455+
out = MultiIndex.from_tuples(result, names=name)
1456+
if out.nlevels == 1:
1457+
# We had all tuples of length-one, which are
1458+
# better represented as a regular Index.
1459+
out = out.get_level_values(0)
1460+
return out
14561461
else:
14571462
return Index(result, name=name)
14581463
else:

pandas/io/parsers.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
is_scalar, is_categorical_dtype)
2424
from pandas.core.dtypes.missing import isna
2525
from pandas.core.dtypes.cast import astype_nansafe
26-
from pandas.core.index import Index, MultiIndex, RangeIndex
26+
from pandas.core.index import (Index, MultiIndex, RangeIndex,
27+
_ensure_index_from_sequences)
2728
from pandas.core.series import Series
2829
from pandas.core.frame import DataFrame
2930
from pandas.core.categorical import Categorical
@@ -1444,7 +1445,8 @@ def _agg_index(self, index, try_parse_dates=True):
14441445
arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
14451446
arrays.append(arr)
14461447

1447-
index = MultiIndex.from_arrays(arrays, names=self.index_names)
1448+
names = self.index_names
1449+
index = _ensure_index_from_sequences(arrays, names)
14481450

14491451
return index
14501452

@@ -1808,7 +1810,7 @@ def read(self, nrows=None):
18081810
try_parse_dates=True)
18091811
arrays.append(values)
18101812

1811-
index = MultiIndex.from_arrays(arrays)
1813+
index = _ensure_index_from_sequences(arrays)
18121814

18131815
if self.usecols is not None:
18141816
names = self._filter_usecols(names)
@@ -3138,9 +3140,8 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
31383140
if index_col is None or index_col is False:
31393141
index = Index([])
31403142
else:
3141-
index = [Series([], dtype=dtype[index_name])
3142-
for index_name in index_names]
3143-
index = MultiIndex.from_arrays(index, names=index_names)
3143+
data = [Series([], dtype=dtype[name]) for name in index_names]
3144+
index = _ensure_index_from_sequences(data, names=index_names)
31443145
index_col.sort()
31453146
for i, n in enumerate(index_col):
31463147
columns.pop(n - i)

pandas/tests/indexes/test_base.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
DataFrame, Float64Index, Int64Index,
1818
CategoricalIndex, DatetimeIndex, TimedeltaIndex,
1919
PeriodIndex, isna)
20-
from pandas.core.index import _get_combined_index
20+
from pandas.core.index import _get_combined_index, _ensure_index_from_sequences
2121
from pandas.util.testing import assert_almost_equal
2222
from pandas.compat.numpy import np_datetime64_compat
2323

@@ -2112,3 +2112,19 @@ def test_intersect_str_dates(self):
21122112
res = i2.intersection(i1)
21132113

21142114
assert len(res) == 0
2115+
2116+
2117+
class TestIndexUtils(object):
2118+
2119+
@pytest.mark.parametrize('data, names, expected', [
2120+
([[1, 2, 3]], None, Index([1, 2, 3])),
2121+
([[1, 2, 3]], ['name'], Index([1, 2, 3], name='name')),
2122+
([['a', 'a'], ['c', 'd']], None,
2123+
MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]])),
2124+
([['a', 'a'], ['c', 'd']], ['L1', 'L2'],
2125+
MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]],
2126+
names=['L1', 'L2'])),
2127+
])
2128+
def test_ensure_index_from_sequences(self, data, names, expected):
2129+
result = _ensure_index_from_sequences(data, names)
2130+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_multi.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -537,15 +537,12 @@ def test_astype(self):
537537
self.index.astype(np.dtype(int))
538538

539539
def test_constructor_single_level(self):
540-
single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
541-
labels=[[0, 1, 2, 3]], names=['first'])
542-
assert isinstance(single_level, Index)
543-
assert not isinstance(single_level, MultiIndex)
544-
assert single_level.name == 'first'
545-
546-
single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
547-
labels=[[0, 1, 2, 3]])
548-
assert single_level.name is None
540+
result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
541+
labels=[[0, 1, 2, 3]], names=['first'])
542+
assert isinstance(result, MultiIndex)
543+
expected = Index(['foo', 'bar', 'baz', 'qux'], name='first')
544+
tm.assert_index_equal(result.levels[0], expected)
545+
assert result.names == ['first']
549546

550547
def test_constructor_no_levels(self):
551548
tm.assert_raises_regex(ValueError, "non-zero number "
@@ -768,8 +765,9 @@ def test_from_arrays_empty(self):
768765

769766
# 1 level
770767
result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
768+
assert isinstance(result, MultiIndex)
771769
expected = Index([], name='A')
772-
tm.assert_index_equal(result, expected)
770+
tm.assert_index_equal(result.levels[0], expected)
773771

774772
# N levels
775773
for N in [2, 3]:
@@ -830,7 +828,7 @@ def test_from_product_empty(self):
830828
# 1 level
831829
result = MultiIndex.from_product([[]], names=['A'])
832830
expected = pd.Index([], name='A')
833-
tm.assert_index_equal(result, expected)
831+
tm.assert_index_equal(result.levels[0], expected)
834832

835833
# 2 levels
836834
l1 = [[], ['foo', 'bar', 'baz'], []]

pandas/util/testing.py

+4
Original file line numberDiff line numberDiff line change
@@ -1909,7 +1909,11 @@ def keyfunc(x):
19091909

19101910
# convert tuples to index
19111911
if nentries == 1:
1912+
# we have a single level of tuples, i.e. a regular Index
19121913
index = Index(tuples[0], name=names[0])
1914+
elif nlevels == 1:
1915+
name = None if names is None else names[0]
1916+
index = Index((x[0] for x in tuples), name=name)
19131917
else:
19141918
index = MultiIndex.from_tuples(tuples, names=names)
19151919
return index

0 commit comments

Comments
 (0)