Skip to content

BUG: GH #12223, GH #15262. Allow ints for names in MultiIndex #15478

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,11 +185,12 @@ Other enhancements
- ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
- ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).
- ``pandas.tools.hashing`` has gained a ``hash_tuples`` routine, and ``hash_pandas_object`` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`)
- ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`)
- ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`)<<<<<<< f4edb053e17e51e8c2bed7c16755c4f7f3222117
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

merge residual :>

- ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`)
- HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`)
- ``pd.TimedeltaIndex`` now has a custom datetick formatter specifically designed for nanosecond level precision (:issue:`8711`)
- ``pd.types.concat.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs <categorical.union>` for more information.
- Using numerical names in ``MultiIndex`` causes fewer errors. (:issue:`12223`) (:issue:`15262`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead, describe the bug that was reported: output formatting with a MultiIndex under certain conditions.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(:issue:`12223`, :issue:`15262`)


.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2876,7 +2876,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
names = [x for x in self.index.names]
if isinstance(self.index, MultiIndex):
for i in range(self.index.nlevels):
arrays.append(self.index.get_level_values(i))
arrays.append(self.index._get_level_values(i))
else:
arrays.append(self.index)

Expand All @@ -2886,9 +2886,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
# append all but the last column so we don't have to modify
# the end of this loop
for n in range(col.nlevels - 1):
arrays.append(col.get_level_values(n))
arrays.append(col._get_level_values(n))

level = col.get_level_values(col.nlevels - 1)
level = col._get_level_values(col.nlevels - 1)
names.extend(col.names)
elif isinstance(col, Series):
level = col._values
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,8 @@ def _set_grouper(self, obj, sort=False):
# equivalent to the axis name
if isinstance(ax, MultiIndex):
level = ax._get_level_number(level)
ax = Index(ax.get_level_values(
level), name=ax.names[level])
ax = Index(ax._get_level_values(level),
name=ax.names[level])

else:
if level not in (0, ax.name):
Expand Down Expand Up @@ -761,7 +761,7 @@ def _index_with_as_index(self, b):
gp = self.grouper
levels = chain((gp.levels[i][gp.labels[i][b]]
for i in range(len(gp.groupings))),
(original.get_level_values(i)[b]
(original._get_level_values(i)[b]
for i in range(original.nlevels)))
new = MultiIndex.from_arrays(list(levels))
new.names = gp.names + original.names
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,8 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
mdata[value_name] = frame.values.ravel('F')
for i, col in enumerate(var_name):
# asanyarray will keep the columns as an Index
mdata[col] = np.asanyarray(frame.columns.get_level_values(i)).repeat(N)
mdata[col] = np.asanyarray(frame.columns
._get_level_values(i)).repeat(N)

return DataFrame(mdata, columns=mcolumns)

Expand Down
2 changes: 1 addition & 1 deletion pandas/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1566,7 +1566,7 @@ def _save_header(self):
if isinstance(index_label, list) and len(index_label) > 1:
col_line.extend([''] * (len(index_label) - 1))

col_line.extend(columns.get_level_values(i))
col_line.extend(columns._get_level_values(i))

writer.writerow(col_line)

Expand Down
5 changes: 5 additions & 0 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2352,6 +2352,11 @@ def get_level_values(self, level):
self._validate_index_level(level)
return self

def _get_level_values(self, num):
# Used to mirror implementation for MultiIndex
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better to add an actual doc-string

# GH #10461
return self.get_level_values(num)

_index_shared_docs['get_indexer'] = """
Compute indexer and mask for new index given the current index. The
indexer should be then used as an input to ndarray.take to align the
Expand Down
18 changes: 12 additions & 6 deletions pandas/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,7 +846,7 @@ def _try_mi(k):

raise InvalidIndexError(key)

def _get_level_values(self, level):
def _get_level_values(self, level, copy=True):
"""
Return vector of label values for requested level,
equal to the length of the index
Expand All @@ -856,6 +856,7 @@ def _get_level_values(self, level):
Parameters
----------
level : int level
copy : bool whether copy of results should be done
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are you adding this? this is a whole different ball game.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback What I needed was for _get_level_values() to behave the same as the public get_level_values(), but assuming an int argument; the copy argument achieves that by doing the shallow copy there.

When I first looked at this, there was no _get_level_values(), but that got introduced within the past 2 weeks, so I then had to make everything compatible.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still, it's not clear why you would actually be making this change; it just adds too much complexity.

show an example of why you think you need it (or simply take it out)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback I'll try an alternate implementation and let you review that.


Returns
-------
Expand All @@ -866,7 +867,11 @@ def _get_level_values(self, level):
labels = self.labels[level]
filled = algos.take_1d(unique._values, labels,
fill_value=unique._na_value)
return filled
if copy:
values = unique._shallow_copy(filled)
else:
values = filled
return values

def get_level_values(self, level):
"""
Expand All @@ -882,7 +887,7 @@ def get_level_values(self, level):
values : Index
"""
level = self._get_level_number(level)
values = self._get_level_values(level)
values = self._get_level_values(level, copy=False)
return self.levels[level]._shallow_copy(values)

def format(self, space=2, sparsify=None, adjoin=True, names=False,
Expand Down Expand Up @@ -966,7 +971,8 @@ def to_frame(self, index=True):
"""

from pandas import DataFrame
result = DataFrame({(name or level): self.get_level_values(level)
result = DataFrame({(name or level):
self._get_level_values(level)
for name, level in
zip(self.names, range(len(self.levels)))},
copy=False)
Expand Down Expand Up @@ -1301,8 +1307,8 @@ def append(self, other):
for o in other):
arrays = []
for i in range(self.nlevels):
label = self.get_level_values(i)
appended = [o.get_level_values(i) for o in other]
label = self._get_level_values(i)
appended = [o._get_level_values(i) for o in other]
arrays.append(label.append(appended))
return MultiIndex.from_arrays(arrays, names=self.names)

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ def _get_column_names_and_types(self, dtype_mapper):
if self.index is not None:
for i, idx_label in enumerate(self.index):
idx_type = dtype_mapper(
self.frame.index.get_level_values(i))
self.frame.index._get_level_values(i))
column_names_and_types.append((text_type(idx_label),
idx_type, True))

Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/frame/test_combine_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,24 @@ def test_concat_axis_parameter(self):
with assertRaisesRegexp(ValueError, 'No axis named'):
pd.concat([series1, series2], axis='something')

def test_concat_numerical_names(self):
# #15262 # #12223
df = pd.DataFrame({'col': range(9)},
dtype='int32',
index=(pd.MultiIndex
.from_product([['A0', 'A1', 'A2'],
['B0', 'B1', 'B2']],
names=[1, 2])))
result = pd.concat((df.iloc[:2, :], df.iloc[-2:, :]))
expected = pd.DataFrame({'col': [0, 1, 7, 8]},
dtype='int32',
index=pd.MultiIndex.from_tuples([('A0', 'B0'),
('A0', 'B1'),
('A2', 'B1'),
('A2', 'B2')],
names=[1, 2]))
tm.assert_frame_equal(result, expected)


class TestDataFrameCombineFirst(tm.TestCase, TestData):

Expand Down
6 changes: 3 additions & 3 deletions pandas/util/doctools.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ def _insert_index(self, data):
else:
for i in range(idx_nlevels):
data.insert(i, 'Index{0}'.format(i),
data.index.get_level_values(i))
data.index._get_level_values(i))

col_nlevels = data.columns.nlevels
if col_nlevels > 1:
col = data.columns.get_level_values(0)
values = [data.columns.get_level_values(i).values
col = data.columns._get_level_values(0)
values = [data.columns._get_level_values(i).values
for i in range(1, col_nlevels)]
col_df = pd.DataFrame(values)
data.columns = col_df.columns
Expand Down