Skip to content

CLN: reshape #29627

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Nov 20, 2019
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def _get_distinct_objs(objs):
return res


def _get_combined_index(indexes, intersect=False, sort=False):
def _get_combined_index(indexes, intersect: bool = False, sort=False) -> Index:
"""
Return the union or intersection of indexes.

Expand Down
46 changes: 9 additions & 37 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,13 +437,13 @@ def get_result(self):
mgr = self.objs[0]._data.concat(
[x._data for x in self.objs], self.new_axes
)
cons = _get_series_result_type(mgr, self.objs)
cons = self.objs[0]._constructor
return cons(mgr, name=name).__finalize__(self, method="concat")

# combine as columns in a frame
else:
data = dict(zip(range(len(self.objs)), self.objs))
cons = _get_series_result_type(data)
cons = DataFrame
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this play nice with subclassing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably not, but it's the exact behavior we currently have.


index, columns = self.new_axes
df = cons(data, index=index)
Expand Down Expand Up @@ -473,7 +473,7 @@ def get_result(self):
if not self.copy:
new_data._consolidate_inplace()

cons = _get_frame_result_type(new_data, self.objs)
cons = self.objs[0]._constructor
return cons._from_axes(new_data, self.new_axes).__finalize__(
self, method="concat"
)
Expand Down Expand Up @@ -520,17 +520,13 @@ def _get_new_axes(self):
new_axes[self.axis] = self._get_concat_axis()
return new_axes

def _get_comb_axis(self, i):
def _get_comb_axis(self, i: int) -> Index:
data_axis = self.objs[0]._get_block_manager_axis(i)
try:
return get_objs_combined_axis(
self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort
)
except IndexError:
types = [type(x).__name__ for x in self.objs]
raise TypeError("Cannot concatenate list of {types}".format(types=types))
return get_objs_combined_axis(
self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort
)

def _get_concat_axis(self):
def _get_concat_axis(self) -> Index:
"""
Return index to be used along concatenation axis.
"""
Expand All @@ -541,7 +537,7 @@ def _get_concat_axis(self):
idx = ibase.default_index(len(self.objs))
return idx
elif self.keys is None:
names = [None] * len(self.objs)
names = [None] * len(self.objs) # type: list
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least for now, we should always prefer typing.* to the builtin containers.

num = 0
has_names = False
for i, x in enumerate(self.objs):
Expand Down Expand Up @@ -706,27 +702,3 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
return MultiIndex(
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
)


def _get_series_result_type(result, objs=None):
"""
return appropriate class of Series concat
input is either dict or array-like
"""
# TODO: See if we can just inline with _constructor_expanddim
# now that sparse is removed.

# concat Series with axis 1
if isinstance(result, dict):
return DataFrame

# otherwise it is a SingleBlockManager (axis = 0)
return objs[0]._constructor


def _get_frame_result_type(result, objs):
"""
return appropriate class of DataFrame-like concat
"""
# TODO: just inline this as _constructor.
return objs[0]
18 changes: 11 additions & 7 deletions pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
from typing import List

import numpy as np

Expand All @@ -10,7 +11,7 @@
from pandas.core.dtypes.missing import notna

from pandas.core.arrays import Categorical
from pandas.core.frame import _shared_docs
from pandas.core.frame import DataFrame, _shared_docs
from pandas.core.indexes.base import Index
from pandas.core.reshape.concat import concat
from pandas.core.tools.numeric import to_numeric
Expand All @@ -21,20 +22,21 @@
% dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt")
)
def melt(
frame,
frame: DataFrame,
id_vars=None,
value_vars=None,
var_name=None,
value_name="value",
col_level=None,
):
) -> DataFrame:
# TODO: what about the existing index?
# If multiindex, gather names of columns on all level for checking presence
# of `id_vars` and `value_vars`
if isinstance(frame.columns, ABCMultiIndex):
cols = [x for c in frame.columns for x in c]
else:
cols = list(frame.columns)

if id_vars is not None:
if not is_list_like(id_vars):
id_vars = [id_vars]
Expand Down Expand Up @@ -119,7 +121,7 @@ def melt(
return frame._constructor(mdata, columns=mcolumns)


def lreshape(data, groups, dropna=True, label=None):
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame:
"""
Reshape wide-format data to long. Generalized inverse of DataFrame.pivot

Expand All @@ -129,6 +131,8 @@ def lreshape(data, groups, dropna=True, label=None):
groups : dict
{new_name : list_of_columns}
dropna : boolean, default True
label : object, default None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should deprecate / remove this

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you create an issue

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you create an issue to deprecate this

Dummy kwarg, not used.

Examples
--------
Expand Down Expand Up @@ -188,7 +192,7 @@ def lreshape(data, groups, dropna=True, label=None):
return data._constructor(mdata, columns=id_cols + pivot_cols)


def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
r"""
Wide panel to long format. Less flexible but more user-friendly than melt.

Expand Down Expand Up @@ -412,14 +416,14 @@ def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
two 2.9
"""

def get_var_names(df, stub, sep, suffix):
def get_var_names(df, stub: str, sep: str, suffix) -> List[str]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can annotate suffix as a str.

regex = r"^{stub}{sep}{suffix}$".format(
stub=re.escape(stub), sep=re.escape(sep), suffix=suffix
)
pattern = re.compile(regex)
return [col for col in df.columns if pattern.match(col)]

def melt_stub(df, stub, i, j, value_vars, sep: str):
def melt_stub(df, stub: str, i, j, value_vars, sep: str):
newdf = melt(
df,
id_vars=i,
Expand Down
32 changes: 17 additions & 15 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def merge(


def _groupby_and_merge(
by, on, left, right, _merge_pieces, check_duplicates: bool = True
by, on, left, right: "DataFrame", _merge_pieces, check_duplicates: bool = True
):
"""
groupby & merge; we are always performing a left-by type operation
Expand Down Expand Up @@ -313,7 +313,7 @@ def merge_asof(
suffixes=("_x", "_y"),
tolerance=None,
allow_exact_matches: bool = True,
direction="backward",
direction: str = "backward",
):
"""
Perform an asof merge. This is similar to a left-join except that we
Expand Down Expand Up @@ -1299,19 +1299,21 @@ def _get_join_indexers(
right_keys
), "left_key and right_keys must be the same length"

# bind `sort` arg. of _factorize_keys
fkeys = partial(_factorize_keys, sort=sort)

# get left & right join labels and num. of levels at each location
llab, rlab, shape = map(list, zip(*map(fkeys, left_keys, right_keys)))
mapped = (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can use enumerate here instead of range(len(...)).

_factorize_keys(left_keys[n], right_keys[n], sort=sort)
for n in range(len(left_keys))
)
zipped = zip(*mapped)
llab, rlab, shape = [list(x) for x in zipped]

# get flat i8 keys from label lists
lkey, rkey = _get_join_keys(llab, rlab, shape, sort)

# factorize keys to a dense i8 space
# `count` is the num. of unique keys
# set(lkey) | set(rkey) == range(count)
lkey, rkey, count = fkeys(lkey, rkey)
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

# preserve left frame order if how == 'left' and sort == False
kwargs = copy.copy(kwargs)
Expand Down Expand Up @@ -1487,12 +1489,12 @@ def get_result(self):
return result


def _asof_function(direction):
def _asof_function(direction: str):
name = "asof_join_{dir}".format(dir=direction)
return getattr(libjoin, name, None)


def _asof_by_function(direction):
def _asof_by_function(direction: str):
name = "asof_join_{dir}_on_X_by_Y".format(dir=direction)
return getattr(libjoin, name, None)

Expand Down Expand Up @@ -1536,7 +1538,7 @@ def __init__(
how: str = "asof",
tolerance=None,
allow_exact_matches: bool = True,
direction="backward",
direction: str = "backward",
):

self.by = by
Expand Down Expand Up @@ -1775,11 +1777,11 @@ def flip(xs):

def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool):

# bind `sort` argument
fkeys = partial(_factorize_keys, sort=sort)

# left & right join labels and num. of levels at each location
mapped = (fkeys(index.levels[n], join_keys[n]) for n in range(len(index.levels)))
mapped = (
_factorize_keys(index.levels[n], join_keys[n], sort=sort)
for n in range(len(index.levels))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use enumerate

)
zipped = zip(*mapped)
rcodes, lcodes, shape = [list(x) for x in zipped]
if sort:
Expand All @@ -1804,7 +1806,7 @@ def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool):
lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort)

# factorize keys to a dense i8 space
lkey, rkey, count = fkeys(lkey, rkey)
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

return libjoin.left_outer_join(lkey, rkey, count, sort=sort)

Expand Down
Loading