-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN: reshape #29627
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CLN: reshape #29627
Changes from 10 commits
7e42340
1f24020
f8008d2
51b6b0f
51276e3
68926f5
e9b981a
6c70dcb
3c7e763
ebbf53b
5976f6f
18a205e
751ec83
66b9e71
4895d19
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -437,13 +437,13 @@ def get_result(self): | |
mgr = self.objs[0]._data.concat( | ||
[x._data for x in self.objs], self.new_axes | ||
) | ||
cons = _get_series_result_type(mgr, self.objs) | ||
cons = self.objs[0]._constructor | ||
return cons(mgr, name=name).__finalize__(self, method="concat") | ||
|
||
# combine as columns in a frame | ||
else: | ||
data = dict(zip(range(len(self.objs)), self.objs)) | ||
cons = _get_series_result_type(data) | ||
cons = DataFrame | ||
|
||
index, columns = self.new_axes | ||
df = cons(data, index=index) | ||
|
@@ -473,7 +473,7 @@ def get_result(self): | |
if not self.copy: | ||
new_data._consolidate_inplace() | ||
|
||
cons = _get_frame_result_type(new_data, self.objs) | ||
cons = self.objs[0]._constructor | ||
return cons._from_axes(new_data, self.new_axes).__finalize__( | ||
self, method="concat" | ||
) | ||
|
@@ -520,17 +520,13 @@ def _get_new_axes(self): | |
new_axes[self.axis] = self._get_concat_axis() | ||
return new_axes | ||
|
||
def _get_comb_axis(self, i): | ||
def _get_comb_axis(self, i: int) -> Index: | ||
data_axis = self.objs[0]._get_block_manager_axis(i) | ||
try: | ||
return get_objs_combined_axis( | ||
self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort | ||
) | ||
except IndexError: | ||
types = [type(x).__name__ for x in self.objs] | ||
raise TypeError("Cannot concatenate list of {types}".format(types=types)) | ||
return get_objs_combined_axis( | ||
self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort | ||
) | ||
|
||
def _get_concat_axis(self): | ||
def _get_concat_axis(self) -> Index: | ||
""" | ||
Return index to be used along concatenation axis. | ||
""" | ||
|
@@ -541,7 +537,7 @@ def _get_concat_axis(self): | |
idx = ibase.default_index(len(self.objs)) | ||
return idx | ||
elif self.keys is None: | ||
names = [None] * len(self.objs) | ||
names = [None] * len(self.objs) # type: list | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. At least for now should always prefer |
||
num = 0 | ||
has_names = False | ||
for i, x in enumerate(self.objs): | ||
|
@@ -706,27 +702,3 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde | |
return MultiIndex( | ||
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False | ||
) | ||
|
||
|
||
def _get_series_result_type(result, objs=None): | ||
""" | ||
return appropriate class of Series concat | ||
input is either dict or array-like | ||
""" | ||
# TODO: See if we can just inline with _constructor_expanddim | ||
# now that sparse is removed. | ||
|
||
# concat Series with axis 1 | ||
if isinstance(result, dict): | ||
return DataFrame | ||
|
||
# otherwise it is a SingleBlockManager (axis = 0) | ||
return objs[0]._constructor | ||
|
||
|
||
def _get_frame_result_type(result, objs): | ||
""" | ||
return appropriate class of DataFrame-like concat | ||
""" | ||
# TODO: just inline this as _constructor. | ||
return objs[0] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import re | ||
from typing import List | ||
|
||
import numpy as np | ||
|
||
|
@@ -10,7 +11,7 @@ | |
from pandas.core.dtypes.missing import notna | ||
|
||
from pandas.core.arrays import Categorical | ||
from pandas.core.frame import _shared_docs | ||
from pandas.core.frame import DataFrame, _shared_docs | ||
from pandas.core.indexes.base import Index | ||
from pandas.core.reshape.concat import concat | ||
from pandas.core.tools.numeric import to_numeric | ||
|
@@ -21,20 +22,21 @@ | |
% dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt") | ||
) | ||
def melt( | ||
frame, | ||
frame: DataFrame, | ||
id_vars=None, | ||
value_vars=None, | ||
var_name=None, | ||
value_name="value", | ||
col_level=None, | ||
): | ||
) -> DataFrame: | ||
# TODO: what about the existing index? | ||
# If multiindex, gather names of columns on all level for checking presence | ||
# of `id_vars` and `value_vars` | ||
if isinstance(frame.columns, ABCMultiIndex): | ||
cols = [x for c in frame.columns for x in c] | ||
else: | ||
cols = list(frame.columns) | ||
|
||
if id_vars is not None: | ||
if not is_list_like(id_vars): | ||
id_vars = [id_vars] | ||
|
@@ -119,7 +121,7 @@ def melt( | |
return frame._constructor(mdata, columns=mcolumns) | ||
|
||
|
||
def lreshape(data, groups, dropna=True, label=None): | ||
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame: | ||
""" | ||
Reshape long-format data to wide. Generalized inverse of DataFrame.pivot | ||
|
||
|
@@ -129,6 +131,8 @@ def lreshape(data, groups, dropna=True, label=None): | |
groups : dict | ||
{new_name : list_of_columns} | ||
dropna : boolean, default True | ||
label : object, default None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. should deprecate / remove this There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you create an issue There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you create an issue to deprecate this |
||
Dummy kwarg, not used. | ||
|
||
Examples | ||
-------- | ||
|
@@ -188,7 +192,7 @@ def lreshape(data, groups, dropna=True, label=None): | |
return data._constructor(mdata, columns=id_cols + pivot_cols) | ||
|
||
|
||
def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): | ||
def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): | ||
r""" | ||
Wide panel to long format. Less flexible but more user-friendly than melt. | ||
|
||
|
@@ -412,14 +416,14 @@ def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): | |
two 2.9 | ||
""" | ||
|
||
def get_var_names(df, stub, sep, suffix): | ||
def get_var_names(df, stub: str, sep: str, suffix) -> List[str]: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we can annotate suffix as a str |
||
regex = r"^{stub}{sep}{suffix}$".format( | ||
stub=re.escape(stub), sep=re.escape(sep), suffix=suffix | ||
) | ||
pattern = re.compile(regex) | ||
return [col for col in df.columns if pattern.match(col)] | ||
|
||
def melt_stub(df, stub, i, j, value_vars, sep: str): | ||
def melt_stub(df, stub: str, i, j, value_vars, sep: str): | ||
newdf = melt( | ||
df, | ||
id_vars=i, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,7 +92,7 @@ def merge( | |
|
||
|
||
def _groupby_and_merge( | ||
by, on, left, right, _merge_pieces, check_duplicates: bool = True | ||
by, on, left, right: "DataFrame", _merge_pieces, check_duplicates: bool = True | ||
): | ||
""" | ||
groupby & merge; we are always performing a left-by type operation | ||
|
@@ -313,7 +313,7 @@ def merge_asof( | |
suffixes=("_x", "_y"), | ||
tolerance=None, | ||
allow_exact_matches: bool = True, | ||
direction="backward", | ||
direction: str = "backward", | ||
): | ||
""" | ||
Perform an asof merge. This is similar to a left-join except that we | ||
|
@@ -1299,19 +1299,21 @@ def _get_join_indexers( | |
right_keys | ||
), "left_key and right_keys must be the same length" | ||
|
||
# bind `sort` arg. of _factorize_keys | ||
fkeys = partial(_factorize_keys, sort=sort) | ||
|
||
# get left & right join labels and num. of levels at each location | ||
llab, rlab, shape = map(list, zip(*map(fkeys, left_keys, right_keys))) | ||
mapped = ( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can use enumerate here instead of range(len(...)) |
||
_factorize_keys(left_keys[n], right_keys[n], sort=sort) | ||
for n in range(len(left_keys)) | ||
) | ||
zipped = zip(*mapped) | ||
llab, rlab, shape = [list(x) for x in zipped] | ||
|
||
# get flat i8 keys from label lists | ||
lkey, rkey = _get_join_keys(llab, rlab, shape, sort) | ||
|
||
# factorize keys to a dense i8 space | ||
# `count` is the num. of unique keys | ||
# set(lkey) | set(rkey) == range(count) | ||
lkey, rkey, count = fkeys(lkey, rkey) | ||
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) | ||
|
||
# preserve left frame order if how == 'left' and sort == False | ||
kwargs = copy.copy(kwargs) | ||
|
@@ -1487,12 +1489,12 @@ def get_result(self): | |
return result | ||
|
||
|
||
def _asof_function(direction): | ||
def _asof_function(direction: str): | ||
name = "asof_join_{dir}".format(dir=direction) | ||
return getattr(libjoin, name, None) | ||
|
||
|
||
def _asof_by_function(direction): | ||
def _asof_by_function(direction: str): | ||
name = "asof_join_{dir}_on_X_by_Y".format(dir=direction) | ||
return getattr(libjoin, name, None) | ||
|
||
|
@@ -1536,7 +1538,7 @@ def __init__( | |
how: str = "asof", | ||
tolerance=None, | ||
allow_exact_matches: bool = True, | ||
direction="backward", | ||
direction: str = "backward", | ||
): | ||
|
||
self.by = by | ||
|
@@ -1775,11 +1777,11 @@ def flip(xs): | |
|
||
def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool): | ||
|
||
# bind `sort` argument | ||
fkeys = partial(_factorize_keys, sort=sort) | ||
|
||
# left & right join labels and num. of levels at each location | ||
mapped = (fkeys(index.levels[n], join_keys[n]) for n in range(len(index.levels))) | ||
mapped = ( | ||
_factorize_keys(index.levels[n], join_keys[n], sort=sort) | ||
for n in range(len(index.levels)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. use enumerate |
||
) | ||
zipped = zip(*mapped) | ||
rcodes, lcodes, shape = [list(x) for x in zipped] | ||
if sort: | ||
|
@@ -1804,7 +1806,7 @@ def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool): | |
lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort) | ||
|
||
# factorize keys to a dense i8 space | ||
lkey, rkey, count = fkeys(lkey, rkey) | ||
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) | ||
|
||
return libjoin.left_outer_join(lkey, rkey, count, sort=sort) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
does this play nice with subclassing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably not, but its the exact behavior we currently have