-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN: reshape #29627
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CLN: reshape #29627
Changes from all commits
7e42340
1f24020
f8008d2
51b6b0f
51276e3
68926f5
e9b981a
6c70dcb
3c7e763
ebbf53b
5976f6f
18a205e
751ec83
66b9e71
4895d19
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import re | ||
from typing import List | ||
|
||
import numpy as np | ||
|
||
|
@@ -10,7 +11,7 @@ | |
from pandas.core.dtypes.missing import notna | ||
|
||
from pandas.core.arrays import Categorical | ||
from pandas.core.frame import _shared_docs | ||
from pandas.core.frame import DataFrame, _shared_docs | ||
from pandas.core.indexes.base import Index | ||
from pandas.core.reshape.concat import concat | ||
from pandas.core.tools.numeric import to_numeric | ||
|
@@ -21,20 +22,21 @@ | |
% dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt") | ||
) | ||
def melt( | ||
frame, | ||
frame: DataFrame, | ||
id_vars=None, | ||
value_vars=None, | ||
var_name=None, | ||
value_name="value", | ||
col_level=None, | ||
): | ||
) -> DataFrame: | ||
# TODO: what about the existing index? | ||
# If multiindex, gather names of columns on all level for checking presence | ||
# of `id_vars` and `value_vars` | ||
if isinstance(frame.columns, ABCMultiIndex): | ||
cols = [x for c in frame.columns for x in c] | ||
else: | ||
cols = list(frame.columns) | ||
|
||
if id_vars is not None: | ||
if not is_list_like(id_vars): | ||
id_vars = [id_vars] | ||
|
@@ -119,7 +121,7 @@ def melt( | |
return frame._constructor(mdata, columns=mcolumns) | ||
|
||
|
||
def lreshape(data, groups, dropna=True, label=None): | ||
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame: | ||
""" | ||
Reshape long-format data to wide. Generalized inverse of DataFrame.pivot | ||
|
||
|
@@ -129,6 +131,8 @@ def lreshape(data, groups, dropna=True, label=None): | |
groups : dict | ||
{new_name : list_of_columns} | ||
dropna : boolean, default True | ||
label : object, default None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. should deprecate / remove this There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you create an issue There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you create an issue to deprecate this |
||
Dummy kwarg, not used. | ||
|
||
Examples | ||
-------- | ||
|
@@ -188,7 +192,7 @@ def lreshape(data, groups, dropna=True, label=None): | |
return data._constructor(mdata, columns=id_cols + pivot_cols) | ||
|
||
|
||
def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): | ||
def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): | ||
r""" | ||
Wide panel to long format. Less flexible but more user-friendly than melt. | ||
|
||
|
@@ -412,14 +416,14 @@ def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): | |
two 2.9 | ||
""" | ||
|
||
def get_var_names(df, stub, sep, suffix): | ||
def get_var_names(df, stub: str, sep: str, suffix: str) -> List[str]: | ||
regex = r"^{stub}{sep}{suffix}$".format( | ||
stub=re.escape(stub), sep=re.escape(sep), suffix=suffix | ||
) | ||
pattern = re.compile(regex) | ||
return [col for col in df.columns if pattern.match(col)] | ||
|
||
def melt_stub(df, stub, i, j, value_vars, sep: str): | ||
def melt_stub(df, stub: str, i, j, value_vars, sep: str): | ||
newdf = melt( | ||
df, | ||
id_vars=i, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,7 +92,7 @@ def merge( | |
|
||
|
||
def _groupby_and_merge( | ||
by, on, left, right, _merge_pieces, check_duplicates: bool = True | ||
by, on, left, right: "DataFrame", _merge_pieces, check_duplicates: bool = True | ||
): | ||
""" | ||
groupby & merge; we are always performing a left-by type operation | ||
|
@@ -313,7 +313,7 @@ def merge_asof( | |
suffixes=("_x", "_y"), | ||
tolerance=None, | ||
allow_exact_matches: bool = True, | ||
direction="backward", | ||
direction: str = "backward", | ||
): | ||
""" | ||
Perform an asof merge. This is similar to a left-join except that we | ||
|
@@ -1299,19 +1299,21 @@ def _get_join_indexers( | |
right_keys | ||
), "left_key and right_keys must be the same length" | ||
|
||
# bind `sort` arg. of _factorize_keys | ||
fkeys = partial(_factorize_keys, sort=sort) | ||
|
||
# get left & right join labels and num. of levels at each location | ||
llab, rlab, shape = map(list, zip(*map(fkeys, left_keys, right_keys))) | ||
mapped = ( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can use enumerate here instead of range(len(...)) |
||
_factorize_keys(left_keys[n], right_keys[n], sort=sort) | ||
for n in range(len(left_keys)) | ||
) | ||
zipped = zip(*mapped) | ||
llab, rlab, shape = [list(x) for x in zipped] | ||
|
||
# get flat i8 keys from label lists | ||
lkey, rkey = _get_join_keys(llab, rlab, shape, sort) | ||
|
||
# factorize keys to a dense i8 space | ||
# `count` is the num. of unique keys | ||
# set(lkey) | set(rkey) == range(count) | ||
lkey, rkey, count = fkeys(lkey, rkey) | ||
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) | ||
|
||
# preserve left frame order if how == 'left' and sort == False | ||
kwargs = copy.copy(kwargs) | ||
|
@@ -1487,12 +1489,12 @@ def get_result(self): | |
return result | ||
|
||
|
||
def _asof_function(direction): | ||
def _asof_function(direction: str): | ||
name = "asof_join_{dir}".format(dir=direction) | ||
return getattr(libjoin, name, None) | ||
|
||
|
||
def _asof_by_function(direction): | ||
def _asof_by_function(direction: str): | ||
name = "asof_join_{dir}_on_X_by_Y".format(dir=direction) | ||
return getattr(libjoin, name, None) | ||
|
||
|
@@ -1536,7 +1538,7 @@ def __init__( | |
how: str = "asof", | ||
tolerance=None, | ||
allow_exact_matches: bool = True, | ||
direction="backward", | ||
direction: str = "backward", | ||
): | ||
|
||
self.by = by | ||
|
@@ -1775,11 +1777,11 @@ def flip(xs): | |
|
||
def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool): | ||
|
||
# bind `sort` argument | ||
fkeys = partial(_factorize_keys, sort=sort) | ||
|
||
# left & right join labels and num. of levels at each location | ||
mapped = (fkeys(index.levels[n], join_keys[n]) for n in range(len(index.levels))) | ||
mapped = ( | ||
_factorize_keys(index.levels[n], join_keys[n], sort=sort) | ||
for n in range(index.nlevels) | ||
) | ||
zipped = zip(*mapped) | ||
rcodes, lcodes, shape = [list(x) for x in zipped] | ||
if sort: | ||
|
@@ -1804,7 +1806,7 @@ def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool): | |
lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort) | ||
|
||
# factorize keys to a dense i8 space | ||
lkey, rkey, count = fkeys(lkey, rkey) | ||
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) | ||
|
||
return libjoin.left_outer_join(lkey, rkey, count, sort=sort) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
does this play nice with subclassing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably not, but it's the exact behavior we currently have