Skip to content

Commit 9ef94e8

Browse files
jbrockmendel authored and proost committed
CLN: reshape (pandas-dev#29627)
1 parent 1d8a11b commit 9ef94e8

File tree

5 files changed

+72
-71
lines changed

5 files changed

+72
-71
lines changed

pandas/core/reshape/concat.py

+7 -30
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
concat routines
33
"""
44

5+
from typing import List
56
import warnings
67

78
import numpy as np
@@ -437,13 +438,13 @@ def get_result(self):
437438
mgr = self.objs[0]._data.concat(
438439
[x._data for x in self.objs], self.new_axes
439440
)
440-
cons = _get_series_result_type(mgr, self.objs)
441+
cons = self.objs[0]._constructor
441442
return cons(mgr, name=name).__finalize__(self, method="concat")
442443

443444
# combine as columns in a frame
444445
else:
445446
data = dict(zip(range(len(self.objs)), self.objs))
446-
cons = _get_series_result_type(data)
447+
cons = DataFrame
447448

448449
index, columns = self.new_axes
449450
df = cons(data, index=index)
@@ -473,7 +474,7 @@ def get_result(self):
473474
if not self.copy:
474475
new_data._consolidate_inplace()
475476

476-
cons = _get_frame_result_type(new_data, self.objs)
477+
cons = self.objs[0]._constructor
477478
return cons._from_axes(new_data, self.new_axes).__finalize__(
478479
self, method="concat"
479480
)
@@ -520,13 +521,13 @@ def _get_new_axes(self):
520521
new_axes[self.axis] = self._get_concat_axis()
521522
return new_axes
522523

523-
def _get_comb_axis(self, i):
524+
def _get_comb_axis(self, i: int) -> Index:
524525
data_axis = self.objs[0]._get_block_manager_axis(i)
525526
return get_objs_combined_axis(
526527
self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort
527528
)
528529

529-
def _get_concat_axis(self):
530+
def _get_concat_axis(self) -> Index:
530531
"""
531532
Return index to be used along concatenation axis.
532533
"""
@@ -537,7 +538,7 @@ def _get_concat_axis(self):
537538
idx = ibase.default_index(len(self.objs))
538539
return idx
539540
elif self.keys is None:
540-
names = [None] * len(self.objs)
541+
names: List = [None] * len(self.objs)
541542
num = 0
542543
has_names = False
543544
for i, x in enumerate(self.objs):
@@ -702,27 +703,3 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
702703
return MultiIndex(
703704
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
704705
)
705-
706-
707-
def _get_series_result_type(result, objs=None):
708-
"""
709-
return appropriate class of Series concat
710-
input is either dict or array-like
711-
"""
712-
# TODO: See if we can just inline with _constructor_expanddim
713-
# now that sparse is removed.
714-
715-
# concat Series with axis 1
716-
if isinstance(result, dict):
717-
return DataFrame
718-
719-
# otherwise it is a SingleBlockManager (axis = 0)
720-
return objs[0]._constructor
721-
722-
723-
def _get_frame_result_type(result, objs):
724-
"""
725-
return appropriate class of DataFrame-like concat
726-
"""
727-
# TODO: just inline this as _constructor.
728-
return objs[0]

pandas/core/reshape/melt.py

+11 -7
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import re
2+
from typing import List
23

34
import numpy as np
45

@@ -10,7 +11,7 @@
1011
from pandas.core.dtypes.missing import notna
1112

1213
from pandas.core.arrays import Categorical
13-
from pandas.core.frame import _shared_docs
14+
from pandas.core.frame import DataFrame, _shared_docs
1415
from pandas.core.indexes.base import Index
1516
from pandas.core.reshape.concat import concat
1617
from pandas.core.tools.numeric import to_numeric
@@ -21,20 +22,21 @@
2122
% dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt")
2223
)
2324
def melt(
24-
frame,
25+
frame: DataFrame,
2526
id_vars=None,
2627
value_vars=None,
2728
var_name=None,
2829
value_name="value",
2930
col_level=None,
30-
):
31+
) -> DataFrame:
3132
# TODO: what about the existing index?
3233
# If multiindex, gather names of columns on all level for checking presence
3334
# of `id_vars` and `value_vars`
3435
if isinstance(frame.columns, ABCMultiIndex):
3536
cols = [x for c in frame.columns for x in c]
3637
else:
3738
cols = list(frame.columns)
39+
3840
if id_vars is not None:
3941
if not is_list_like(id_vars):
4042
id_vars = [id_vars]
@@ -119,7 +121,7 @@ def melt(
119121
return frame._constructor(mdata, columns=mcolumns)
120122

121123

122-
def lreshape(data, groups, dropna=True, label=None):
124+
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame:
123125
"""
124126
Reshape wide-format data to long. Generalized inverse of DataFrame.pivot
125127
@@ -129,6 +131,8 @@ def lreshape(data, groups, dropna=True, label=None):
129131
groups : dict
130132
{new_name : list_of_columns}
131133
dropna : boolean, default True
134+
label : object, default None
135+
Dummy kwarg, not used.
132136
133137
Examples
134138
--------
@@ -188,7 +192,7 @@ def lreshape(data, groups, dropna=True, label=None):
188192
return data._constructor(mdata, columns=id_cols + pivot_cols)
189193

190194

191-
def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
195+
def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
192196
r"""
193197
Wide panel to long format. Less flexible but more user-friendly than melt.
194198
@@ -412,14 +416,14 @@ def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
412416
two 2.9
413417
"""
414418

415-
def get_var_names(df, stub, sep, suffix):
419+
def get_var_names(df, stub: str, sep: str, suffix: str) -> List[str]:
416420
regex = r"^{stub}{sep}{suffix}$".format(
417421
stub=re.escape(stub), sep=re.escape(sep), suffix=suffix
418422
)
419423
pattern = re.compile(regex)
420424
return [col for col in df.columns if pattern.match(col)]
421425

422-
def melt_stub(df, stub, i, j, value_vars, sep: str):
426+
def melt_stub(df, stub: str, i, j, value_vars, sep: str):
423427
newdf = melt(
424428
df,
425429
id_vars=i,

pandas/core/reshape/merge.py

+17 -15
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ def merge(
9292

9393

9494
def _groupby_and_merge(
95-
by, on, left, right, _merge_pieces, check_duplicates: bool = True
95+
by, on, left, right: "DataFrame", _merge_pieces, check_duplicates: bool = True
9696
):
9797
"""
9898
groupby & merge; we are always performing a left-by type operation
@@ -313,7 +313,7 @@ def merge_asof(
313313
suffixes=("_x", "_y"),
314314
tolerance=None,
315315
allow_exact_matches: bool = True,
316-
direction="backward",
316+
direction: str = "backward",
317317
):
318318
"""
319319
Perform an asof merge. This is similar to a left-join except that we
@@ -1299,19 +1299,21 @@ def _get_join_indexers(
12991299
right_keys
13001300
), "left_key and right_keys must be the same length"
13011301

1302-
# bind `sort` arg. of _factorize_keys
1303-
fkeys = partial(_factorize_keys, sort=sort)
1304-
13051302
# get left & right join labels and num. of levels at each location
1306-
llab, rlab, shape = map(list, zip(*map(fkeys, left_keys, right_keys)))
1303+
mapped = (
1304+
_factorize_keys(left_keys[n], right_keys[n], sort=sort)
1305+
for n in range(len(left_keys))
1306+
)
1307+
zipped = zip(*mapped)
1308+
llab, rlab, shape = [list(x) for x in zipped]
13071309

13081310
# get flat i8 keys from label lists
13091311
lkey, rkey = _get_join_keys(llab, rlab, shape, sort)
13101312

13111313
# factorize keys to a dense i8 space
13121314
# `count` is the num. of unique keys
13131315
# set(lkey) | set(rkey) == range(count)
1314-
lkey, rkey, count = fkeys(lkey, rkey)
1316+
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)
13151317

13161318
# preserve left frame order if how == 'left' and sort == False
13171319
kwargs = copy.copy(kwargs)
@@ -1487,12 +1489,12 @@ def get_result(self):
14871489
return result
14881490

14891491

1490-
def _asof_function(direction):
1492+
def _asof_function(direction: str):
14911493
name = "asof_join_{dir}".format(dir=direction)
14921494
return getattr(libjoin, name, None)
14931495

14941496

1495-
def _asof_by_function(direction):
1497+
def _asof_by_function(direction: str):
14961498
name = "asof_join_{dir}_on_X_by_Y".format(dir=direction)
14971499
return getattr(libjoin, name, None)
14981500

@@ -1536,7 +1538,7 @@ def __init__(
15361538
how: str = "asof",
15371539
tolerance=None,
15381540
allow_exact_matches: bool = True,
1539-
direction="backward",
1541+
direction: str = "backward",
15401542
):
15411543

15421544
self.by = by
@@ -1775,11 +1777,11 @@ def flip(xs):
17751777

17761778
def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool):
17771779

1778-
# bind `sort` argument
1779-
fkeys = partial(_factorize_keys, sort=sort)
1780-
17811780
# left & right join labels and num. of levels at each location
1782-
mapped = (fkeys(index.levels[n], join_keys[n]) for n in range(len(index.levels)))
1781+
mapped = (
1782+
_factorize_keys(index.levels[n], join_keys[n], sort=sort)
1783+
for n in range(index.nlevels)
1784+
)
17831785
zipped = zip(*mapped)
17841786
rcodes, lcodes, shape = [list(x) for x in zipped]
17851787
if sort:
@@ -1804,7 +1806,7 @@ def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool):
18041806
lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort)
18051807

18061808
# factorize keys to a dense i8 space
1807-
lkey, rkey, count = fkeys(lkey, rkey)
1809+
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)
18081810

18091811
return libjoin.left_outer_join(lkey, rkey, count, sort=sort)
18101812

0 commit comments

Comments
 (0)