diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c314ae03a7daf..4468b5e07cc07 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -245,6 +245,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pandas/core/reshape/pivot.py \ pandas/core/reshape/reshape.py \ pandas/core/reshape/tile.py \ + pandas/core/reshape/melt.py \ -k"-crosstab -pivot_table -cut" RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 824b0c59f9ce5..d655a8be13de7 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -270,15 +270,15 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] ... }) >>> df - birth famid ht1 ht2 + famid birth ht1 ht2 0 1 1 2.8 3.4 - 1 2 1 2.9 3.8 - 2 3 1 2.2 2.9 - 3 1 2 2.0 3.2 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 4 2 2 1.8 2.8 - 5 3 2 1.9 2.4 - 6 1 3 2.2 3.3 - 7 2 3 2.3 3.4 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 8 3 3 2.1 2.9 >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') >>> l @@ -323,33 +323,29 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): Less wieldy column names are also handled >>> np.random.seed(0) - >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3), - ... 'A(quarterly)-2011': np.random.rand(3), - ... 'B(quarterly)-2010': np.random.rand(3), - ... 'B(quarterly)-2011': np.random.rand(3), + >>> df = pd.DataFrame({'A(weekly)-2010': np.random.rand(3), + ... 'A(weekly)-2011': np.random.rand(3), + ... 'B(weekly)-2010': np.random.rand(3), + ... 'B(weekly)-2011': np.random.rand(3), ... 'X' : np.random.randint(3, size=3)}) >>> df['id'] = df.index >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ... - 0 0.548814 0.544883 0.437587 ... - 1 0.715189 0.423655 0.891773 ... - 2 0.602763 0.645894 0.963663 ... - X id - 0 0 0 - 1 1 1 - 2 1 2 - - >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id', + A(weekly)-2010 A(weekly)-2011 B(weekly)-2010 B(weekly)-2011 X id + 0 0.548814 0.544883 0.437587 0.383442 0 0 + 1 0.715189 0.423655 0.891773 0.791725 1 1 + 2 0.602763 0.645894 0.963663 0.528895 1 2 + + >>> pd.wide_to_long(df, ['A(weekly)', 'B(weekly)'], i='id', ... j='year', sep='-') ... # doctest: +NORMALIZE_WHITESPACE - X A(quarterly) B(quarterly) + X A(weekly) B(weekly) id year - 0 2010 0 0.548814 0.437587 - 1 2010 1 0.715189 0.891773 - 2 2010 1 0.602763 0.963663 - 0 2011 0 0.544883 0.383442 - 1 2011 1 0.423655 0.791725 - 2 2011 1 0.645894 0.528895 + 0 2010 0 0.548814 0.437587 + 1 2010 1 0.715189 0.891773 + 2 2010 1 0.602763 0.963663 + 0 2011 0 0.544883 0.383442 + 1 2011 1 0.423655 0.791725 + 2 2011 1 0.645894 0.528895 If we have many columns, we could also use a regex to find our stubnames and pass that list on to wide_to_long @@ -359,7 +355,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): ... r'[A-B]\(.*\)').values if match != [] ]) ... ) >>> list(stubnames) - ['A(quarterly)', 'B(quarterly)'] + ['A(weekly)', 'B(weekly)'] All of the above examples have integers as suffixes. It is possible to have non-integers as suffixes. @@ -371,19 +367,19 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): ... 'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] ... }) >>> df - birth famid ht_one ht_two + famid birth ht_one ht_two 0 1 1 2.8 3.4 - 1 2 1 2.9 3.8 - 2 3 1 2.2 2.9 - 3 1 2 2.0 3.2 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 4 2 2 1.8 2.8 - 5 3 2 1.9 2.4 - 6 1 3 2.2 3.3 - 7 2 3 2.3 3.4 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 8 3 3 2.1 2.9 >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age', - sep='_', suffix='\w') + ... sep='_', suffix='\w+') >>> l ... # doctest: +NORMALIZE_WHITESPACE ht