@@ -907,10 +907,13 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
907
907
in the wide format, to be stripped from the names in the long format.
908
908
For example, if your column names are A-suffix1, A-suffix2, you
909
909
can strip the hyphen by specifying `sep`='-'
910
- suffix : str default '\d+'
910
+ suffix : str, default '\d+'
911
911
A regular expression capturing the wanted suffixes. '\d+' captures
912
912
numeric suffixes. Suffixes with no numbers could be specified with the
913
- negated character class '\D+'.
913
+ negated character class '\D+'. You can also further disambiguate
914
+ suffixes, for example, if your wide variables are of the form
915
+ Aone, Btwo,.., and you have an unrelated column Arating, you can
916
+ ignore the last one by specifying `suffix`='(!?one|two)'
914
917
915
918
Returns
916
919
-------
@@ -1048,16 +1051,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
1048
1051
in a typical case.
1049
1052
"""
1050
1053
def get_var_names (df , stub , sep , suffix ):
1051
- # The first part of this regex is needed to avoid multiple "greedy"
1052
- # matches with stubs that have overlapping substrings. For example
1053
- # A2011, A2012 are separate from AA2011, AA2012. And BBone, BBtwo is
1054
- # different from Bone, Btwo, and BBBrating
1055
- # The last part lets us disambiguate suffixes. For example, with
1056
- # stubname A: (A2011, A2012) would be captured while Arating would
1057
- # be ignored by the numeric class \d+
1058
- regex = "^{0}(?!{1}){2}{3}" .format (
1059
- re .escape (stub ), re .escape (stub [- 1 ]), re .escape (sep ), suffix )
1060
-
1054
+ regex = "^{0}{1}{2}" .format (re .escape (stub ), re .escape (sep ), suffix )
1061
1055
return df .filter (regex = regex ).columns .tolist ()
1062
1056
1063
1057
def melt_stub (df , stub , i , j , value_vars , sep ):
0 commit comments