@@ -764,12 +764,12 @@ def test_simple(self):
764
764
exp_data = {"X" : x .tolist () + x .tolist (),
765
765
"A" : ['a' , 'b' , 'c' , 'd' , 'e' , 'f' ],
766
766
"B" : [2.5 , 1.2 , 0.7 , 3.2 , 1.3 , 0.1 ],
767
- "year" : [' 1970' , ' 1970' , ' 1970' , ' 1980' , ' 1980' , ' 1980' ],
767
+ "year" : [1970 , 1970 , 1970 , 1980 , 1980 , 1980 ],
768
768
"id" : [0 , 1 , 2 , 0 , 1 , 2 ]}
769
- exp_frame = DataFrame (exp_data )
770
- exp_frame = exp_frame .set_index (['id' , 'year' ])[["X" , "A" , "B" ]]
771
- long_frame = wide_to_long (df , ["A" , "B" ], i = "id" , j = "year" )
772
- tm .assert_frame_equal (long_frame , exp_frame )
769
+ expected = DataFrame (exp_data )
770
+ expected = expected .set_index (['id' , 'year' ])[["X" , "A" , "B" ]]
771
+ result = wide_to_long (df , ["A" , "B" ], i = "id" , j = "year" )
772
+ tm .assert_frame_equal (result , expected )
773
773
774
774
def test_stubs (self ):
775
775
# GH9204
@@ -804,12 +804,12 @@ def test_separating_character(self):
804
804
exp_data = {"X" : x .tolist () + x .tolist (),
805
805
"A" : ['a' , 'b' , 'c' , 'd' , 'e' , 'f' ],
806
806
"B" : [2.5 , 1.2 , 0.7 , 3.2 , 1.3 , 0.1 ],
807
- "year" : [' 1970' , ' 1970' , ' 1970' , ' 1980' , ' 1980' , ' 1980' ],
807
+ "year" : [1970 , 1970 , 1970 , 1980 , 1980 , 1980 ],
808
808
"id" : [0 , 1 , 2 , 0 , 1 , 2 ]}
809
- exp_frame = DataFrame (exp_data )
810
- exp_frame = exp_frame .set_index (['id' , 'year' ])[["X" , "A" , "B" ]]
811
- long_frame = wide_to_long (df , ["A" , "B" ], i = "id" , j = "year" , sep = "." )
812
- tm .assert_frame_equal (long_frame , exp_frame )
809
+ expected = DataFrame (exp_data )
810
+ expected = expected .set_index (['id' , 'year' ])[["X" , "A" , "B" ]]
811
+ result = wide_to_long (df , ["A" , "B" ], i = "id" , j = "year" , sep = "." )
812
+ tm .assert_frame_equal (result , expected )
813
813
814
814
def test_escapable_characters (self ):
815
815
np .random .seed (123 )
@@ -832,14 +832,14 @@ def test_escapable_characters(self):
832
832
exp_data = {"X" : x .tolist () + x .tolist (),
833
833
"A(quarterly)" : ['a' , 'b' , 'c' , 'd' , 'e' , 'f' ],
834
834
"B(quarterly)" : [2.5 , 1.2 , 0.7 , 3.2 , 1.3 , 0.1 ],
835
- "year" : [' 1970' , ' 1970' , ' 1970' , ' 1980' , ' 1980' , ' 1980' ],
835
+ "year" : [1970 , 1970 , 1970 , 1980 , 1980 , 1980 ],
836
836
"id" : [0 , 1 , 2 , 0 , 1 , 2 ]}
837
- exp_frame = DataFrame (exp_data )
838
- exp_frame = exp_frame .set_index (
837
+ expected = DataFrame (exp_data )
838
+ expected = expected .set_index (
839
839
['id' , 'year' ])[["X" , "A(quarterly)" , "B(quarterly)" ]]
840
- long_frame = wide_to_long (df , ["A(quarterly)" , "B(quarterly)" ],
841
- i = "id" , j = "year" )
842
- tm .assert_frame_equal (long_frame , exp_frame )
840
+ result = wide_to_long (df , ["A(quarterly)" , "B(quarterly)" ],
841
+ i = "id" , j = "year" )
842
+ tm .assert_frame_equal (result , expected )
843
843
844
844
def test_unbalanced (self ):
845
845
# test that we can have a varying amount of time variables
@@ -852,11 +852,11 @@ def test_unbalanced(self):
852
852
'A' : [1.0 , 3.0 , 2.0 , 4.0 ],
853
853
'B' : [5.0 , np .nan , 6.0 , np .nan ],
854
854
'id' : [0 , 0 , 1 , 1 ],
855
- 'year' : [' 2010' , ' 2011' , ' 2010' , ' 2011' ]}
856
- exp_frame = pd .DataFrame (exp_data )
857
- exp_frame = exp_frame .set_index (['id' , 'year' ])[["X" , "A" , "B" ]]
858
- long_frame = wide_to_long (df , ['A' , 'B' ], i = 'id' , j = 'year' )
859
- tm .assert_frame_equal (long_frame , exp_frame )
855
+ 'year' : [2010 , 2011 , 2010 , 2011 ]}
856
+ expected = pd .DataFrame (exp_data )
857
+ expected = expected .set_index (['id' , 'year' ])[["X" , "A" , "B" ]]
858
+ result = wide_to_long (df , ['A' , 'B' ], i = 'id' , j = 'year' )
859
+ tm .assert_frame_equal (result , expected )
860
860
861
861
def test_character_overlap (self ):
862
862
# Test we handle overlapping characters in both id_vars and value_vars
@@ -871,19 +871,19 @@ def test_character_overlap(self):
871
871
'BBBZ' : [91 , 92 , 93 ]
872
872
})
873
873
df ['id' ] = df .index
874
- exp_frame = pd .DataFrame ({
874
+ expected = pd .DataFrame ({
875
875
'BBBX' : [91 , 92 , 93 , 91 , 92 , 93 ],
876
876
'BBBZ' : [91 , 92 , 93 , 91 , 92 , 93 ],
877
877
'A' : ['a11' , 'a22' , 'a33' , 'a21' , 'a22' , 'a23' ],
878
878
'B' : ['b11' , 'b12' , 'b13' , 'b21' , 'b22' , 'b23' ],
879
879
'BB' : [1 , 2 , 3 , 4 , 5 , 6 ],
880
880
'id' : [0 , 1 , 2 , 0 , 1 , 2 ],
881
- 'year' : ['11' , '11' , '11' , '12' , '12' , '12' ]})
882
- exp_frame = exp_frame .set_index (['id' , 'year' ])[
881
+ 'year' : [11 , 11 , 11 , 12 , 12 , 12 ]})
882
+ expected = expected .set_index (['id' , 'year' ])[
883
883
['BBBX' , 'BBBZ' , 'A' , 'B' , 'BB' ]]
884
- long_frame = wide_to_long (df , ['A' , 'B' , 'BB' ], i = 'id' , j = 'year' )
885
- tm .assert_frame_equal (long_frame .sort_index (axis = 1 ),
886
- exp_frame .sort_index (axis = 1 ))
884
+ result = wide_to_long (df , ['A' , 'B' , 'BB' ], i = 'id' , j = 'year' )
885
+ tm .assert_frame_equal (result .sort_index (axis = 1 ),
886
+ expected .sort_index (axis = 1 ))
887
887
888
888
def test_invalid_separator (self ):
889
889
# if an invalid separator is supplied a empty data frame is returned
@@ -901,13 +901,13 @@ def test_invalid_separator(self):
901
901
'year' : [],
902
902
'A' : [],
903
903
'B' : []}
904
- exp_frame = pd .DataFrame (exp_data )
905
- exp_frame = exp_frame .set_index (['id' , 'year' ])[[
904
+ expected = pd .DataFrame (exp_data ). astype ({ 'year' : 'int' } )
905
+ expected = expected .set_index (['id' , 'year' ])[[
906
906
'X' , 'A2010' , 'A2011' , 'B2010' , 'A' , 'B' ]]
907
- exp_frame .index .set_levels ([[ 0 , 1 ], []] , inplace = True )
908
- long_frame = wide_to_long (df , ['A' , 'B' ], i = 'id' , j = 'year' , sep = sep )
909
- tm .assert_frame_equal (long_frame .sort_index (axis = 1 ),
910
- exp_frame .sort_index (axis = 1 ))
907
+ expected .index .set_levels ([0 , 1 ], level = 0 , inplace = True )
908
+ result = wide_to_long (df , ['A' , 'B' ], i = 'id' , j = 'year' , sep = sep )
909
+ tm .assert_frame_equal (result .sort_index (axis = 1 ),
910
+ expected .sort_index (axis = 1 ))
911
911
912
912
def test_num_string_disambiguation (self ):
913
913
# Test that we can disambiguate number value_vars from
@@ -923,19 +923,19 @@ def test_num_string_disambiguation(self):
923
923
'Arating_old' : [91 , 92 , 93 ]
924
924
})
925
925
df ['id' ] = df .index
926
- exp_frame = pd .DataFrame ({
926
+ expected = pd .DataFrame ({
927
927
'Arating' : [91 , 92 , 93 , 91 , 92 , 93 ],
928
928
'Arating_old' : [91 , 92 , 93 , 91 , 92 , 93 ],
929
929
'A' : ['a11' , 'a22' , 'a33' , 'a21' , 'a22' , 'a23' ],
930
930
'B' : ['b11' , 'b12' , 'b13' , 'b21' , 'b22' , 'b23' ],
931
931
'BB' : [1 , 2 , 3 , 4 , 5 , 6 ],
932
932
'id' : [0 , 1 , 2 , 0 , 1 , 2 ],
933
- 'year' : ['11' , '11' , '11' , '12' , '12' , '12' ]})
934
- exp_frame = exp_frame .set_index (['id' , 'year' ])[
933
+ 'year' : [11 , 11 , 11 , 12 , 12 , 12 ]})
934
+ expected = expected .set_index (['id' , 'year' ])[
935
935
['Arating' , 'Arating_old' , 'A' , 'B' , 'BB' ]]
936
- long_frame = wide_to_long (df , ['A' , 'B' , 'BB' ], i = 'id' , j = 'year' )
937
- tm .assert_frame_equal (long_frame .sort_index (axis = 1 ),
938
- exp_frame .sort_index (axis = 1 ))
936
+ result = wide_to_long (df , ['A' , 'B' , 'BB' ], i = 'id' , j = 'year' )
937
+ tm .assert_frame_equal (result .sort_index (axis = 1 ),
938
+ expected .sort_index (axis = 1 ))
939
939
940
940
def test_invalid_suffixtype (self ):
941
941
# If all stubs names end with a string, but a numeric suffix is
@@ -953,13 +953,13 @@ def test_invalid_suffixtype(self):
953
953
'year' : [],
954
954
'A' : [],
955
955
'B' : []}
956
- exp_frame = pd .DataFrame (exp_data )
957
- exp_frame = exp_frame . set_index ([ 'id' , 'year' ])[[
958
- 'X ' , 'Aone' , 'Atwo' , 'Bone' , 'A' , 'B' ]]
959
- exp_frame .index .set_levels ([[ 0 , 1 ], []] , inplace = True )
960
- long_frame = wide_to_long (df , ['A' , 'B' ], i = 'id' , j = 'year' )
961
- tm .assert_frame_equal (long_frame .sort_index (axis = 1 ),
962
- exp_frame .sort_index (axis = 1 ))
956
+ expected = pd .DataFrame (exp_data ). astype ({ 'year' : 'int' } )
957
+
958
+ expected = expected . set_index ([ 'id ' , 'year' ])
959
+ expected .index .set_levels ([0 , 1 ], level = 0 , inplace = True )
960
+ result = wide_to_long (df , ['A' , 'B' ], i = 'id' , j = 'year' )
961
+ tm .assert_frame_equal (result .sort_index (axis = 1 ),
962
+ expected .sort_index (axis = 1 ))
963
963
964
964
def test_multiple_id_columns (self ):
965
965
# Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
@@ -969,17 +969,17 @@ def test_multiple_id_columns(self):
969
969
'ht1' : [2.8 , 2.9 , 2.2 , 2 , 1.8 , 1.9 , 2.2 , 2.3 , 2.1 ],
970
970
'ht2' : [3.4 , 3.8 , 2.9 , 3.2 , 2.8 , 2.4 , 3.3 , 3.4 , 2.9 ]
971
971
})
972
- exp_frame = pd .DataFrame ({
972
+ expected = pd .DataFrame ({
973
973
'ht' : [2.8 , 3.4 , 2.9 , 3.8 , 2.2 , 2.9 , 2.0 , 3.2 , 1.8 ,
974
974
2.8 , 1.9 , 2.4 , 2.2 , 3.3 , 2.3 , 3.4 , 2.1 , 2.9 ],
975
975
'famid' : [1 , 1 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 , 3 , 3 ],
976
976
'birth' : [1 , 1 , 2 , 2 , 3 , 3 , 1 , 1 , 2 , 2 , 3 , 3 , 1 , 1 , 2 , 2 , 3 , 3 ],
977
- 'age' : ['1' , '2' , '1' , '2' , '1' , '2' , '1' , '2' , '1' ,
978
- '2' , '1' , '2' , '1' , '2' , '1' , '2' , '1' , '2' ]
977
+ 'age' : [1 , 2 , 1 , 2 , 1 , 2 , 1 , 2 , 1 ,
978
+ 2 , 1 , 2 , 1 , 2 , 1 , 2 , 1 , 2 ]
979
979
})
980
- exp_frame = exp_frame .set_index (['famid' , 'birth' , 'age' ])[['ht' ]]
981
- long_frame = wide_to_long (df , 'ht' , i = ['famid' , 'birth' ], j = 'age' )
982
- tm .assert_frame_equal (long_frame , exp_frame )
980
+ expected = expected .set_index (['famid' , 'birth' , 'age' ])[['ht' ]]
981
+ result = wide_to_long (df , 'ht' , i = ['famid' , 'birth' ], j = 'age' )
982
+ tm .assert_frame_equal (result , expected )
983
983
984
984
def test_non_unique_idvars (self ):
985
985
# GH16382
@@ -991,3 +991,87 @@ def test_non_unique_idvars(self):
991
991
})
992
992
with pytest .raises (ValueError ):
993
993
wide_to_long (df , ['A_A' , 'B_B' ], i = 'x' , j = 'colname' )
994
+
995
def test_cast_j_int(self):
    """Reshape ``*_1`` / ``*_2`` columns and expect an integer ``num`` level.

    The wide frame carries two stubs (``actor`` and ``actor_fb_likes``)
    separated from their numeric suffix by ``_``.  The expected long frame
    is indexed by ``title`` and ``num``, where ``num`` holds the integers
    1 and 2.
    """
    titles = ['Avatar', "Pirates of the Caribbean", 'Spectre']
    first_actors = ['CCH Pounder', 'Johnny Depp', 'Christoph Waltz']
    second_actors = ['Joel David Moore', 'Orlando Bloom', 'Rory Kinnear']
    first_likes = [1000.0, 40000.0, 11000.0]
    second_likes = [936.0, 5000.0, 393.0]

    wide = pd.DataFrame({
        'actor_1': first_actors,
        'actor_2': second_actors,
        'actor_fb_likes_1': first_likes,
        'actor_fb_likes_2': second_likes,
        'title': titles})

    # Long layout: all suffix-1 rows first, then all suffix-2 rows.
    long_data = {
        'actor': first_actors + second_actors,
        'actor_fb_likes': first_likes + second_likes,
        'num': [1, 1, 1, 2, 2, 2],
        'title': titles + titles}
    expected = pd.DataFrame(long_data)
    expected = expected.set_index(['title', 'num'])

    result = wide_to_long(wide, ['actor', 'actor_fb_likes'],
                          i='title', j='num', sep='_')

    tm.assert_frame_equal(result, expected)
1022
+
1023
def test_identical_stubnames(self):
    """Expect ``ValueError`` when a stub name equals an existing column.

    The frame has a plain column ``'A'`` that is used as the id variable,
    while ``'A'`` is also passed as one of the stub names.
    """
    columns = {'A2010': [1.0, 2.0],
               'A2011': [3.0, 4.0],
               'B2010': [5.0, 6.0],
               'A': ['X1', 'X2']}
    wide = pd.DataFrame(columns)
    stubs = ['A', 'B']
    with pytest.raises(ValueError):
        wide_to_long(wide, stubs, i='A', j='colname')
1030
+
1031
def test_nonnumeric_suffix(self):
    """Reshape columns whose suffixes are lowercase words, not numbers.

    The ``suffix`` regex ``'[a-z]+'`` is supplied explicitly together with
    the ``'_'`` separator.  ``result`` has no ``test`` column in the wide
    frame, so the expected long frame holds NaN for those rows.
    """
    wide = pd.DataFrame({'treatment_placebo': [1.0, 2.0],
                         'treatment_test': [3.0, 4.0],
                         'result_placebo': [5.0, 6.0],
                         'A': ['X1', 'X2']})

    nan = np.nan
    expected = pd.DataFrame({
        'A': ['X1', 'X1', 'X2', 'X2'],
        'colname': ['placebo', 'test', 'placebo', 'test'],
        'result': [5.0, nan, 6.0, nan],
        'treatment': [1.0, 3.0, 2.0, 4.0]}).set_index(['A', 'colname'])

    stubs = ['result', 'treatment']
    result = wide_to_long(wide, stubs, i='A', j='colname',
                          suffix='[a-z]+', sep='_')
    tm.assert_frame_equal(result, expected)
1045
+
1046
def test_mixed_type_suffix(self):
    """Reshape columns whose suffixes mix a digit (``1``) and text (``foo``).

    With the catch-all ``suffix`` regex ``'.+'`` the expected ``colname``
    level contains the strings ``'1'`` and ``'foo'``.
    """
    wide = pd.DataFrame({
        'treatment_1': [1.0, 2.0],
        'treatment_foo': [3.0, 4.0],
        'result_foo': [5.0, 6.0],
        'result_1': [0, 9],
        'A': ['X1', 'X2']})

    long_data = {
        'A': ['X1', 'X2', 'X1', 'X2'],
        'colname': ['1', '1', 'foo', 'foo'],
        'result': [0.0, 9.0, 5.0, 6.0],
        'treatment': [1.0, 2.0, 3.0, 4.0]}
    expected = pd.DataFrame(long_data)
    expected = expected.set_index(['A', 'colname'])

    stubs = ['result', 'treatment']
    result = wide_to_long(wide, stubs, i='A', j='colname',
                          suffix='.+', sep='_')
    tm.assert_frame_equal(result, expected)
1061
+
1062
def test_float_suffix(self):
    """Reshape columns whose suffixes look like decimal numbers.

    The ``suffix`` regex ``'[0-9.]+'`` matches ``1``, ``1.1``, ``1.2`` and
    ``2.1``; the expected ``colname`` level holds those values as numbers.
    Stub/suffix pairs absent from the wide frame are NaN in the expected
    long frame.
    """
    wide = pd.DataFrame({
        'treatment_1.1': [1.0, 2.0],
        'treatment_2.1': [3.0, 4.0],
        'result_1.2': [5.0, 6.0],
        'result_1': [0, 9],
        'A': ['X1', 'X2']})

    nan = np.nan
    suffixes = [1, 1.1, 1.2, 2.1]
    long_data = {
        'A': ['X1'] * 4 + ['X2'] * 4,
        'colname': suffixes + suffixes,
        'result': [0.0, nan, 5.0, nan, 9.0, nan, 6.0, nan],
        'treatment': [nan, 1.0, nan, 3.0, nan, 2.0, nan, 4.0]}
    expected = pd.DataFrame(long_data).set_index(['A', 'colname'])

    stubs = ['result', 'treatment']
    result = wide_to_long(wide, stubs, i='A', j='colname',
                          suffix='[0-9.]+', sep='_')
    tm.assert_frame_equal(result, expected)
0 commit comments