@@ -740,6 +740,89 @@ def test_to_csv_withcommas(self):
740
740
df2 = self .read_csv (path )
741
741
tm .assert_frame_equal (df2 , df )
742
742
743
def test_to_csv_bytes(self):
    """Exercise ``DataFrame.to_csv`` on frames holding ``bytes`` objects.

    Scenarios, in order:

    * bytes column labels, index labels, index name and values are written
      so that reading the file back yields the equivalent ``str`` frame;
    * bytes that are only valid in a non-UTF-8 codec round-trip when that
      codec is passed as ``encoding``, and raise ``UnicodeDecodeError``
      when ``encoding="utf-8"`` is requested instead;
    * mixing bytes and non-bytes within values, column labels or index
      labels raises ``ValueError``;
    * a ``MultiIndex`` with one bytes level and bytes level names is
      written as plain text, while mixing bytes and non-bytes inside a
      level or among the level names raises ``ValueError``.
    """
    # GH 9712
    times = date_range("2013-10-27 23:00", "2013-10-28 00:00", freq="H")
    df = DataFrame(
        {b"foo": [b"bar", b"baz"], b"times": times}, index=[b"A", b"B"]
    )
    # Append an all-NaN row so missing values are covered as well.
    df.loc[b"C"] = np.nan
    df.index.name = b"idx"

    df_expected = DataFrame(
        {"foo": ["bar", "baz"], "times": times}, index=["A", "B"]
    )
    df_expected.loc["C"] = np.nan
    df_expected.index.name = "idx"

    with tm.ensure_clean("__tmp_to_csv_bytes__.csv") as path:
        df.to_csv(path, header=True)
        df_output = self.read_csv(path)
        # Convert the column read back from the file to datetimes so it is
        # comparable with the datetime column of the expected frame.
        df_output["times"] = to_datetime(df_output["times"])
        tm.assert_frame_equal(df_output, df_expected)

    # Two bytes that are decoded here with gb18030; the UTF-8 case below
    # expects the same bytes to fail decoding.
    non_unicode_byte = b"\xbc\xa6"
    non_unicode_decoded = non_unicode_byte.decode("gb18030")
    df = DataFrame({non_unicode_byte: [non_unicode_byte, b"foo"]})
    df.index.name = "idx"

    df_expected = DataFrame({non_unicode_decoded: [non_unicode_decoded, "foo"]})
    df_expected.index.name = "idx"

    with tm.ensure_clean("__tmp_to_csv_bytes__.csv") as path:
        df.to_csv(path, encoding="gb18030", header=True)
        df_output = self.read_csv(path, encoding="gb18030")
        tm.assert_frame_equal(df_output, df_expected)

    # decoding error, when transcoding fails
    with pytest.raises(UnicodeDecodeError):
        df.to_csv(encoding="utf-8")

    # mixing of bytes and non-bytes: in the values ...
    df = DataFrame({"foo": [b"bar", "baz"]})
    with pytest.raises(ValueError):
        df.to_csv()
    # ... in the column labels ...
    df = DataFrame({b"foo": ["a", "b"], "bar": ["c", "d"]})
    with pytest.raises(ValueError):
        df.to_csv()
    # ... and in the index labels.
    df = DataFrame({"foo": ["a", "b"], "bar": ["c", "d"]}, index=["A", b"B"])
    with pytest.raises(ValueError):
        df.to_csv()

    # multi-indexes
    iterables = [[b"A", b"B"], ["C", "D"]]
    index = pd.MultiIndex.from_product(iterables, names=[b"f", b"s"])
    data = np.array([[0, 0], [0, 0], [0, 0], [0, 0]])
    df = DataFrame(data, index=index)

    with tm.ensure_clean("__tmp_to_csv_bytes__.csv") as path:
        df.to_csv(path)
        # Read the raw lines back instead of parsing them, so the exact
        # text written for the bytes labels is what gets compared.
        with open(path, encoding="utf-8") as csvfile:
            output = csvfile.readlines()

    expected = [
        "f,s,0,1\n",
        "A,C,0,0\n",
        "A,D,0,0\n",
        "B,C,0,0\n",
        "B,D,0,0\n",
    ]
    assert output == expected

    # mixing of bytes and non-bytes in multi-indexes: within a level ...
    iterables = [[b"A", "B"], ["C", "D"]]
    index = pd.MultiIndex.from_product(iterables)
    df = DataFrame(data, index=index)
    with pytest.raises(ValueError):
        df.to_csv()

    # ... and among the level names.
    iterables = [["A", "B"], ["C", "D"]]
    index = pd.MultiIndex.from_product(iterables, names=[b"f", "s"])
    df = DataFrame(data, index=index)
    with pytest.raises(ValueError):
        df.to_csv()
825
+
743
826
def test_to_csv_mixed (self ):
744
827
def create_cols (name ):
745
828
return [f"{ name } { i :03d} " for i in range (5 )]
0 commit comments