@@ -1092,3 +1092,38 @@ def test_forced_encodings_with_df(library):
1092
1092
# Should force everything to be unicode now.
1093
1093
assert all ([type (x ) == unicode for x in df_forced_unicode .columns ])
1094
1094
assert all ([type (x ) == unicode for x in df_forced_unicode .index ])
1095
+
1096
+
1097
def test_forced_encodings_with_df_py3(library):
    """Round-trip a DataFrame with a bytes index through ``library`` on Python 3.

    The element, column-label and index-label types are checked at three
    points: on the freshly built frame, after a plain ``library.read``, and
    after ``library.read(..., force_bytes_to_unicode=True)``.

    The scenario (reading back a bytes index) is Python 3 specific, so on
    Python 2 the test returns immediately without asserting anything.
    """
    # This is for testing reading in py3 with a bytes index.
    if six.PY2:
        return

    text_type = six.text_type
    sample_data = {
        'str_col': [b'a', b'b'],
        'unicode_col': [u'a', u'b'],
        'int_col': [1, 2],
    }

    # NOTE: the index holds bytes labels, so the first element of a column is
    # fetched with ``.iloc[0]``. A bare ``series[0]`` would depend on pandas'
    # positional fallback for integer keys on a non-integer index.
    # Types are compared exactly (not with ``isinstance``) so that subclasses
    # of ``bytes``/``str`` do not satisfy the checks.

    # ===================BEFORE===================
    df = pd.DataFrame(sample_data, index=[b'str_type', b'uni_type'])
    assert type(df['str_col'].iloc[0]) is bytes
    assert type(df['unicode_col'].iloc[0]) is text_type
    # Check that all column names and index labels are stored as is by pandas
    assert all(type(label) is text_type for label in df.columns)
    assert all(type(label) is bytes for label in df.index)

    library.write('dummy', df)

    # ===================READ BACK WITHOUT FORCED ENCODING===================
    df_normal = library.read('dummy').data
    assert type(df_normal['str_col'].iloc[0]) is bytes
    assert type(df_normal['unicode_col'].iloc[0]) is text_type
    # Arctic currently converts all column names to the text type and keeps
    # the index type as is
    assert all(type(label) is text_type for label in df_normal.columns)
    assert all(type(label) is bytes for label in df_normal.index)

    # ===================READ BACK WITH FORCED ENCODING===================
    df_forced_unicode = library.read('dummy', force_bytes_to_unicode=True).data
    assert type(df_forced_unicode['str_col'].iloc[0]) is text_type
    assert type(df_forced_unicode['unicode_col'].iloc[0]) is text_type
    # Should force everything to be the text type now.
    assert all(type(label) is text_type for label in df_forced_unicode.columns)
    assert all(type(label) is text_type for label in df_forced_unicode.index)
0 commit comments