@@ -1017,13 +1017,10 @@ def test_mutable_df(library):
1017
1017
assert read_s .data .__array__ ().flags ['WRITEABLE' ]
1018
1018
1019
1019
1020
+ @pytest .mark .skipif (six .PY3 , reason = "Skip for Python3" )
1020
1021
def test_forced_encodings_with_df_mixed_types (library ):
1021
1022
sample_data = {'str_col' : ['a' , 'b' ], u'unicode_col' : [u'a' , u'b' ], 'int_col' : [1 , 2 ]}
1022
1023
# This is for testing py2 bytes vs unicode serialization issues. Ignoring Py3 for now.
1023
- if six .PY3 :
1024
- assert True
1025
- return
1026
-
1027
1024
# ===================BEFORE===================
1028
1025
df = pd .DataFrame (sample_data , index = ['str_type' , u'uni_type' ])
1029
1026
assert type (df ['str_col' ][0 ]) == bytes
@@ -1060,13 +1057,10 @@ def test_forced_encodings_with_df_mixed_types(library):
1060
1057
assert all ([type (x ) == unicode for x in df_forced_unicode .index ])
1061
1058
1062
1059
1060
+ @pytest .mark .skipif (six .PY3 , reason = "Skip for Python3" )
1063
1061
def test_forced_encodings_with_df (library ):
1064
1062
sample_data = {'str_col' : ['a' , 'b' ], 'unicode_col' : [u'a' , u'b' ], 'int_col' : [1 , 2 ]}
1065
1063
# This is for testing py2 bytes vs unicode serialization issues. Ignoring Py3 for now.
1066
- if six .PY3 :
1067
- assert True
1068
- return
1069
-
1070
1064
# ===================BEFORE===================
1071
1065
df = pd .DataFrame (sample_data , index = ['str_type' , 'uni_type' ])
1072
1066
assert type (df ['str_col' ][0 ]) == bytes
@@ -1094,13 +1088,10 @@ def test_forced_encodings_with_df(library):
1094
1088
assert all ([type (x ) == unicode for x in df_forced_unicode .index ])
1095
1089
1096
1090
1091
+ @pytest .mark .skipif (six .PY2 , reason = "Skip for Python2" )
1097
1092
def test_forced_encodings_with_df_py3 (library ):
1098
1093
sample_data = {'str_col' : [b'a' , b'b' ], 'unicode_col' : [u'a' , u'b' ], 'int_col' : [1 , 2 ]}
1099
- unicode_type = unicode if six .PY2 else str
1100
- # This is for testing reading in py3 with bytes index.
1101
- if six .PY2 :
1102
- assert True
1103
- return
1094
+ unicode_type = str
1104
1095
1105
1096
# ===================BEFORE===================
1106
1097
df = pd .DataFrame (sample_data , index = [b'str_type' , b'uni_type' ])
@@ -1127,3 +1118,45 @@ def test_forced_encodings_with_df_py3(library):
1127
1118
# Should force everything to be unicode_type now.
1128
1119
assert all ([type (x ) == unicode_type for x in df_forced_unicode .columns ])
1129
1120
assert all ([type (x ) == unicode_type for x in df_forced_unicode .index ])
1121
+
1122
+
1123
@pytest.mark.skipif(six.PY2, reason="Skip for Python2")
def test_forced_encodings_with_df_py3_multi_index(library):
    """Round-trip a DataFrame with a bytes MultiIndex on Python 3.

    Verifies that a plain read preserves bytes index levels while columns
    come back as str, and that force_bytes_to_unicode=True converts the
    string column values and every index level to str as well.
    """
    payload = {'str_col': [b'a', b'b'], 'unicode_col': [u'a', u'b'], 'int_col': [1, 2]}
    unicode_type = str

    def _all_levels_are(frame, expected):
        # True when the first value of each MultiIndex level has the expected type.
        return all(
            type(frame.index.get_level_values(lvl)[0]) == expected
            for lvl in range(len(frame.index.levels))
        )

    # ===================BEFORE===================
    bytes_index = pd.MultiIndex.from_tuples([(b'ele1', b'uni_type1'), (b'ele2', b'uni_type2')])
    original = pd.DataFrame(payload, index=bytes_index)
    assert type(original['str_col'][0]) == bytes
    assert type(original['unicode_col'][0]) == unicode_type
    # Check that all column names are stored as as is by pandas
    assert all(type(col) == unicode_type for col in original.columns)
    assert _all_levels_are(original, bytes)

    library.write('dummy', original)

    # ===================READ BACK WITHOUT FORCED ENCODING===================
    plain_read = library.read('dummy').data
    assert type(plain_read['str_col'][0]) == bytes
    assert type(plain_read['unicode_col'][0]) == unicode_type
    # Arctic currently converts all column to unicode_type and will keep index type as is
    assert all(type(col) == unicode_type for col in plain_read.columns)
    assert _all_levels_are(plain_read, bytes)

    # ===================READ BACK WITH FORCED ENCODING===================
    forced_read = library.read('dummy', force_bytes_to_unicode=True).data
    assert type(forced_read['str_col'][0]) == unicode_type
    assert type(forced_read['unicode_col'][0]) == unicode_type
    # Should force everything to be unicode_type now.
    assert all(type(col) == unicode_type for col in forced_read.columns)
    assert _all_levels_are(forced_read, unicode_type)
0 commit comments