@@ -506,7 +506,8 @@ def test_invalid_timestamp(self, version):
506
506
original = DataFrame ([(1 ,)], columns = ['variable' ])
507
507
time_stamp = '01 Jan 2000, 00:00:00'
508
508
with tm .ensure_clean () as path :
509
- with pytest .raises (ValueError ):
509
+ msg = "time_stamp should be datetime type"
510
+ with pytest .raises (ValueError , match = msg ):
510
511
original .to_stata (path , time_stamp = time_stamp ,
511
512
version = version )
512
513
@@ -547,8 +548,8 @@ def test_no_index(self):
547
548
with tm .ensure_clean () as path :
548
549
original .to_stata (path , write_index = False )
549
550
written_and_read_again = self .read_dta (path )
550
- pytest .raises (
551
- KeyError , lambda : written_and_read_again ['index_not_written' ])
551
+ with pytest .raises (KeyError , match = original . index . name ):
552
+ written_and_read_again ['index_not_written' ]
552
553
553
554
def test_string_no_dates (self ):
554
555
s1 = Series (['a' , 'A longer string' ])
@@ -713,7 +714,11 @@ def test_excessively_long_string(self):
713
714
s ['s' + str (str_len )] = Series (['a' * str_len ,
714
715
'b' * str_len , 'c' * str_len ])
715
716
original = DataFrame (s )
716
- with pytest .raises (ValueError ):
717
+ msg = (r"Fixed width strings in Stata \.dta files are limited to 244"
718
+ r" \(or fewer\)\ncharacters\. Column 's500' does not satisfy"
719
+ r" this restriction\. Use the\n'version=117' parameter to write"
720
+ r" the newer \(Stata 13 and later\) format\." )
721
+ with pytest .raises (ValueError , match = msg ):
717
722
with tm .ensure_clean () as path :
718
723
original .to_stata (path )
719
724
@@ -864,11 +869,14 @@ def test_drop_column(self):
864
869
columns = columns )
865
870
tm .assert_frame_equal (expected , reordered )
866
871
867
- with pytest .raises (ValueError ):
872
+ msg = "columns contains duplicate entries"
873
+ with pytest .raises (ValueError , match = msg ):
868
874
columns = ['byte_' , 'byte_' ]
869
875
read_stata (self .dta15_117 , convert_dates = True , columns = columns )
870
876
871
- with pytest .raises (ValueError ):
877
+ msg = ("The following columns were not found in the Stata data set:"
878
+ " not_found" )
879
+ with pytest .raises (ValueError , match = msg ):
872
880
columns = ['byte_' , 'int_' , 'long_' , 'not_found' ]
873
881
read_stata (self .dta15_117 , convert_dates = True , columns = columns )
874
882
@@ -924,7 +932,10 @@ def test_categorical_warnings_and_errors(self):
924
932
original = pd .concat ([original [col ].astype ('category' )
925
933
for col in original ], axis = 1 )
926
934
with tm .ensure_clean () as path :
927
- pytest .raises (ValueError , original .to_stata , path )
935
+ msg = ("Stata value labels for a single variable must have"
936
+ r" a combined length less than 32,000 characters\." )
937
+ with pytest .raises (ValueError , match = msg ):
938
+ original .to_stata (path )
928
939
929
940
original = pd .DataFrame .from_records (
930
941
[['a' ],
@@ -1196,14 +1207,17 @@ def test_invalid_variable_labels(self, version):
1196
1207
'b' : 'City Exponent' ,
1197
1208
'c' : 'City' }
1198
1209
with tm .ensure_clean () as path :
1199
- with pytest .raises (ValueError ):
1210
+ msg = "Variable labels must be 80 characters or fewer"
1211
+ with pytest .raises (ValueError , match = msg ):
1200
1212
original .to_stata (path ,
1201
1213
variable_labels = variable_labels ,
1202
1214
version = version )
1203
1215
1204
1216
variable_labels ['a' ] = u'invalid character Œ'
1205
1217
with tm .ensure_clean () as path :
1206
- with pytest .raises (ValueError ):
1218
+ msg = ("Variable labels must contain only characters that can be"
1219
+ " encoded in Latin-1" )
1220
+ with pytest .raises (ValueError , match = msg ):
1207
1221
original .to_stata (path ,
1208
1222
variable_labels = variable_labels ,
1209
1223
version = version )
@@ -1221,7 +1235,9 @@ def test_write_variable_label_errors(self):
1221
1235
'b' : 'City Exponent' ,
1222
1236
'c' : u'' .join (values )}
1223
1237
1224
- with pytest .raises (ValueError ):
1238
+ msg = ("Variable labels must contain only characters that can be"
1239
+ " encoded in Latin-1" )
1240
+ with pytest .raises (ValueError , match = msg ):
1225
1241
with tm .ensure_clean () as path :
1226
1242
original .to_stata (path , variable_labels = variable_labels_utf8 )
1227
1243
@@ -1231,7 +1247,8 @@ def test_write_variable_label_errors(self):
1231
1247
'that is too long for Stata which means '
1232
1248
'that it has more than 80 characters' }
1233
1249
1234
- with pytest .raises (ValueError ):
1250
+ msg = "Variable labels must be 80 characters or fewer"
1251
+ with pytest .raises (ValueError , match = msg ):
1235
1252
with tm .ensure_clean () as path :
1236
1253
original .to_stata (path , variable_labels = variable_labels_long )
1237
1254
@@ -1265,7 +1282,8 @@ def test_default_date_conversion(self):
1265
1282
def test_unsupported_type (self ):
1266
1283
original = pd .DataFrame ({'a' : [1 + 2j , 2 + 4j ]})
1267
1284
1268
- with pytest .raises (NotImplementedError ):
1285
+ msg = "Data type complex128 not supported"
1286
+ with pytest .raises (NotImplementedError , match = msg ):
1269
1287
with tm .ensure_clean () as path :
1270
1288
original .to_stata (path )
1271
1289
@@ -1277,7 +1295,8 @@ def test_unsupported_datetype(self):
1277
1295
'strs' : ['apple' , 'banana' , 'cherry' ],
1278
1296
'dates' : dates })
1279
1297
1280
- with pytest .raises (NotImplementedError ):
1298
+ msg = "Format %tC not implemented"
1299
+ with pytest .raises (NotImplementedError , match = msg ):
1281
1300
with tm .ensure_clean () as path :
1282
1301
original .to_stata (path , convert_dates = {'dates' : 'tC' })
1283
1302
@@ -1291,9 +1310,10 @@ def test_unsupported_datetype(self):
1291
1310
1292
1311
def test_repeated_column_labels (self ):
1293
1312
# GH 13923
1294
- with pytest .raises (ValueError ) as cm :
1313
+ msg = (r"Value labels for column ethnicsn are not unique\. The"
1314
+ r" repeated labels are:\n\n-+wolof" )
1315
+ with pytest .raises (ValueError , match = msg ):
1295
1316
read_stata (self .dta23 , convert_categoricals = True )
1296
- assert 'wolof' in cm .exception
1297
1317
1298
1318
def test_stata_111 (self ):
1299
1319
# 111 is an old version but still used by current versions of
@@ -1316,17 +1336,18 @@ def test_out_of_range_double(self):
1316
1336
'ColumnTooBig' : [0.0 ,
1317
1337
np .finfo (np .double ).eps ,
1318
1338
np .finfo (np .double ).max ]})
1319
- with pytest .raises (ValueError ) as cm :
1339
+ msg = (r"Column ColumnTooBig has a maximum value \(.+\)"
1340
+ r" outside the range supported by Stata \(.+\)" )
1341
+ with pytest .raises (ValueError , match = msg ):
1320
1342
with tm .ensure_clean () as path :
1321
1343
df .to_stata (path )
1322
- assert 'ColumnTooBig' in cm .exception
1323
1344
1324
1345
df .loc [2 , 'ColumnTooBig' ] = np .inf
1325
- with pytest .raises (ValueError ) as cm :
1346
+ msg = ("Column ColumnTooBig has a maximum value of infinity which"
1347
+ " is outside the range supported by Stata" )
1348
+ with pytest .raises (ValueError , match = msg ):
1326
1349
with tm .ensure_clean () as path :
1327
1350
df .to_stata (path )
1328
- assert 'ColumnTooBig' in cm .exception
1329
- assert 'infinity' in cm .exception
1330
1351
1331
1352
def test_out_of_range_float (self ):
1332
1353
original = DataFrame ({'ColumnOk' : [0.0 ,
@@ -1348,11 +1369,11 @@ def test_out_of_range_float(self):
1348
1369
reread .set_index ('index' ))
1349
1370
1350
1371
original .loc [2 , 'ColumnTooBig' ] = np .inf
1351
- with pytest .raises (ValueError ) as cm :
1372
+ msg = ("Column ColumnTooBig has a maximum value of infinity which"
1373
+ " is outside the range supported by Stata" )
1374
+ with pytest .raises (ValueError , match = msg ):
1352
1375
with tm .ensure_clean () as path :
1353
1376
original .to_stata (path )
1354
- assert 'ColumnTooBig' in cm .exception
1355
- assert 'infinity' in cm .exception
1356
1377
1357
1378
def test_path_pathlib (self ):
1358
1379
df = tm .makeDataFrame ()
@@ -1466,7 +1487,8 @@ def test_invalid_date_conversion(self):
1466
1487
'dates' : dates })
1467
1488
1468
1489
with tm .ensure_clean () as path :
1469
- with pytest .raises (ValueError ):
1490
+ msg = "convert_dates key must be a column or an integer"
1491
+ with pytest .raises (ValueError , match = msg ):
1470
1492
original .to_stata (path ,
1471
1493
convert_dates = {'wrong_name' : 'tc' })
1472
1494
@@ -1546,18 +1568,27 @@ def test_all_none_exception(self, version):
1546
1568
output = pd .DataFrame (output )
1547
1569
output .loc [:, 'none' ] = None
1548
1570
with tm .ensure_clean () as path :
1549
- with pytest .raises (ValueError ) as excinfo :
1571
+ msg = (r"Column `none` cannot be exported\.\n\n"
1572
+ "Only string-like object arrays containing all strings or a"
1573
+ r" mix of strings and None can be exported\. Object arrays"
1574
+ r" containing only null values are prohibited\. Other"
1575
+ " object typescannot be exported and must first be"
1576
+ r" converted to one of the supported types\." )
1577
+ with pytest .raises (ValueError , match = msg ):
1550
1578
output .to_stata (path , version = version )
1551
- assert 'Only string-like' in excinfo .value .args [0 ]
1552
- assert 'Column `none`' in excinfo .value .args [0 ]
1553
1579
1554
1580
@pytest .mark .parametrize ('version' , [114 , 117 ])
1555
1581
def test_invalid_file_not_written (self , version ):
1556
1582
content = 'Here is one __�__ Another one __·__ Another one __½__'
1557
1583
df = DataFrame ([content ], columns = ['invalid' ])
1558
1584
expected_exc = UnicodeEncodeError if PY3 else UnicodeDecodeError
1559
1585
with tm .ensure_clean () as path :
1560
- with pytest .raises (expected_exc ):
1586
+ msg1 = (r"'latin-1' codec can't encode character '\\ufffd'"
1587
+ r" in position 14: ordinal not in range\(256\)" )
1588
+ msg2 = ("'ascii' codec can't decode byte 0xef in position 14:"
1589
+ r" ordinal not in range\(128\)" )
1590
+ with pytest .raises (expected_exc , match = r'{}|{}' .format (
1591
+ msg1 , msg2 )):
1561
1592
with tm .assert_produces_warning (ResourceWarning ):
1562
1593
df .to_stata (path )
1563
1594
0 commit comments