Skip to content

Commit a79a515

Browse files
committed
BUG: Fix limited key range on 32-bit platforms
Fix use of 64-bit integers as keys in general string objects (GSO) by wrapping in strings when used as dictionary keys
1 parent 2f02697 commit a79a515

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

pandas/io/stata.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,7 @@ class StataParser(object):
824824
def __init__(self, encoding='latin-1'):
825825

826826
if encoding not in VALID_ENCODINGS:
827-
raise ValueError('Unknown encoding. Only latin-1 and ascii '
827+
raise ValueError('Unknown encoding. Only latin-1 and ascii '
828828
'supported.')
829829

830830
self._encoding = encoding
@@ -1373,7 +1373,8 @@ def _read_value_labels(self):
13731373

13741374
def _read_strls(self):
13751375
self.path_or_buf.seek(self.seek_strls)
1376-
self.GSO = {0: ''}
1376+
# Wrap v_o in a string to allow uint64 values as keys on 32bit OS
1377+
self.GSO = {'0': ''}
13771378
while True:
13781379
if self.path_or_buf.read(3) != b'GSO':
13791380
break
@@ -1398,7 +1399,8 @@ def _read_strls(self):
13981399
if self.format_version == 117:
13991400
encoding = self._encoding or self._default_encoding
14001401
va = va[0:-1].decode(encoding)
1401-
self.GSO[v_o] = va
1402+
# Wrap v_o in a string to allow uint64 values as keys on 32bit OS
1403+
self.GSO[str(v_o)] = va
14021404

14031405
# legacy
14041406
@Appender('DEPRECATED: ' + _data_method_doc)
@@ -1634,7 +1636,8 @@ def _insert_strls(self, data):
16341636
for i, typ in enumerate(self.typlist):
16351637
if typ != 'Q':
16361638
continue
1637-
data.iloc[:, i] = [self.GSO[k] for k in data.iloc[:, i]]
1639+
# Wrap v_o in a string to allow uint64 values as keys on 32bit OS
1640+
data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]]
16381641
return data
16391642

16401643
def _do_select_columns(self, data, columns):

pandas/tests/io/test_stata.py

+1
Original file line numberDiff line numberDiff line change
@@ -1277,6 +1277,7 @@ def test_out_of_range_float(self):
12771277
tm.assertTrue('ColumnTooBig' in cm.exception)
12781278
tm.assertTrue('infinity' in cm.exception)
12791279

1280+
# GH15723, validate encoding
12801281
def test_invalid_encoding(self):
12811282
original = self.read_csv(self.csv3)
12821283
with tm.assertRaises(ValueError):

0 commit comments

Comments
 (0)