Skip to content

Commit c1f673b

Browse files
BUG: HDFStore select for large integer (#54186)
* use decimal to prevent precision loss
* added unit test
* raise ValueError if given value cannot be converted to double
* added bugfix in whatsnew
* updated unit test
* added context manager and cleaned up assert for readability
* updated whatsnew
* moved imports to top
* reverted to float's ValueError raise
* added comments for float(v)
1 parent 587176e commit c1f673b

File tree

3 files changed

+33
-3
lines changed

3 files changed

+33
-3
lines changed

doc/source/whatsnew/v2.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ Conversion
485485
- Bug in :meth:`DataFrame.__repr__` incorrectly raising a ``TypeError`` when the dtype of a column is ``np.record`` (:issue:`48526`)
486486
- Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`)
487487
- Bug in :meth:`DataFrame.insert` raising ``TypeError`` if ``loc`` is ``np.int64`` (:issue:`53193`)
488-
-
488+
- Bug in :meth:`HDFStore.select` losing precision of large integers when stored and retrieved (:issue:`54186`)
489489

490490
Strings
491491
^^^^^^^

pandas/core/computation/pytables.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
from __future__ import annotations
33

44
import ast
5+
from decimal import (
6+
Decimal,
7+
InvalidOperation,
8+
)
59
from functools import partial
610
from typing import (
711
TYPE_CHECKING,
@@ -233,7 +237,14 @@ def stringify(value):
233237
result = metadata.searchsorted(v, side="left")
234238
return TermValue(result, result, "integer")
235239
elif kind == "integer":
236-
v = int(float(v))
240+
try:
241+
v_dec = Decimal(v)
242+
except InvalidOperation:
243+
# GH 54186
244+
# Decimal could not parse v; call float(v) (result discarded) so that float's own ValueError is raised
245+
float(v)
246+
else:
247+
v = int(v_dec.to_integral_exact(rounding="ROUND_HALF_EVEN"))
237248
return TermValue(v, v, kind)
238249
elif kind == "float":
239250
v = float(v)

pandas/tests/io/pytables/test_select.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -914,7 +914,7 @@ def test_query_compare_column_type(setup_path):
914914
if col == "real_date":
915915
msg = 'Given date string "a" not likely a datetime'
916916
else:
917-
msg = "could not convert string to "
917+
msg = "could not convert string to"
918918
with pytest.raises(ValueError, match=msg):
919919
store.select("test", where=query)
920920

@@ -943,3 +943,22 @@ def test_select_empty_where(tmp_path, where):
943943
store.put("df", df, "t")
944944
result = read_hdf(store, "df", where=where)
945945
tm.assert_frame_equal(result, df)
946+
947+
948+
def test_select_large_integer(tmp_path):
949+
path = tmp_path / "large_int.h5"
950+
951+
df = DataFrame(
952+
zip(
953+
["a", "b", "c", "d"],
954+
[-9223372036854775801, -9223372036854775802, -9223372036854775803, 123],
955+
),
956+
columns=["x", "y"],
957+
)
958+
result = None
959+
with HDFStore(path) as s:
960+
s.append("data", df, data_columns=True, index=False)
961+
result = s.select("data", where="y==-9223372036854775801").get("y").get(0)
962+
expected = df["y"][0]
963+
964+
assert expected == result

0 commit comments

Comments
 (0)