diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6c2784bc93b0c..b0c2b845073f6 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -481,7 +481,7 @@ Conversion - Bug in :meth:`DataFrame.__repr__` incorrectly raising a ``TypeError`` when the dtype of a column is ``np.record`` (:issue:`48526`) - Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`) - Bug in :meth:`DataFrame.insert` raising ``TypeError`` if ``loc`` is ``np.int64`` (:issue:`53193`) -- +- Bug in :meth:`HDFStore.select` losing precision of large integers when stored and retrieved (:issue:`54186`) Strings ^^^^^^^ diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 5175884bca210..433421d35af55 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -2,6 +2,10 @@ from __future__ import annotations import ast +from decimal import ( + Decimal, + InvalidOperation, +) from functools import partial from typing import ( TYPE_CHECKING, @@ -233,7 +237,14 @@ def stringify(value): result = metadata.searchsorted(v, side="left") return TermValue(result, result, "integer") elif kind == "integer": - v = int(float(v)) + try: + v_dec = Decimal(v) + except InvalidOperation: + # GH 54186 + # convert v to float to raise float's ValueError + float(v) + else: + v = int(v_dec.to_integral_exact(rounding="ROUND_HALF_EVEN")) return TermValue(v, v, kind) elif kind == "float": v = float(v) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index f14a3ad7c5e10..8d9e0b9f5ffec 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -914,7 +914,7 @@ def test_query_compare_column_type(setup_path): if col == "real_date": msg = 'Given date string "a" not likely a datetime' else: - msg = "could not convert string to " + msg = "could not convert string to" with 
pytest.raises(ValueError, match=msg): store.select("test", where=query) @@ -943,3 +943,22 @@ def test_select_empty_where(tmp_path, where): store.put("df", df, "t") result = read_hdf(store, "df", where=where) tm.assert_frame_equal(result, df) + + +def test_select_large_integer(tmp_path): + # GH 54186: large ints in ``where`` must not lose precision + path = tmp_path / "large_int.h5" + + df = DataFrame( + zip( + ["a", "b", "c", "d"], + [-9223372036854775801, -9223372036854775802, -9223372036854775803, 123], + ), + columns=["x", "y"], + ) + with HDFStore(path) as s: + s.append("data", df, data_columns=True, index=False) + result = s.select("data", where="y==-9223372036854775801").get("y").get(0) + expected = df["y"][0] + + assert expected == result