From 1a959e6b55ae69ffaae45490ce987f5962edd063 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Sun, 4 Oct 2020 22:26:06 +0700
Subject: [PATCH 01/14] REF: extract method _identify_group

---
 pandas/io/pytables.py | 67 ++++++++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 27 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index a3d6975c00a95..f148387f0a6bb 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1735,38 +1735,12 @@ def _write_to_group(
         errors: str = "strict",
         track_times: bool = True,
     ):
-        group = self.get_node(key)
-
-        # we make this assertion for mypy; the get_node call will already
-        # have raised if this is incorrect
-        assert self._handle is not None
-
-        # remove the node if we are not appending
-        if group is not None and not append:
-            self._handle.remove_node(group, recursive=True)
-            group = None
-
         # we don't want to store a table node at all if our object is 0-len
         # as there are not dtypes
         if getattr(value, "empty", None) and (format == "table" or append):
             return

-        if group is None:
-            paths = key.split("/")
-
-            # recursively create the groups
-            path = "/"
-            for p in paths:
-                if not len(p):
-                    continue
-                new_path = path
-                if not path.endswith("/"):
-                    new_path += "/"
-                new_path += p
-                group = self.get_node(new_path)
-                if group is None:
-                    group = self._handle.create_group(path, p)
-                path = new_path
+        group = self._identify_group(key, append)

         s = self._create_storer(group, format, value, encoding=encoding, errors=errors)
         if append:
@@ -1807,6 +1781,45 @@ def _read_group(self, group: "Node"):
         s.infer_axes()
         return s.read()

+    def _identify_group(self, key: str, append: bool) -> "Node":
+        """Identify HDF5 group based on key, delete/create group if needed."""
+        group = self.get_node(key)
+
+        # we make this assertion for mypy; the get_node call will already
+        # have raised if this is incorrect
+        assert self._handle is not None
+
+        # remove the node if we are not appending
+        if group is not None and not append:
+            self._handle.remove_node(group, recursive=True)
+            group = None
+
+        if group is None:
+            group = self._create_nodes_and_group(key)
+
+        return group
+
+    def _create_nodes_and_group(self, key: str) -> "Node":
+        """Create nodes from key and return group name."""
+        # assertion for mypy
+        assert self._handle is not None
+
+        paths = key.split("/")
+        # recursively create the groups
+        path = "/"
+        for p in paths:
+            if not len(p):
+                continue
+            new_path = path
+            if not path.endswith("/"):
+                new_path += "/"
+            new_path += p
+            group = self.get_node(new_path)
+            if group is None:
+                group = self._handle.create_group(path, p)
+            path = new_path
+        return group
+

 class TableIterator:
     """

From 1448d494f833f08b5688eae864afe78e11385369 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Sun, 4 Oct 2020 22:32:05 +0700
Subject: [PATCH 02/14] DOC: clean-up docstrings

---
 pandas/io/pytables.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index f148387f0a6bb..836daffce9c55 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -814,19 +814,19 @@ def select(
         Parameters
         ----------
         key : str
-            Object being retrieved from file.
+            Object being retrieved from file.
         where : list, default None
-            List of Term (or convertible) objects, optional.
+            List of Term (or convertible) objects, optional.
         start : int, default None
-            Row number to start selection.
+            Row number to start selection.
         stop : int, default None
-            Row number to stop selection.
+            Row number to stop selection.
         columns : list, default None
-            A list of columns that if not None, will limit the return columns.
+            A list of columns that if not None, will limit the return columns.
         iterator : bool, default False
-            Returns an iterator.
+            Returns an iterator.
         chunksize : int, default None
-            Number or rows to include in iteration, return an iterator.
+            Number or rows to include in iteration, return an iterator.
         auto_close : bool, default False
             Should automatically close the store when finished.

@@ -1090,7 +1090,7 @@ def put(
             Table format. Write as a PyTables Table structure which may perform
             worse but allow more flexible operations like searching / selecting
             subsets of the data.
-        append : bool, default False
+        append : bool, default False
             This will force Table format, append the input data to the
             existing.
         data_columns : list, default None
@@ -1099,7 +1099,7 @@ def put(
             `__.
         encoding : str, default None
             Provide an encoding for strings.
-        dropna : bool, default False, do not write an ALL nan row to
+        dropna : bool, default False, do not write an ALL nan row to
            The store settable by the option 'io.hdf.dropna_table'.
         track_times : bool, default True
             Parameter is propagated to 'create_table' method of 'PyTables'.
@@ -1521,11 +1521,12 @@ def copy(

         Parameters
         ----------
-        propindexes: bool, default True
+        propindexes : bool, default True
             Restore indexes in copied file.
-        keys : list of keys to include in the copy (defaults to all)
-        overwrite : overwrite (remove and replace) existing nodes in the
-            new store (default is True)
+        keys : list, optional
+            List of keys to include in the copy (defaults to all).
+        overwrite : bool, default True
+            Whether to overwrite (remove and replace) existing nodes in the new store.
         mode, complib, complevel, fletcher32 same as in HDFStore.__init__

         Returns

From b166bb60ed52495ebbc3594990cd042be8d3ac44 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Sun, 4 Oct 2020 22:34:59 +0700
Subject: [PATCH 03/14] CLN: use suppress instead of try/except/pass

---
 pandas/io/pytables.py | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 836daffce9c55..85e47e5b47caf 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2,6 +2,7 @@
 High level interface to PyTables for reading and writing pandas data
 structures to disk
 """
+from contextlib import suppress
 import copy
 from datetime import date, tzinfo
 import itertools
@@ -202,12 +203,10 @@ def _tables():
         # set the file open policy
         # return the file open policy; this changes as of pytables 3.1
         # depending on the HDF5 version
-        try:
+        with suppress(AttributeError):
             _table_file_open_policy_is_strict = (
                 tables.file._FILE_OPEN_POLICY == "strict"
             )
-        except AttributeError:
-            pass

     return _table_mod

@@ -423,10 +422,8 @@ def read_hdf(
     except (ValueError, TypeError, KeyError):
         if not isinstance(path_or_buf, HDFStore):
             # if there is an error, close the store if we opened it.
-            try:
+            with suppress(AttributeError):
                 store.close()
-            except AttributeError:
-                pass

         raise

@@ -763,10 +760,8 @@ def flush(self, fsync: bool = False):
         if self._handle is not None:
             self._handle.flush()
             if fsync:
-                try:
+                with suppress(OSError):
                     os.fsync(self._handle.fileno())
-                except OSError:
-                    pass

     def get(self, key: str):
         """
@@ -3025,11 +3020,9 @@ def write_array(self, key: str, value: ArrayLike, items: Optional[Index] = None)

         atom = None
         if self._filters is not None:
-            try:
+            with suppress(ValueError):
                 # get the atom for this datatype
                 atom = _tables().Atom.from_dtype(value.dtype)
-            except ValueError:
-                pass

         if atom is not None:
             # We only get here if self._filters is non-None and
@@ -5046,14 +5039,12 @@ def _maybe_adjust_name(name: str, version) -> str:
     -------
     str
     """
-    try:
+    with suppress(IndexError):
         if version[0] == 0 and version[1] <= 10 and version[2] == 0:
             m = re.search(r"values_block_(\d+)", name)
             if m:
                 grp = m.groups()[0]
                 name = f"values_{grp}"
-    except IndexError:
-        pass

     return name

@@ -5143,7 +5134,7 @@ def __init__(

         if is_list_like(where):
             # see if we have a passed coordinate like
-            try:
+            with suppress(ValueError):
                 inferred = lib.infer_dtype(where, skipna=False)
                 if inferred == "integer" or inferred == "boolean":
                     where = np.asarray(where)
@@ -5163,9 +5154,6 @@ def __init__(
                     )
                     self.coordinates = where

-            except ValueError:
-                pass
-
         if self.coordinates is None:

             self.terms = self.generate(where)

From c80b2d324c0de238ed3a39fd91ed8cb8f203daee Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Sun, 4 Oct 2020 22:43:13 +0700
Subject: [PATCH 04/14] CLN: update multi-line message using dedent

---
 pandas/io/pytables.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 85e47e5b47caf..15540c1f9f692 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -8,6 +8,7 @@
 import itertools
 import os
 import re
+from textwrap import dedent
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
 import warnings

@@ -5174,15 +5175,16 @@ def generate(self, where):
             # raise a nice message, suggesting that the user should use
             # data_columns
             qkeys = ",".join(q.keys())
-            raise ValueError(
-                f"The passed where expression: {where}\n"
-                "            contains an invalid variable reference\n"
-                "            all of the variable references must be a "
-                "reference to\n"
-                "            an axis (e.g. 'index' or 'columns'), or a "
-                "data_column\n"
-                f"            The currently defined references are: {qkeys}\n"
-            ) from err
+            msg = dedent(
+                f"""\
+                The passed where expression: {where}
+                contains an invalid variable reference
+                all of the variable references must be a reference to
+                an axis (e.g. 'index' or 'columns'), or a data_column
+                The currently defined references are: {qkeys}
+                """
+            )
+            raise ValueError(msg) from err

     def select(self):
         """

From 65996e65413e36fd864e7a0231a9b2519060f2c6 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Sun, 4 Oct 2020 22:52:17 +0700
Subject: [PATCH 05/14] REF: reduce for combining mask pair-wise

---
 pandas/io/pytables.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 15540c1f9f692..3fbeea930acb5 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -5,6 +5,7 @@
 from contextlib import suppress
 import copy
 from datetime import date, tzinfo
+from functools import reduce
 import itertools
 import os
 import re
@@ -4297,9 +4298,8 @@ def write_data(self, chunksize: Optional[int], dropna: bool = False):

         # consolidate masks
         if len(masks):
-            mask = masks[0]
-            for m in masks[1:]:
-                mask = mask & m
+            combine_masks = lambda first, second: first & second
+            mask = reduce(combine_masks, masks)
             mask = mask.ravel()
         else:
             mask = None

From e59dc62cd32bdbde6719e60f48012071389f37eb Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Sun, 4 Oct 2020 23:00:11 +0700
Subject: [PATCH 06/14] CLN: remove unnecessary empty lines

---
 pandas/io/pytables.py | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 3fbeea930acb5..8d831bbf2c723 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -666,12 +666,10 @@ def open(self, mode: str = "a", **kwargs):
         tables = _tables()

         if self._mode != mode:
-
             # if we are changing a write mode to read, ok
             if self._mode in ["a", "w"] and mode in ["r", "r+"]:
                 pass
             elif mode in ["w"]:
-
                 # this would truncate, raise here
                 if self.is_open:
                     raise PossibleDataLossError(
@@ -700,7 +698,6 @@ def open(self, mode: str = "a", **kwargs):
             raise

         except ValueError as err:
-
             # trap PyTables >= 3.1 FILE_OPEN_POLICY exception
             # to provide an updated message
             if "FILE_OPEN_POLICY" in str(err):
@@ -715,11 +712,9 @@ def open(self, mode: str = "a", **kwargs):
                     "which allows\n"
                     "files to be opened multiple times at once\n"
                 )
-
                 raise err

         except Exception as err:
-
             # trying to read from a non-existent file causes an error which
             # is not part of IOError, make it one
             if self._mode == "r" and "Unable to open/create file" in str(err):
@@ -1646,7 +1641,6 @@ def error(t):
         # infer the pt from the passed value
         if pt is None:
             if value is None:
-
                 _tables()
                 assert _table_mod is not None  # for mypy
                 if getattr(group, "table", None) or isinstance(
@@ -1678,10 +1672,8 @@ def error(t):

         # existing node (and must be a table)
         if tt is None:
-
             # if we are a writer, determine the tt
             if value is not None:
-
                 if pt == "series_table":
                     index = getattr(value, "index", None)
                     if index is not None:
@@ -1886,11 +1878,9 @@ def __init__(
         self.auto_close = auto_close

     def __iter__(self):
-
         # iterate
         current = self.start
         while current < self.stop:
-
             stop = min(current + self.chunksize, self.stop)
             value = self.func(None, None, self.coordinates[current:stop])
             current = stop
@@ -1906,7 +1896,6 @@ def close(self):
             self.store.close()

     def get_result(self, coordinates: bool = False):
-
         # return the actual iterator
         if self.chunksize is not None:
             if not isinstance(self.s, Table):
@@ -2105,7 +2094,6 @@ def maybe_set_size(self, min_itemsize=None):
         with an integer size
         """
         if _ensure_decoded(self.kind) == "string":
-
             if isinstance(min_itemsize, dict):
                 min_itemsize = min_itemsize.get(self.name)

@@ -2163,7 +2151,6 @@ def update_info(self, info):

             existing_value = idx.get(key)
             if key in idx and value is not None and existing_value != value:
-
                 # frequency/name just warn
                 if key in ["freq", "index_name"]:
                     ws = attribute_conflict_doc % (key, existing_value, value)
@@ -2356,10 +2343,8 @@ def _get_atom(cls, values: ArrayLike) -> "Col":
             atom = cls.get_atom_timedelta64(shape)
         elif is_complex_dtype(dtype):
             atom = _tables().ComplexCol(itemsize=itemsize, shape=shape[0])
-
         elif is_string_dtype(dtype):
             atom = cls.get_atom_string(shape, itemsize)
-
         else:
             atom = cls.get_atom_data(shape, kind=dtype.name)

@@ -2465,7 +2450,6 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):

         # reverse converts
         if dtype == "datetime64":
-
             # recreate with tz if indicated
             converted = _set_tz(converted, tz, coerce=True)

@@ -2482,7 +2466,6 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
             )

         elif meta == "category":
-
             # we have a categorical
             categories = metadata
             codes = converted.ravel()
@@ -2837,7 +2820,6 @@ def read_array(
             ret = node[start:stop]

         if dtype == "datetime64":
-
             # reconstruct a timezone if indicated
             tz = getattr(attrs, "tz", None)
             ret = _set_tz(ret, tz, coerce=True)
@@ -3041,7 +3023,6 @@ def write_array(self, key: str, value: ArrayLike, items: Optional[Index] = None)
             self.write_array_empty(key, value)

         elif value.dtype.type == np.object_:
-
             # infer the type, warn if we have a non-string type here (for
             # performance)
             inferred_type = lib.infer_dtype(value, skipna=False)
@@ -3725,7 +3706,6 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes):

         # if min_itemsize is a dict, add the keys (exclude 'values')
         if isinstance(min_itemsize, dict):
-
             existing_data_columns = set(data_columns)
             data_columns = list(data_columns)  # ensure we do not modify
             data_columns.extend(
@@ -4161,7 +4141,6 @@ def read_column(
         # find the axes
         for a in self.axes:
             if column == a.name:
-
                 if not a.is_data_indexable:
                     raise ValueError(
                         f"column [{column}] can not be extracted individually; "
@@ -4287,9 +4266,7 @@ def write_data(self, chunksize: Optional[int], dropna: bool = False):
         # if dropna==True, then drop ALL nan rows
         masks = []
         if dropna:
-
             for a in self.values_axes:
-
                 # figure the mask: only do if we can successfully process this
                 # column, otherwise ignore the mask
                 mask = isna(a.data).all(axis=0)
@@ -4868,7 +4845,6 @@ def _unconvert_index(
 def _maybe_convert_for_string_atom(
     name: str, block, existing_col, min_itemsize, nan_rep, encoding, errors
 ):
-
     if not block.is_object:
         return block.values

@@ -4901,7 +4877,6 @@ def _maybe_convert_for_string_atom(
     # we cannot serialize this data, so report an exception on a column
     # by column basis
     for i in range(len(block.shape[0])):
-
         col = block.iget(i)
         inferred_type = lib.infer_dtype(col, skipna=False)
         if inferred_type != "string":

From 35f4d02146a6069ad98efdeb4a0734e9e728470d Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Tue, 6 Oct 2020 00:16:52 +0700
Subject: [PATCH 07/14] DOC: delete unused parameter from docstring

Note that there are some other params that are not in docstring.
---
 pandas/io/pytables.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 8d831bbf2c723..d4a2efa0958c8 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1091,8 +1091,6 @@ def put(
             `__.
         encoding : str, default None
             Provide an encoding for strings.
-        dropna : bool, default False, do not write an ALL nan row to
-            The store settable by the option 'io.hdf.dropna_table'.
         track_times : bool, default True
             Parameter is propagated to 'create_table' method of 'PyTables'.
             If set to False it enables to have the same h5 files (same hashes)

From f5df4c00b7c0adafe003ca5fb54336f498dde9bc Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Tue, 6 Oct 2020 00:17:31 +0700
Subject: [PATCH 08/14] DOC: None in docstrings

---
 pandas/io/pytables.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index d4a2efa0958c8..638d209108028 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -807,19 +807,19 @@ def select(
         ----------
         key : str
             Object being retrieved from file.
-        where : list, default None
+        where : list or None
             List of Term (or convertible) objects, optional.
-        start : int, default None
+        start : int or None
             Row number to start selection.
         stop : int, default None
             Row number to stop selection.
-        columns : list, default None
+        columns : list or None
             A list of columns that if not None, will limit the return columns.
-        iterator : bool, default False
+        iterator : bool or False
             Returns an iterator.
-        chunksize : int, default None
+        chunksize : int or None
             Number or rows to include in iteration, return an iterator.
-        auto_close : bool, default False
+        auto_close : bool or False
             Should automatically close the store when finished.

         Returns
@@ -1083,11 +1083,10 @@ def put(
             worse but allow more flexible operations like searching / selecting
             subsets of the data.
         append : bool, default False
-            This will force Table format, append the input data to the
-            existing.
+            This will force Table format, append the input data to the existing.
         data_columns : list, default None
-            List of columns to create as data columns, or True to
-            use all columns. See `here
+            List of columns to create as data columns, or True to use all columns.
+            See `here
             `__.
         encoding : str, default None
             Provide an encoding for strings.

From 98b90491bdc7afac65f09419f28a321bb21b71b2 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Tue, 6 Oct 2020 00:40:16 +0700
Subject: [PATCH 09/14] REF: avoid IndexError check

---
 pandas/io/pytables.py                   | 28 ++++++++++++++++++--------
 pandas/tests/io/pytables/test_store.py |  8 ++++++++
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 638d209108028..fc386018a4f8c 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -10,7 +10,17 @@
 import os
 import re
 from textwrap import dedent
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+)
 import warnings

 import numpy as np
@@ -4998,7 +5008,7 @@ def _need_convert(kind: str) -> bool:
     return False


-def _maybe_adjust_name(name: str, version) -> str:
+def _maybe_adjust_name(name: str, version: Sequence[int]) -> str:
     """
     Prior to 0.10.1, we named values blocks like: values_block_0 an the
     name values_0, adjust the given name if necessary.
@@ -5012,12 +5022,14 @@ def _maybe_adjust_name(name: str, version) -> str:
     -------
     str
     """
-    with suppress(IndexError):
-        if version[0] == 0 and version[1] <= 10 and version[2] == 0:
-            m = re.search(r"values_block_(\d+)", name)
-            if m:
-                grp = m.groups()[0]
-                name = f"values_{grp}"
+    if isinstance(version, str) or len(version) < 3:
+        raise ValueError("Version is incorrect, expected sequence of 3 integers.")
+
+    if version[0] == 0 and version[1] <= 10 and version[2] == 0:
+        m = re.search(r"values_block_(\d+)", name)
+        if m:
+            grp = m.groups()[0]
+            name = f"values_{grp}"

     return name

diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index c1938db12a0bc..4bd250a3f75dc 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -45,6 +45,7 @@
 )

 from pandas.io.pytables import (
+    _maybe_adjust_name,
     ClosedFileError,
     HDFStore,
     PossibleDataLossError,
@@ -4921,3 +4922,10 @@ def test_unsuppored_hdf_file_error(self, datapath):

         with pytest.raises(ValueError, match=message):
             pd.read_hdf(data_path)
+
+
+@pytest.mark.parametrize("bad_version", [(1, 2), (1,), [], '12', '123'])
+def test_maybe_adjust_name_bad_version_raises(bad_version):
+    msg = "Version is incorrect, expected sequence of 3 integers"
+    with pytest.raises(ValueError, match=msg):
+        _maybe_adjust_name("values_block_0", version=bad_version)

From d98ab2430ac30c508c7d07c5b0f56f2c6a41732b Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Tue, 6 Oct 2020 00:46:16 +0700
Subject: [PATCH 10/14] Revert "REF: reduce for combining mask pair-wise"

This reverts commit 65996e65413e36fd864e7a0231a9b2519060f2c6.
---
 pandas/io/pytables.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index fc386018a4f8c..eef56acb81259 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -5,7 +5,6 @@
 from contextlib import suppress
 import copy
 from datetime import date, tzinfo
-from functools import reduce
 import itertools
 import os
 import re
@@ -4282,8 +4281,9 @@ def write_data(self, chunksize: Optional[int], dropna: bool = False):

         # consolidate masks
         if len(masks):
-            combine_masks = lambda first, second: first & second
-            mask = reduce(combine_masks, masks)
+            mask = masks[0]
+            for m in masks[1:]:
+                mask = mask & m
             mask = mask.ravel()
         else:
             mask = None

From c32086ee943a551e0d4e11a7a79fa087126abc24 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Tue, 6 Oct 2020 00:50:30 +0700
Subject: [PATCH 11/14] LINT: fix quotes

---
 pandas/tests/io/pytables/test_store.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 4bd250a3f75dc..1e1c9e91faa4b 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -45,11 +45,11 @@
 )

 from pandas.io.pytables import (
-    _maybe_adjust_name,
     ClosedFileError,
     HDFStore,
     PossibleDataLossError,
     Term,
+    _maybe_adjust_name,
     read_hdf,
 )

@@ -4924,7 +4924,7 @@ def test_unsuppored_hdf_file_error(self, datapath):
             pd.read_hdf(data_path)


-@pytest.mark.parametrize("bad_version", [(1, 2), (1,), [], '12', '123'])
+@pytest.mark.parametrize("bad_version", [(1, 2), (1,), [], "12", "123"])
 def test_maybe_adjust_name_bad_version_raises(bad_version):
     msg = "Version is incorrect, expected sequence of 3 integers"
     with pytest.raises(ValueError, match=msg):
         _maybe_adjust_name("values_block_0", version=bad_version)

From 571b965e4bf38558169a1da626ad62fe7127756e Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Tue, 6 Oct 2020 01:57:31 +0700
Subject: [PATCH 12/14] REF: narrow down reason for exception

---
 pandas/io/pytables.py | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index eef56acb81259..dc4bb5207d23e 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -697,6 +697,19 @@ def open(self, mode: str = "a", **kwargs):
                 self._complevel, self._complib, fletcher32=self._fletcher32
             )

+        ver = tables.__version__
+        if ver >= "3.1" and _table_file_open_policy_is_strict and self.is_open:
+            msg = dedent(
+                """\
+                PyTables [{ver}] no longer supports opening multiple files
+                even in read-only mode on this HDF5 version [{hdf_version}].
+                You can accept this and not open the same file multiple times at once,
+                upgrade the HDF5 version, or downgrade to PyTables 3.0.0 which allows
+                files to be opened multiple times at once.
+                """
+            )
+            raise ValueError(msg)
+
         try:
             self._handle = tables.open_file(self._path, self._mode, **kwargs)
         except OSError as err:  # pragma: no cover
@@ -705,24 +718,6 @@ def open(self, mode: str = "a", **kwargs):
                 self._handle = tables.open_file(self._path, "r", **kwargs)
             else:
                 raise
-
-        except ValueError as err:
-            # trap PyTables >= 3.1 FILE_OPEN_POLICY exception
-            # to provide an updated message
-            if "FILE_OPEN_POLICY" in str(err):
-                hdf_version = tables.get_hdf5_version()
-                err = ValueError(
-                    f"PyTables [{tables.__version__}] no longer supports "
-                    "opening multiple files\n"
-                    "even in read-only mode on this HDF5 version "
-                    f"[{hdf_version}]. You can accept this\n"
-                    "and not open the same file multiple times at once,\n"
-                    "upgrade the HDF5 version, or downgrade to PyTables 3.0.0 "
-                    "which allows\n"
-                    "files to be opened multiple times at once\n"
-                )
-                raise err
-
         except Exception as err:
             # trying to read from a non-existent file causes an error which
             # is not part of IOError, make it one

From 262e9a0924a2e7086a19ee5e5186f99e9bf8adad Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Tue, 6 Oct 2020 02:11:01 +0700
Subject: [PATCH 13/14] REF: remove what seems to be irrelevant exceptions

---
 pandas/io/pytables.py | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index dc4bb5207d23e..6d2d08db77954 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -710,20 +710,7 @@ def open(self, mode: str = "a", **kwargs):
             )
             raise ValueError(msg)

-        try:
-            self._handle = tables.open_file(self._path, self._mode, **kwargs)
-        except OSError as err:  # pragma: no cover
-            if "can not be written" in str(err):
-                print(f"Opening {self._path} in read-only mode")
-                self._handle = tables.open_file(self._path, "r", **kwargs)
-            else:
-                raise
-        except Exception as err:
-            # trying to read from a non-existent file causes an error which
-            # is not part of IOError, make it one
-            if self._mode == "r" and "Unable to open/create file" in str(err):
-                raise OSError(str(err)) from err
-            raise
+        self._handle = tables.open_file(self._path, self._mode, **kwargs)

     def close(self):
         """

From aaf075d41bfd7c745356ef342fb7988486487ef2 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov
Date: Tue, 6 Oct 2020 02:16:26 +0700
Subject: [PATCH 14/14] CLN: update error handing with already opened file

---
 pandas/io/pytables.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 6d2d08db77954..3e3330fa4378f 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -697,16 +697,10 @@ def open(self, mode: str = "a", **kwargs):
             self._complevel, self._complib, fletcher32=self._fletcher32
         )

-        ver = tables.__version__
-        if ver >= "3.1" and _table_file_open_policy_is_strict and self.is_open:
-            msg = dedent(
-                """\
-                PyTables [{ver}] no longer supports opening multiple files
-                even in read-only mode on this HDF5 version [{hdf_version}].
-                You can accept this and not open the same file multiple times at once,
-                upgrade the HDF5 version, or downgrade to PyTables 3.0.0 which allows
-                files to be opened multiple times at once.
-                """
+        if _table_file_open_policy_is_strict and self.is_open:
+            msg = (
+                "Cannot open HDF5 file, which is already opened, "
+                "even in read-only mode."
            )
            raise ValueError(msg)
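
Note for reviewers: patch 03 leans on contextlib.suppress, a standard-library
context manager that silently swallows the listed exception types, so a
try/except/pass block collapses into a single with-statement. The following is
a minimal standalone sketch of that pattern, not pandas code; the parse_int
helper and its values are made up for illustration only.

    from contextlib import suppress

    def parse_int(text: str, default: int = 0) -> int:
        # Same effect as: try: return int(text) / except ValueError: pass
        result = default
        with suppress(ValueError):
            result = int(text)
        return result

    assert parse_int("42") == 42          # parses cleanly
    assert parse_int("not a number") == 0  # ValueError suppressed, default kept

The trade-off is the same one the series makes in pandas/io/pytables.py: the
suppressed exception type is stated up front, which reads more clearly than an
empty except block, but any other exception still propagates unchanged.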