diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt index c5addb93be38d..5b77428a0f6d7 100644 --- a/ci/requirements-2.7.txt +++ b/ci/requirements-2.7.txt @@ -5,7 +5,7 @@ numpy==1.8.1 cython==0.19.1 bottleneck==0.6.0 numexpr==2.2.2 -tables==2.3.1 +tables==3.0.0 matplotlib==1.3.1 openpyxl==1.6.2 xlsxwriter==0.4.6 diff --git a/doc/source/install.rst b/doc/source/install.rst index c30a086295f00..5595f60c6789c 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -26,7 +26,7 @@ Installing pandas Trying out pandas, no installation required! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The easiest way to start experimenting with pandas doesn't involve installing +The easiest way to start experimenting with pandas doesn't involve installing pandas at all. `Wakari `__ is a free service that provides a hosted @@ -35,10 +35,10 @@ pandas at all. Simply create an account, and have access to pandas from within your brower via an `IPython Notebook `__ in a few minutes. -Installing pandas with Anaconda +Installing pandas with Anaconda ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Installing pandas and the rest of the `NumPy `__ and +Installing pandas and the rest of the `NumPy `__ and `SciPy `__ stack can be a little difficult for inexperienced users. @@ -57,8 +57,8 @@ anything else, and without needing to wait for any software to be compiled. Installation instructions for `Anaconda `__ `can be found here `__. -A full list of the packages available as part of the -`Anaconda `__ distribution +A full list of the packages available as part of the +`Anaconda `__ distribution `can be found here `__. An additional advantage of installing with Anaconda is that you don't require @@ -78,7 +78,7 @@ If you want to have more control on which packages, or have a limited internet bandwidth, then installing pandas with `Miniconda `__ may be a better solution. 
-`Conda `__ is the package manager that the +`Conda `__ is the package manager that the `Anaconda `__ distribution is built upon. It is a package manager that is both cross-platform and language agnostic (it can play a similar role to a pip and virtualenv combination). @@ -90,7 +90,7 @@ minimal self contained Python installation, and then use the First you will need `Conda `__ to be installed and downloading and running the `Miniconda `__ -will do this for you. The installer +will do this for you. The installer `can be found here `__ The next step is to create a new conda environment (these are analogous to a @@ -98,7 +98,7 @@ virtualenv but they also allow you to specify precisely which Python version to install also). Run the following commands from a terminal window:: conda create -n name_of_my_env python - + This will create a minimal environment with only Python installed in it. To put your self inside this environment run:: @@ -108,7 +108,7 @@ On Windows the command is:: activate name_of_my_env -The final step required is to install pandas. This can be done with the +The final step required is to install pandas. This can be done with the following command:: conda install pandas @@ -143,7 +143,7 @@ pandas can be installed via pip from pip install pandas This will likely require the installation of a number of dependencies, -including NumPy, will require a compiler to compile required bits of code, +including NumPy, will require a compiler to compile required bits of code, and can take a few minutes to complete. Installing using your Linux distribution's package manager. @@ -259,6 +259,7 @@ Recommended Dependencies * `numexpr `__: for accelerating certain numerical operations. ``numexpr`` uses multiple cores as well as smart chunking and caching to achieve large speedups. + If installed, must be Version 2.1 or higher. * `bottleneck `__: for accelerating certain types of ``nan`` evaluations. 
``bottleneck`` uses specialized cython routines to achieve large speedups. @@ -277,7 +278,7 @@ Optional Dependencies * `Cython `__: Only necessary to build development version. Version 0.17.1 or higher. * `SciPy `__: miscellaneous statistical functions - * `PyTables `__: necessary for HDF5-based storage + * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required. * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. * `matplotlib `__: for plotting * `statsmodels `__ diff --git a/doc/source/io.rst b/doc/source/io.rst index 40977aee44cdd..f4065d736a674 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2199,12 +2199,9 @@ the high performance HDF5 format using the excellent `PyTables `__ library. See the :ref:`cookbook ` for some advanced strategies -.. note:: +.. warning:: - ``PyTables`` 3.0.0 was recently released to enable support for Python 3. - pandas should be fully compatible (and previously written stores should be - backwards compatible) with all ``PyTables`` >= 2.3. For ``python >= 3.2``, - ``pandas >= 0.12.0`` is required for compatibility. + As of version 0.15.0, pandas requires ``PyTables`` >= 3.0.0. Stores written with prior versions of pandas / ``PyTables`` >= 2.3 are fully compatible (this was the previous minimum ``PyTables`` required version). .. ipython:: python :suppress: diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index d15a48535f1eb..322bcba9664d9 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -11,7 +11,8 @@ users upgrade to this version. 
- The ``Categorical`` type was integrated as a first-class pandas type, see :ref:`here ` - Internal refactoring of the ``Index`` class to no longer sub-class ``ndarray``, see :ref:`Internal Refactoring ` - - New datetimelike properties accessor ``.dt`` for Series, see :ref:`Dateimelike Properties ` + - New datetimelike properties accessor ``.dt`` for Series, see :ref:`Datetimelike Properties ` + - dropping support for ``PyTables`` less than version 3.0.0, and ``numexpr`` less than version 2.1 (:issue:`7990`) - :ref:`Other Enhancements ` diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index 82c68fb10e7d6..e3096a85ca7d7 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -42,9 +42,9 @@ def _check_engine(engine): "if 'numexpr' is not installed") else: ne_version = numexpr.__version__ - if ne_version < LooseVersion('2.0'): + if ne_version < LooseVersion('2.1'): raise ImportError("'numexpr' version is %s, " - "must be >= 2.0" % ne_version) + "must be >= 2.1" % ne_version) def _check_parser(parser): diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py index 47d3fce618f89..bd00dbbb444b6 100644 --- a/pandas/computation/expressions.py +++ b/pandas/computation/expressions.py @@ -13,7 +13,12 @@ try: import numexpr as ne - _NUMEXPR_INSTALLED = ne.__version__ >= LooseVersion('2.0') + ver = ne.__version__ + _NUMEXPR_INSTALLED = ver >= LooseVersion('2.1') + if not _NUMEXPR_INSTALLED: + warnings.warn("The installed version of numexpr {ver} is not supported " + "in pandas and will not be used".format(ver=ver), UserWarning) + except ImportError: # pragma: no cover _NUMEXPR_INSTALLED = False diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 5489893df06b9..56d6ccd0abd9b 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -81,7 +81,6 @@ def _is_py3_complex_incompat(result, expected): _good_arith_ops = 
com.difference(_arith_ops_syms, _special_case_arith_ops_syms) - class TestEvalNumexprPandas(tm.TestCase): @classmethod @@ -1515,9 +1514,9 @@ def testit(): except ImportError: raise nose.SkipTest("no numexpr") else: - if ne.__version__ < LooseVersion('2.0'): + if ne.__version__ < LooseVersion('2.1'): with tm.assertRaisesRegexp(ImportError, "'numexpr' version is " - ".+, must be >= 2.0"): + ".+, must be >= 2.1"): testit() else: testit() diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 78e7c43de678f..989249994d953 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -228,20 +228,18 @@ class DuplicateWarning(Warning): # oh the troubles to reduce import time _table_mod = None -_table_supports_index = False _table_file_open_policy_is_strict = False def _tables(): global _table_mod - global _table_supports_index global _table_file_open_policy_is_strict if _table_mod is None: import tables _table_mod = tables # version requirements - ver = tables.__version__ - _table_supports_index = LooseVersion(ver) >= '2.3' + if LooseVersion(tables.__version__) < '3.0.0': + raise ImportError("PyTables version >= 3.0.0 is required") # set the file open policy # return the file open policy; this changes as of pytables 3.1 @@ -509,7 +507,7 @@ def open(self, mode='a', **kwargs): Parameters ---------- mode : {'a', 'w', 'r', 'r+'}, default 'a' - See HDFStore docstring or tables.openFile for info about modes + See HDFStore docstring or tables.open_file for info about modes """ tables = _tables() @@ -542,11 +540,11 @@ def open(self, mode='a', **kwargs): fletcher32=self._fletcher32) try: - self._handle = tables.openFile(self._path, self._mode, **kwargs) + self._handle = tables.open_file(self._path, self._mode, **kwargs) except (IOError) as e: # pragma: no cover if 'can not be written' in str(e): print('Opening %s in read-only mode' % self._path) - self._handle = tables.openFile(self._path, 'r', **kwargs) + self._handle = tables.open_file(self._path, 'r', **kwargs) 
else: raise @@ -561,7 +559,7 @@ def open(self, mode='a', **kwargs): "and not open the same file multiple times at once,\n" "upgrade the HDF5 version, or downgrade to PyTables 3.0.0 which allows\n" "files to be opened multiple times at once\n".format(version=tables.__version__, - hdf_version=tables.getHDF5Version())) + hdf_version=tables.get_hdf5_version())) raise e @@ -1018,9 +1016,6 @@ def create_table_index(self, key, **kwargs): # version requirements _tables() - if not _table_supports_index: - raise ValueError("PyTables >= 2.3 is required for table indexing") - s = self.get_storer(key) if s is None: return @@ -1037,7 +1032,7 @@ def groups(self): _tables() self._check_if_open() return [ - g for g in self._handle.walkNodes() + g for g in self._handle.walk_nodes() if (getattr(g._v_attrs, 'pandas_type', None) or getattr(g, 'table', None) or (isinstance(g, _table_mod.table.Table) and @@ -1050,7 +1045,7 @@ def get_node(self, key): try: if not key.startswith('/'): key = '/' + key - return self._handle.getNode(self.root, key) + return self._handle.get_node(self.root, key) except: return None @@ -1235,7 +1230,7 @@ def _write_to_group(self, key, value, format, index=True, append=False, # remove the node if we are not appending if group is not None and not append: - self._handle.removeNode(group, recursive=True) + self._handle.remove_node(group, recursive=True) group = None # we don't want to store a table node at all if are object is 0-len @@ -1257,7 +1252,7 @@ def _write_to_group(self, key, value, format, index=True, append=False, new_path += p group = self.get_node(new_path) if group is None: - group = self._handle.createGroup(path, p) + group = self._handle.create_group(path, p) path = new_path s = self._create_storer(group, format, value, append=append, @@ -2162,7 +2157,7 @@ def write(self, **kwargs): def delete(self, where=None, start=None, stop=None, **kwargs): """ support fully deleting the node in its entirety (only) - where specification must be None """ if where 
is None and start is None and stop is None: - self._handle.removeNode(self.group, recursive=True) + self._handle.remove_node(self.group, recursive=True) return None raise TypeError("cannot delete on an abstract storer") @@ -2404,7 +2399,7 @@ def write_array_empty(self, key, value): # ugly hack for length 0 axes arr = np.empty((1,) * value.ndim) - self._handle.createArray(self.group, key, arr) + self._handle.create_array(self.group, key, arr) getattr(self.group, key)._v_attrs.value_type = str(value.dtype) getattr(self.group, key)._v_attrs.shape = value.shape @@ -2414,7 +2409,7 @@ def _is_empty_array(self, shape): def write_array(self, key, value, items=None): if key in self.group: - self._handle.removeNode(self.group, key) + self._handle.remove_node(self.group, key) # Transform needed to interface with pytables row/col notation empty_array = self._is_empty_array(value.shape) @@ -2438,7 +2433,7 @@ def write_array(self, key, value, items=None): if atom is not None: # create an empty chunked array and fill it from value if not empty_array: - ca = self._handle.createCArray(self.group, key, atom, + ca = self._handle.create_carray(self.group, key, atom, value.shape, filters=self._filters) ca[:] = value @@ -2466,7 +2461,7 @@ def write_array(self, key, value, items=None): ws = performance_doc % (inferred_type, key, items) warnings.warn(ws, PerformanceWarning) - vlarr = self._handle.createVLArray(self.group, key, + vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom()) vlarr.append(value) else: @@ -2474,15 +2469,15 @@ def write_array(self, key, value, items=None): self.write_array_empty(key, value) else: if value.dtype.type == np.datetime64: - self._handle.createArray(self.group, key, value.view('i8')) + self._handle.create_array(self.group, key, value.view('i8')) getattr( self.group, key)._v_attrs.value_type = 'datetime64' elif value.dtype.type == np.timedelta64: - self._handle.createArray(self.group, key, value.view('i8')) + 
self._handle.create_array(self.group, key, value.view('i8')) getattr( self.group, key)._v_attrs.value_type = 'timedelta64' else: - self._handle.createArray(self.group, key, value) + self._handle.create_array(self.group, key, value) getattr(self.group, key)._v_attrs.transposed = transposed @@ -2586,7 +2581,7 @@ def write(self, obj, **kwargs): for name, ss in compat.iteritems(obj): key = 'sparse_series_%s' % name if key not in self.group._v_children: - node = self._handle.createGroup(self.group, key) + node = self._handle.create_group(self.group, key) else: node = getattr(self.group, key) s = SparseSeriesFixed(self.parent, node) @@ -2622,7 +2617,7 @@ def write(self, obj, **kwargs): for name, sdf in compat.iteritems(obj): key = 'sparse_frame_%s' % name if key not in self.group._v_children: - node = self._handle.createGroup(self.group, key) + node = self._handle.create_group(self.group, key) else: node = getattr(self.group, key) s = SparseFrameFixed(self.parent, node) @@ -3043,18 +3038,18 @@ def create_index(self, columns=None, optlevel=None, kind=None): cur_kind = index.kind if kind is not None and cur_kind != kind: - v.removeIndex() + v.remove_index() else: kw['kind'] = cur_kind if optlevel is not None and cur_optlevel != optlevel: - v.removeIndex() + v.remove_index() else: kw['optlevel'] = cur_optlevel # create the index if not v.is_indexed: - v.createIndex(**kw) + v.create_index(**kw) def read_axes(self, where, **kwargs): """create and return the axes sniffed from the table: return boolean @@ -3617,7 +3612,7 @@ def write(self, obj, axes=None, append=False, complib=None, chunksize=None, expectedrows=None, dropna=True, **kwargs): if not append and self.is_exists: - self._handle.removeNode(self.group, 'table') + self._handle.remove_node(self.group, 'table') # create the axes self.create_axes(axes=axes, obj=obj, validate=append, @@ -3636,7 +3631,7 @@ def write(self, obj, axes=None, append=False, complib=None, self.set_attrs() # create the table - table = 
self._handle.createTable(self.group, **options) + table = self._handle.create_table(self.group, **options) else: table = self.table @@ -3765,12 +3760,12 @@ def delete(self, where=None, start=None, stop=None, **kwargs): if where is None or not len(where): if start is None and stop is None: nrows = self.nrows - self._handle.removeNode(self.group, recursive=True) + self._handle.remove_node(self.group, recursive=True) else: # pytables<3.0 would remove a single row with stop=None if stop is None: stop = self.nrows - nrows = self.table.removeRows(start=start, stop=stop) + nrows = self.table.remove_rows(start=start, stop=stop) self.table.flush() return nrows @@ -3809,7 +3804,7 @@ def delete(self, where=None, start=None, stop=None, **kwargs): pg = groups.pop() for g in reversed(groups): rows = l.take(lrange(g, pg)) - table.removeRows(start=rows[rows.index[0] + table.remove_rows(start=rows[rows.index[0] ], stop=rows[rows.index[-1]] + 1) pg = g @@ -4352,10 +4347,10 @@ def select(self): generate the selection """ if self.condition is not None: - return self.table.table.readWhere(self.condition.format(), + return self.table.table.read_where(self.condition.format(), start=self.start, stop=self.stop) elif self.coordinates is not None: - return self.table.table.readCoordinates(self.coordinates) + return self.table.table.read_coordinates(self.coordinates) return self.table.table.read(start=self.start, stop=self.stop) def select_coords(self): @@ -4374,7 +4369,7 @@ def select_coords(self): stop += nrows if self.condition is not None: - return self.table.table.getWhereList(self.condition.format(), + return self.table.table.get_where_list(self.condition.format(), start=start, stop=stop, sort=True) elif self.coordinates is not None: diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 89809b47d76eb..2a0796e90e418 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -373,7 +373,7 @@ def test_repr(self): 
warnings.filterwarnings('always', category=PerformanceWarning) # make a random group in hdf space - store._handle.createGroup(store._handle.root,'bah') + store._handle.create_group(store._handle.root,'bah') repr(store) str(store) @@ -541,11 +541,8 @@ def test_open_args(self): store.close() - # only supported on pytable >= 3.0.0 - if LooseVersion(tables.__version__) >= '3.0.0': - - # the file should not have actually been written - self.assertFalse(os.path.exists(path)) + # the file should not have actually been written + self.assertFalse(os.path.exists(path)) def test_flush(self): @@ -881,8 +878,6 @@ def check(format,index): def test_encoding(self): - if LooseVersion(tables.__version__) < '3.0.0': - raise nose.SkipTest('tables version does not support proper encoding') if sys.byteorder != 'little': raise nose.SkipTest('system byteorder is not little') @@ -1471,29 +1466,6 @@ def col(t,column): store.put('f2', df) self.assertRaises(TypeError, store.create_table_index, 'f2') - # try to change the version supports flag - from pandas.io import pytables - pytables._table_supports_index = False - self.assertRaises(Exception, store.create_table_index, 'f') - - # test out some versions - original = tables.__version__ - - for v in ['2.2', '2.2b']: - pytables._table_mod = None - pytables._table_supports_index = False - tables.__version__ = v - self.assertRaises(Exception, store.create_table_index, 'f') - - for v in ['2.3.1', '2.3.1b', '2.4dev', '2.4', '3.0.0', '3.1.0', original]: - pytables._table_mod = None - pytables._table_supports_index = False - tables.__version__ = v - store.create_table_index('f') - pytables._table_mod = None - pytables._table_supports_index = False - tables.__version__ = original - def test_big_table_frame(self): raise nose.SkipTest('no big table frame')