From d39d5e7a49bb1ea28b7a02f486757d6ba0986639 Mon Sep 17 00:00:00 2001
From: jreback
Date: Mon, 3 Jun 2013 16:21:32 -0400
Subject: [PATCH 1/7] TST: travis to build numexpr/tables on py3k

---
 ci/install.sh | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/ci/install.sh b/ci/install.sh
index a091834a9570f..b748070db85aa 100755
--- a/ci/install.sh
+++ b/ci/install.sh
@@ -69,13 +69,11 @@ if ( ! $VENV_FILE_AVAILABLE ); then
     pip install $PIP_ARGS cython

     if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then
-        # installed explicitly above, to get the library as well
-        # sudo apt-get $APT_ARGS install libhdf5-serial-dev;
-        pip install numexpr
-        pip install tables
         pip install $PIP_ARGS xlwt
     fi
+    pip install numexpr
+    pip install tables
     pip install $PIP_ARGS matplotlib
     pip install $PIP_ARGS openpyxl
     pip install $PIP_ARGS xlrd>=0.9.0

From 870c6489fcd5b091deb6f5d952ad5a6d117e38cb Mon Sep 17 00:00:00 2001
From: jreback
Date: Wed, 5 Jun 2013 13:29:22 -0400
Subject: [PATCH 2/7] DOC: docstring/release notes updates for py3k

DOC: v0.11.1 updates
---
 README.rst             |  1 -
 RELEASE.rst            | 15 +++++++++++++++
 doc/source/install.rst |  1 -
 doc/source/io.rst      |  9 ++++-----
 doc/source/v0.11.1.txt |  3 +++
 pandas/io/pytables.py  | 10 ++++++----
 6 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/README.rst b/README.rst
index daea702476ebc..85868176722bd 100644
--- a/README.rst
+++ b/README.rst
@@ -85,7 +85,6 @@ Optional dependencies
   - `Cython `__: Only necessary to build development version. Version 0.17.1 or higher.
   - `SciPy `__: miscellaneous statistical functions
   - `PyTables `__: necessary for HDF5-based storage
-    - Not yet supported on python >= 3
   - `matplotlib `__: for plotting
   - `statsmodels `__
     - Needed for parts of :mod:`pandas.stats`

diff --git a/RELEASE.rst b/RELEASE.rst
index 12d2389a8a59b..28c4ce8becbb0 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -63,6 +63,7 @@ pandas 0.11.1
     to append an index with a different name than the existing
   - support datelike columns with a timezone as data_columns (GH2852_)
   - table writing performance improvements.
+  - support python3 (via ``PyTables 3.0.0``)
   - Add modulo operator to Series, DataFrame
   - Add ``date`` method to DatetimeIndex
   - Simplified the API and added a describe method to Categorical
@@ -79,10 +80,14 @@ pandas 0.11.1

 **API Changes**

-  - When removing an object from a ``HDFStore``, ``remove(key)`` raises
-    ``KeyError`` if the key is not a valid store object.
-  - In an ``HDFStore``, raise a ``TypeError`` on passing ``where`` or ``columns``
-    to select with a Storer; these are invalid parameters at this time
+  - ``HDFStore``
+
+    - When removing an object, ``remove(key)`` raises
+      ``KeyError`` if the key is not a valid store object.
+    - raise a ``TypeError`` on passing ``where`` or ``columns``
+      to select with a Storer; these are invalid parameters at this time
+    - can now specify an ``encoding`` option to ``append/put``
+      to enable alternate encodings
   - The repr() for (Multi)Index now obeys display.max_seq_items rather
     than numpy threshold print options. (GH3426_, GH3466_)
   - Added mangle_dupe_cols option to read_table/csv, allowing users
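The notes above drop the "not yet supported on python >= 3" caveat: HDF5-based
storage now works on py3 given a new-enough PyTables. An illustrative version
gate (a sketch, not part of this patch; the minimums are taken from the release
notes above):

    import sys
    import tables
    from distutils.version import LooseVersion

    # py3 needs PyTables >= 3.0.0; py2 keeps working from 2.3 on
    _min_tables = '3.0.0' if sys.version_info[0] >= 3 else '2.3'
    if LooseVersion(tables.__version__) < LooseVersion(_min_tables):
        raise ImportError("PyTables >= %s is required for HDFStore" % _min_tables)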
diff --git a/doc/source/install.rst b/doc/source/install.rst
index 6868969c1b968..9dc8064da45e3 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -95,7 +95,6 @@ Optional Dependencies
     version. Version 0.17.1 or higher.
   * `SciPy `__: miscellaneous statistical functions
   * `PyTables `__: necessary for HDF5-based storage
-    * Not yet supported on python >= 3
   * `matplotlib `__: for plotting
   * `statsmodels `__
     * Needed for parts of :mod:`pandas.stats`

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 802ab08e85932..1c615ca278668 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1300,12 +1300,11 @@ the high performance HDF5 format using the excellent `PyTables `__ library.

 See the :ref:`cookbook` for some advanced strategies

-.. warning::
+.. note::

-   ``PyTables`` 3.0.0 was recently released. This enables support for Python 3,
-   however, it has not been integrated into pandas as of yet. (Under Python 2,
-   ``PyTables`` version >= 2.3 is supported).
-
+   ``PyTables`` 3.0.0 was recently released to enable support for Python 3.
+   Pandas should be fully compatible (and previously written stores should be
+   backwards compatible) with all ``PyTables`` >= 2.3.

 .. ipython:: python
    :suppress:

diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt
index b2fee1acbc4d6..badb364d214d1 100644
--- a/doc/source/v0.11.1.txt
+++ b/doc/source/v0.11.1.txt
@@ -237,6 +237,9 @@ Enhancements
     pd.get_option('a.b')
     pd.get_option('b.c')

+  - Support for ``HDFStore`` (via ``PyTables 3.0.0``) on Python3
+
+
 Bug Fixes
 ~~~~~~~~~

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 5a480e08effba..b4d312d55104f 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -522,15 +522,16 @@ def put(self, key, value, table=None, append=False, **kwargs):

         Parameters
         ----------
-        key : object
-        value : {Series, DataFrame, Panel}
-        table : boolean, default False
+        key : object
+        value : {Series, DataFrame, Panel}
+        table : boolean, default False
             Write as a PyTables Table structure which may perform worse but
             allow more flexible operations like searching / selecting subsets
             of the data
-        append : boolean, default False
+        append : boolean, default False
             For table data structures, append the input data to the existing
             table
+        encoding : default None, provide an encoding for strings
         """
         self._write_to_group(key, value, table=table, append=append, **kwargs)

@@ -595,6 +596,7 @@ def append(self, key, value, columns=None, **kwargs):
         nan_rep : string to use as string nan representation
         chunksize : size to chunk the writing
         expectedrows : expected TOTAL row size of this table
+        encoding : default None, provide an encoding for strings

         Notes
         -----

From aef951611f0552bfec14c211b7d350596d80f015 Mon Sep 17 00:00:00 2001
From: jreback
Date: Mon, 3 Jun 2013 17:41:01 -0400
Subject: [PATCH 3/7] ENH: provide py3k string decoding and compat

---
 RELEASE.rst                      |  12 ++
 pandas/io/pytables.py            | 290 +++++++++++++++++++------------
 pandas/io/tests/test_pytables.py |  15 ++
 pandas/lib.pyx                   |   9 +-
 4 files changed, 210 insertions(+), 116 deletions(-)
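The core of this patch is an optional ``encoding`` for string data in a store.
A minimal round-trip sketch (mirrors the ``test_encoding`` test added below;
the file name is a placeholder):

    from pandas import DataFrame, HDFStore

    store = HDFStore('tmp.h5')
    df = DataFrame(dict(A='foo', B='bar'), index=range(5))
    store.append('df', df, encoding='ascii')   # strings encoded to bytes on write
    result = store.select('df')                # decoded back to strings on read
    store.close()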
diff --git a/RELEASE.rst b/RELEASE.rst
index 28c4ce8becbb0..c05bb526ab715 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -64,9 +64,9 @@ pandas 0.11.1
   - support datelike columns with a timezone as data_columns (GH2852_)
   - table writing performance improvements.
-  - support python3 (via ``PyTables 3.0.0``)
+  - support py3 (via ``PyTables 3.0.0``)
   - Add modulo operator to Series, DataFrame
   - Add ``date`` method to DatetimeIndex
   - Simplified the API and added a describe method to Categorical
@@ -83,14 +83,14 @@ pandas 0.11.1

 **API Changes**

   - ``HDFStore``

     - When removing an object, ``remove(key)`` raises
       ``KeyError`` if the key is not a valid store object.
     - raise a ``TypeError`` on passing ``where`` or ``columns``
       to select with a Storer; these are invalid parameters at this time
-    - can now specify an ``encoding`` option to ``append/put``
+    - can now specify an ``encoding`` option to ``append`` and ``select``
       to enable alternate encodings
   - The repr() for (Multi)Index now obeys display.max_seq_items rather
     than numpy threshold print options. (GH3426_, GH3466_)
   - Added mangle_dupe_cols option to read_table/csv, allowing users

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index b4d312d55104f..87590fe65b5bb 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -13,7 +13,8 @@
 import numpy as np
 from pandas import (
-    Series, TimeSeries, DataFrame, Panel, Panel4D, Index, MultiIndex, Int64Index
+    Series, TimeSeries, DataFrame, Panel, Panel4D, Index,
+    MultiIndex, Int64Index, Timestamp
 )
 from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
 from pandas.sparse.array import BlockIndex, IntIndex
@@ -27,6 +28,7 @@
 from pandas.core.index import Int64Index, _ensure_index
 import pandas.core.common as com
 from pandas.tools.merge import concat
+from pandas.util import py3compat
 import pandas.lib as lib
 import pandas.algos as algos
@@ -37,6 +39,9 @@
 # versioning attribute
 _version = '0.10.1'

+# PY3 encoding if we don't specify
+_default_encoding = 'UTF-8'
+
 class IncompatibilityWarning(Warning): pass
 incompatibility_doc = """
 where criteria is being ignored as this version [%s] is too old (or not-defined),
@@ -56,40 +61,40 @@ class PerformanceWarning(Warning): pass

 # map object types
 _TYPE_MAP = {
-    Series          : 'series',
-    SparseSeries    : 'sparse_series',
-    TimeSeries      : 'series',
-    DataFrame       : 'frame',
-    SparseDataFrame : 'sparse_frame',
-    Panel           : 'wide',
-    Panel4D         : 'ndim',
-    SparsePanel     : 'sparse_panel'
+    Series          : u'series',
+    SparseSeries    : u'sparse_series',
+    TimeSeries      : u'series',
+    DataFrame       : u'frame',
+    SparseDataFrame : u'sparse_frame',
+    Panel           : u'wide',
+    Panel4D         : u'ndim',
+    SparsePanel     : u'sparse_panel'
 }

 # storer class map
 _STORER_MAP = {
-    'TimeSeries'    : 'LegacySeriesStorer',
-    'Series'        : 'LegacySeriesStorer',
-    'DataFrame'     : 'LegacyFrameStorer',
-    'DataMatrix'    : 'LegacyFrameStorer',
-    'series'        : 'SeriesStorer',
-    'sparse_series' : 'SparseSeriesStorer',
-    'frame'         : 'FrameStorer',
-    'sparse_frame'  : 'SparseFrameStorer',
-    'wide'          : 'PanelStorer',
-    'sparse_panel'  : 'SparsePanelStorer',
+    u'TimeSeries'    : 'LegacySeriesStorer',
+    u'Series'        : 'LegacySeriesStorer',
+    u'DataFrame'     : 'LegacyFrameStorer',
+    u'DataMatrix'    : 'LegacyFrameStorer',
+    u'series'        : 'SeriesStorer',
+    u'sparse_series' : 'SparseSeriesStorer',
+    u'frame'         : 'FrameStorer',
+    u'sparse_frame'  : 'SparseFrameStorer',
+    u'wide'          : 'PanelStorer',
+    u'sparse_panel'  : 'SparsePanelStorer',
 }

 # table class map
 _TABLE_MAP = {
-    'generic_table'         : 'GenericTable',
-    'appendable_frame'      : 'AppendableFrameTable',
-    'appendable_multiframe' : 'AppendableMultiFrameTable',
-    'appendable_panel'      : 'AppendablePanelTable',
-    'appendable_ndim'       : 'AppendableNDimTable',
-    'worm'                  : 'WORMTable',
-    'legacy_frame'          : 'LegacyFrameTable',
-    'legacy_panel'          : 'LegacyPanelTable',
+    u'generic_table'         : 'GenericTable',
+    u'appendable_frame'      : 'AppendableFrameTable',
+    u'appendable_multiframe' : 'AppendableMultiFrameTable',
+    u'appendable_panel'      : 'AppendablePanelTable',
+    u'appendable_ndim'       : 'AppendableNDimTable',
+    u'worm'                  : 'WORMTable',
+    u'legacy_frame'          : 'LegacyFrameTable',
+    u'legacy_panel'          : 'LegacyPanelTable',
 }

 # axes map
@@ -201,7 +206,7 @@ class HDFStore(object):
     complevel : int, 1-9, default 0
         If a complib is specified compression will be applied
         where possible
-    complib : {'zliu', 'bzip2', 'lzo', 'blosc', None}, default None
+    complib : {'zlib', 'bzip2', 'lzo', 'blosc', None}, default None
         If complevel is > 0 apply compression to objects written
         in the store wherever possible
     fletcher32 : bool, default False
@@ -694,7 +699,7 @@ def create_table_index(self, key, **kwargs):
     def groups(self):
         """ return a list of all the top-level nodes (that are not themselves a pandas storage object) """
         _tables()
-        return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr(g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != 'table') ]
+        return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr(g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != u'table') ]

     def get_node(self, key):
         """ return the node with the key or None if it does not exist """
@@ -772,8 +777,8 @@ def error(t):
             _tables()
             if getattr(group,'table',None) or isinstance(group,_table_mod.table.Table):
-                pt = 'frame_table'
-                tt = 'generic_table'
+                pt = u'frame_table'
+                tt = u'generic_table'
             else:
                 raise TypeError("cannot create a storer if the object is not existing nor a value are passed")
         else:
@@ -785,10 +790,10 @@ def error(t):

         # we are actually a table
         if table or append:
-            pt += '_table'
+            pt += u'_table'

         # a storer node
-        if 'table' not in pt:
+        if u'table' not in pt:
             try:
                 return globals()[_STORER_MAP[pt]](self, group, **kwargs)
             except:
@@ -800,26 +805,26 @@ def error(t):

         # if we are a writer, determine the tt
         if value is not None:

-            if pt == 'frame_table':
+            if pt == u'frame_table':
                 index = getattr(value,'index',None)
                 if index is not None:
                     if index.nlevels == 1:
-                        tt = 'appendable_frame'
+                        tt = u'appendable_frame'
                     elif index.nlevels > 1:
-                        tt = 'appendable_multiframe'
-            elif pt == 'wide_table':
-                tt = 'appendable_panel'
-            elif pt == 'ndim_table':
-                tt = 'appendable_ndim'
+                        tt = u'appendable_multiframe'
+            elif pt == u'wide_table':
+                tt = u'appendable_panel'
+            elif pt == u'ndim_table':
+                tt = u'appendable_ndim'

             else:

                 # distinguish between a frame/table
-                tt = 'legacy_panel'
+                tt = u'legacy_panel'
                 try:
                     fields = group.table._v_attrs.fields
-                    if len(fields) == 1 and fields[0] == 'value':
-                        tt = 'legacy_frame'
+                    if len(fields) == 1 and fields[0] == u'value':
+                        tt = u'legacy_frame'
                 except:
                     pass

@@ -892,7 +897,7 @@ class TableIterator(object):

     def __init__(self, func, nrows, start=None, stop=None, chunksize=None):
         self.func = func
-        self.nrows = nrows
+        self.nrows = nrows or 0
         self.start = start or 0

         if stop is None:
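The one-line change just above (``self.nrows = nrows or 0``) guards
TableIterator's chunk arithmetic when a storer reports no row count yet. A
sketch of the failure it avoids (hypothetical values, not part of the patch):

    nrows = None                # a not-yet-created table has no row count
    nrows = nrows or 0          # normalized: the iteration is simply empty
    chunksize = 100000
    for start in range(0, nrows, chunksize):
        pass                    # previously: ranging over None raised TypeError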
@@ -1017,7 +1022,7 @@ def infer(self, table):
         new_self.get_attr()
         return new_self

-    def convert(self, values, nan_rep):
+    def convert(self, values, nan_rep, encoding):
         """ set the values from this selection: take = take ownership """
         try:
             values = values[self.cname]
@@ -1032,13 +1037,13 @@ def convert(self, values, nan_rep):
         if self.index_name is not None:
             kwargs['name'] = self.index_name
         try:
-            self.values = Index(_maybe_convert(values, self.kind), **kwargs)
+            self.values = Index(_maybe_convert(values, self.kind, encoding), **kwargs)
         except:

             # if the output freq is different than what we recorded, then infer it
             if 'freq' in kwargs:
                 kwargs['freq'] = 'infer'
-            self.values = Index(_maybe_convert(values, self.kind), **kwargs)
+            self.values = Index(_maybe_convert(values, self.kind, encoding), **kwargs)

         return self

     def take_data(self):
@@ -1070,7 +1075,7 @@ def __iter__(self):

     def maybe_set_size(self, min_itemsize=None, **kwargs):
         """ maybe set a string col itemsize:
             min_itemsize can be an integer or a dict with this column's name with an integer size """
-        if self.kind == 'string':
+        if self.kind == u'string':

             if isinstance(min_itemsize, dict):
                 min_itemsize = min_itemsize.get(self.name)
@@ -1090,7 +1095,7 @@ def validate_col(self, itemsize=None):

         # validate this column for string truncation (or reset to the max size)
         dtype = getattr(self, 'dtype', None)
-        if self.kind == 'string':
+        if self.kind == u'string':

             c = self.col
             if c is not None:
@@ -1169,7 +1174,7 @@ class GenericIndexCol(IndexCol):
     def is_indexed(self):
         return False

-    def convert(self, values, nan_rep):
+    def convert(self, values, nan_rep, encoding):
         """ set the values from this selection: take = take ownership """
         self.values = Int64Index(np.arange(self.table.nrows))
@@ -1248,22 +1253,25 @@ def take_data(self):
     def set_kind(self):
         # set my kind if we can
         if self.dtype is not None:
-            if self.dtype.startswith('string'):
+            dtype = self.dtype
+            if dtype.startswith(u'string') or dtype.startswith(u'bytes'):
                 self.kind = 'string'
-            elif self.dtype.startswith('float'):
+            elif dtype.startswith(u'float'):
                 self.kind = 'float'
-            elif self.dtype.startswith('int'):
+            elif dtype.startswith(u'int') or dtype.startswith(u'uint'):
                 self.kind = 'integer'
-            elif self.dtype.startswith('date'):
+            elif dtype.startswith(u'date'):
                 self.kind = 'datetime'
-            elif self.dtype.startswith('bool'):
+            elif dtype.startswith(u'bool'):
                 self.kind = 'bool'
+            else:
+                raise AssertionError("cannot interpret dtype of [%s] in [%s]" % (dtype,self))

         # set my typ if we need
         if self.typ is None:
             self.typ = getattr(self.description,self.cname,None)

-    def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, **kwargs):
+    def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, encoding=None, **kwargs):
         """ create and setup my atom from the block b """

         self.values = list(block.items)
@@ -1306,7 +1314,7 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, **kwargs):
         # this is basically a catchall; if say a datetime64 has nans then will
         # end up here ###
         elif inferred_type == 'string' or dtype == 'object':
-            self.set_atom_string(block, existing_col, min_itemsize, nan_rep)
+            self.set_atom_string(block, existing_col, min_itemsize, nan_rep, encoding)
         else:
             self.set_atom_data(block)

@@ -1315,7 +1323,7 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, **kwargs):
     def get_atom_string(self, block, itemsize):
         return _tables().StringCol(itemsize=itemsize, shape=block.shape[0])

-    def set_atom_string(self, block, existing_col, min_itemsize, nan_rep):
+    def 
set_atom_string(self, block, existing_col, min_itemsize, nan_rep, encoding): # fill nan items with myself block = block.fillna(nan_rep) data = block.values @@ -1336,7 +1344,7 @@ def set_atom_string(self, block, existing_col, min_itemsize, nan_rep): # itemsize is the maximum length of a string (along any dimension) - itemsize = lib.max_len_string_array(data.ravel()) + itemsize = lib.max_len_string_array(com._ensure_object(data.ravel())) # specified min_itemsize? if isinstance(min_itemsize, dict): @@ -1353,10 +1361,10 @@ def set_atom_string(self, block, existing_col, min_itemsize, nan_rep): self.itemsize = itemsize self.kind = 'string' self.typ = self.get_atom_string(block, itemsize) - self.set_data(self.convert_string_data(data, itemsize)) + self.set_data(self.convert_string_data(data, itemsize, encoding)) - def convert_string_data(self, data, itemsize): - return data.astype('S%s' % itemsize) + def convert_string_data(self, data, itemsize, encoding): + return _convert_string_array(data, encoding, itemsize) def get_atom_coltype(self): """ return the PyTables column class for this column """ @@ -1409,7 +1417,7 @@ def validate_attr(self, append): raise ValueError("appended items dtype do not match existing items dtype" " in table!") - def convert(self, values, nan_rep): + def convert(self, values, nan_rep, encoding): """ set the data from this selection (and convert to the correct dtype if we can) """ try: values = values[self.cname] @@ -1421,7 +1429,7 @@ def convert(self, values, nan_rep): if self.dtype is not None: # reverse converts - if self.dtype == 'datetime64': + if self.dtype == u'datetime64': # recreate the timezone if self.tz is not None: @@ -1434,10 +1442,10 @@ def convert(self, values, nan_rep): else: self.data = np.asarray(self.data, dtype='M8[ns]') - elif self.dtype == 'date': + elif self.dtype == u'date': self.data = np.array( [date.fromtimestamp(v) for v in self.data], dtype=object) - elif self.dtype == 'datetime': + elif self.dtype == u'datetime': self.data = np.array( [datetime.fromtimestamp(v) for v in self.data], dtype=object) @@ -1448,16 +1456,16 @@ def convert(self, values, nan_rep): except: self.data = self.data.astype('O') - # convert nans - if self.kind == 'string': - self.data = lib.array_replace_from_nan_rep( - self.data.ravel(), nan_rep).reshape(self.data.shape) + # convert nans / decode + if self.kind == u'string': + self.data = _unconvert_string_array(self.data, nan_rep=nan_rep, encoding=encoding) + return self def get_attr(self): """ get the data for this colummn """ self.values = getattr(self.attrs, self.kind_attr, None) - self.dtype = getattr(self.attrs, self.dtype_attr, None) + self.dtype = getattr(self.attrs, self.dtype_attr, None) self.set_kind() def set_attr(self): @@ -1473,7 +1481,7 @@ class DataIndexableCol(DataCol): @property def is_searchable(self): - return self.kind == 'string' + return self.kind == u'string' def get_atom_string(self, block, itemsize): return _tables().StringCol(itemsize=itemsize) @@ -1702,7 +1710,7 @@ def read_array(self, key): else: ret = data - if dtype == 'datetime64': + if dtype == u'datetime64': ret = np.array(ret, dtype='M8[ns]') if transposed: @@ -1713,13 +1721,13 @@ def read_array(self, key): def read_index(self, key): variety = getattr(self.attrs, '%s_variety' % key) - if variety == 'multi': + if variety == u'multi': return self.read_multi_index(key) - elif variety == 'block': + elif variety == u'block': return self.read_block_index(key) - elif variety == 'sparseint': + elif variety == u'sparseint': return 
self.read_sparse_intindex(key) - elif variety == 'regular': + elif variety == u'regular': _, index = self.read_index_node(getattr(self.group, key)) return index else: # pragma: no cover @@ -1979,7 +1987,7 @@ def read(self, **kwargs): sp_values = self.read_array('sp_values') sp_index = self.read_index('sp_index') return SparseSeries(sp_values, index=index, sparse_index=sp_index, - kind=self.kind or 'block', fill_value=self.fill_value, + kind=self.kind or u'block', fill_value=self.fill_value, name=self.name) def write(self, obj, **kwargs): @@ -2167,6 +2175,7 @@ def __init__(self, *args, **kwargs): self.data_columns = [] self.info = dict() self.nan_rep = None + self.encoding = None self.selection = None @property @@ -2227,7 +2236,7 @@ def nrows_expected(self): @property def is_exists(self): """ has this table been created """ - return 'table' in self.group + return u'table' in self.group @property def storable(self): @@ -2293,6 +2302,7 @@ def set_attrs(self): self.attrs.non_index_axes = self.non_index_axes self.attrs.data_columns = self.data_columns self.attrs.nan_rep = self.nan_rep + self.attrs.encoding = self.encoding self.attrs.levels = self.levels self.set_info() @@ -2302,6 +2312,7 @@ def get_attrs(self): self.data_columns = getattr(self.attrs,'data_columns',None) or [] self.info = getattr(self.attrs,'info',None) or dict() self.nan_rep = getattr(self.attrs,'nan_rep',None) + self.encoding = getattr(self.attrs,'encoding',None) self.levels = getattr(self.attrs,'levels',None) or [] t = self.table self.index_axes = [ a.infer(t) for a in self.indexables if a.is_an_indexable ] @@ -2432,7 +2443,7 @@ def read_axes(self, where, **kwargs): # convert the data for a in self.axes: a.set_info(self.info) - a.convert(values, nan_rep=self.nan_rep) + a.convert(values, nan_rep=self.nan_rep, encoding=self.encoding) return True @@ -2464,7 +2475,7 @@ def validate_data_columns(self, data_columns, min_itemsize): # return valid columns in the order of our axis return [c for c in data_columns if c in axis_labels] - def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, min_itemsize=None, **kwargs): + def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, min_itemsize=None, encoding=None, **kwargs): """ create and return the axes leagcy tables create an indexable column, indexable index, non-indexable fields @@ -2475,6 +2486,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, validate: validate the obj against an existiing object already written min_itemsize: a dict of the min size for a column in bytes nan_rep : a values to use for string column nan_rep + encoding : the encoding for string values data_columns : a list of columns that we want to create separate to allow indexing (or True will force all colummns) """ @@ -2497,6 +2509,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, axes = [ a.axis for a in existing_table.index_axes] data_columns = existing_table.data_columns nan_rep = existing_table.nan_rep + encoding = existing_table.encoding self.info = copy.copy(existing_table.info) else: existing_table = None @@ -2509,9 +2522,16 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, self.non_index_axes = [] self.data_columns = [] + # encoding + if encoding is None: + if py3compat.PY3: + encoding = _default_encoding + self.encoding = encoding + # nan_representation if nan_rep is None: nan_rep = 'nan' + self.nan_rep = nan_rep # create axes to index and non_index @@ 
-2521,7 +2541,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, if i in axes: name = obj._AXIS_NAMES[i] index_axes_map[i] = _convert_index( - a).set_name(name).set_axis(i) + a, self.encoding).set_name(name).set_axis(i) else: # we might be able to change the axes on the appending data if @@ -2597,6 +2617,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, existing_col=existing_col, min_itemsize=min_itemsize, nan_rep=nan_rep, + encoding=encoding, info=self.info, **kwargs) col.set_pos(j) @@ -2718,7 +2739,7 @@ def read_column(self, column, where = None, **kwargs): # column must be an indexable or a data column c = getattr(self.table.cols, column) a.set_info(self.info) - return Series(a.convert(c[:], nan_rep=self.nan_rep).take_data()) + return Series(a.convert(c[:], nan_rep=self.nan_rep, encoding=self.encoding).take_data()) raise KeyError("column [%s] not found in the table" % column) @@ -2863,14 +2884,15 @@ class AppendableTable(LegacyTable): def write(self, obj, axes=None, append=False, complib=None, complevel=None, fletcher32=None, min_itemsize=None, chunksize=None, - expectedrows=None, **kwargs): + expectedrows=None, encoding=None, **kwargs): if not append and self.is_exists: self._handle.removeNode(self.group, 'table') # create the axes self.create_axes(axes=axes, obj=obj, validate=append, - min_itemsize=min_itemsize, **kwargs) + min_itemsize=min_itemsize, encoding=encoding, + **kwargs) if not self.is_exists: @@ -3173,7 +3195,7 @@ class AppendableNDimTable(AppendablePanelTable): ndim = 4 obj_type = Panel4D -def _convert_index(index): +def _convert_index(index, encoding=None): index_name = getattr(index,'name',None) if isinstance(index, DatetimeIndex): @@ -3213,7 +3235,7 @@ def _convert_index(index): # atom = _tables().ObjectAtom() # return np.asarray(values, dtype='O'), 'object', atom - converted = np.array(list(values), dtype=np.str_) + converted = _convert_string_array(values, encoding) itemsize = converted.dtype.itemsize return IndexCol(converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize, index_name=index_name) @@ -3236,47 +3258,77 @@ def _convert_index(index): index_name=index_name) def _unconvert_index(data, kind): - if kind == 'datetime64': + if kind == u'datetime64': index = DatetimeIndex(data) - elif kind == 'datetime': + elif kind == u'datetime': index = np.array([datetime.fromtimestamp(v) for v in data], dtype=object) - elif kind == 'date': + elif kind == u'date': index = np.array([date.fromtimestamp(v) for v in data], dtype=object) - elif kind in ('string', 'integer', 'float'): + elif kind in (u'string', u'integer', u'float'): index = np.array(data) - elif kind == 'object': + elif kind == u'object': index = np.array(data[0]) else: # pragma: no cover raise ValueError('unrecognized index type %s' % kind) return index def _unconvert_index_legacy(data, kind, legacy=False): - if kind == 'datetime': + if kind == u'datetime': index = lib.time64_to_datetime(data) - elif kind in ('string', 'integer'): + elif kind in (u'string', u'integer'): index = np.array(data, dtype=object) else: # pragma: no cover raise ValueError('unrecognized index type %s' % kind) return index -def _maybe_convert(values, val_kind): +def _convert_string_array(data, encoding, itemsize=None): + + # encode if needed + if encoding is not None: + f = np.vectorize(lambda x: x.encode(encoding)) + data = f(data) + + # create the sized dtype + if itemsize is None: + itemsize = lib.max_len_string_array(com._ensure_object(data.ravel())) + 
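+    # note (illustrative): itemsize above is measured on the already-encoded
+    # values, so a multi-byte character such as u'\xe9' counts as 2 bytes
+    # under UTF-8 before the fixed-width 'S%d' cast that follows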
+    data = np.array(data,dtype="S%d" % itemsize)
+    return data
+
+def _unconvert_string_array(data, nan_rep=None, encoding=None):
+    """ deserialize a string array, possibly decoding """
+    shape = data.shape
+    data = np.array(data.ravel(),dtype=object)
+    if encoding is not None:
+        f = np.vectorize(lambda x: x.decode(encoding),otypes=[np.object])
+        data = f(data)
+
+    if nan_rep is None:
+        nan_rep = 'nan'
+
+    data = lib.string_array_replace_from_nan_rep(data, nan_rep)
+    return data.reshape(shape)
+
+def _maybe_convert(values, val_kind, encoding):
     if _need_convert(val_kind):
-        conv = _get_converter(val_kind)
+        conv = _get_converter(val_kind, encoding)
         # conv = np.frompyfunc(conv, 1, 1)
         values = conv(values)
     return values

-def _get_converter(kind):
+def _get_converter(kind, encoding):
     if kind == 'datetime64':
         return lambda x: np.array(x, dtype='M8[ns]')
-    if kind == 'datetime':
+    elif kind == 'datetime':
         return lib.convert_timestamps
+    elif kind == 'string':
+        return lambda x: _unconvert_string_array(x,encoding=encoding)
     else: # pragma: no cover
         raise ValueError('invalid kind %s' % kind)

 def _need_convert(kind):
-    if kind in ('datetime', 'datetime64'):
+    if kind in ('datetime', 'datetime64', 'string'):
         return True
     return False

@@ -3290,6 +3342,7 @@ class Term(object):
         >, >=, <, <=, =, != (not equal) are allowed
     value : a value or list of values (required)
     queryables : a kinds map (dict of column name -> kind), or None if column is non-indexable
+    encoding : an encoding that will encode the query terms

     Returns
     -------
@@ -3303,14 +3356,14 @@ class Term(object):
     >>> Term('index', ['20121114','20121114'])
     >>> Term('index', datetime(2012,11,14))
     >>> Term('major_axis>20121114')
     >>> Term('minor_axis', ['A','B'])
     """

     _ops = ['<=', '<', '>=', '>', '!=', '==', '=']
     _search = re.compile("^\s*(?P\w+)\s*(?P%s)\s*(?P.+)\s*$" % '|'.join(_ops))
     _max_selectors = 31

-    def __init__(self, field, op=None, value=None, queryables=None):
+    def __init__(self, field, op=None, value=None, queryables=None, encoding=None):
         self.field = None
         self.op = None
         self.value = None
@@ -3318,6 +3371,12 @@ def __init__(self, field, op=None, value=None, queryables=None):
         self.filter = None
         self.condition = None

+        if py3compat.PY3:
+            if encoding is None:
+                encoding = _default_encoding
+
+        self.encoding = encoding
+
         # unpack lists/tuples in field
         while(isinstance(field, (tuple, list))):
             f = field
@@ -3446,7 +3505,7 @@ def eval(self):
                 self.condition = '(%s %s %s)' % (
                     self.field, self.op, values[0][0])
-
+
             else:

                 raise TypeError("passing a filterable condition to a non-table indexer [%s]" % str(self))
@@ -3454,32 +3513,39 @@ def eval(self):
     def convert_value(self, v):
         """ convert the expression that is in the term to something that is accepted by pytables """

+        def stringify(value):
+            value = str(value)
+            if self.encoding is not None:
+                value = value.encode(self.encoding)
+            return value
+
-        if self.kind == 'datetime64' or self.kind == 'datetime' :
+        kind = self.kind
+        if kind == u'datetime64' or kind == u'datetime' :
             v = lib.Timestamp(v)
             if v.tz is not None:
                 v = v.tz_convert('UTC')
             return [v.value, v]
-        elif isinstance(v, datetime) or hasattr(v, 'timetuple') or self.kind == 'date':
+        elif isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u'date':
             v = time.mktime(v.timetuple())
             return [v, Timestamp(v) ]
-        elif self.kind == 'integer':
+        elif kind == u'integer':
             v = int(float(v))
             return [v, v]
-        elif self.kind == 'float':
+        elif kind == u'float':
             v = float(v)
             return [v, v]
-        elif self.kind == 
'bool': + elif kind == u'bool': if isinstance(v, basestring): - v = not str(v).strip().lower() in ["false", "f", "no", "n", "none", "0", "[]", "{}", ""] + v = not stringify(v).strip().lower() in [u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u''] else: v = bool(v) return [v, v] elif not isinstance(v, basestring): - v = str(v) + v = stringify(v) return [v, v] # string quoting - return ["'" + v + "'", v] + return [stringify("'" + v + "'"), stringify(v)] class Coordinates(object): @@ -3533,6 +3599,8 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs): conds = [t.condition for t in self.terms if t.condition is not None] if len(conds): self.condition = "(%s)" % ' & '.join(conds) + #if self.table.encoding is not None: + # self.condition = self.condition.encode(self.table.encoding) self.filter = [] for t in self.terms: if t.filter is not None: @@ -3555,7 +3623,7 @@ def generate(self, where): where = [where] queryables = self.table.queryables() - return [Term(c, queryables=queryables) for c in where] + return [Term(c, queryables=queryables, encoding=self.table.encoding) for c in where] def select(self): """ diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index d0f03774f2070..7a7bca02b1cd2 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -474,6 +474,20 @@ def test_append(self): store.append('uints', uint_data, data_columns=['u08','u16','u32']) # 64-bit indices not yet supported tm.assert_frame_equal(store['uints'], uint_data) + def test_encoding(self): + + with ensure_clean(self.path) as store: + df = DataFrame(dict(A='foo',B='bar'),index=range(5)) + df.loc[2,'A'] = np.nan + df.loc[3,'B'] = np.nan + _maybe_remove(store, 'df') + store.append('df', df, encoding='ascii') + tm.assert_frame_equal(store['df'], df) + + expected = df.reindex(columns=['A']) + result = store.select('df',Term('columns=A',encoding='ascii')) + tm.assert_frame_equal(result,expected) + def test_append_some_nans(self): with ensure_clean(self.path) as store: @@ -556,6 +570,7 @@ def test_append_some_nans(self): def test_append_frame_column_oriented(self): with ensure_clean(self.path) as store: + import pdb; pdb.set_trace() # column oriented df = tm.makeTimeDataFrame() _maybe_remove(store, 'df1') diff --git a/pandas/lib.pyx b/pandas/lib.pyx index 15791a984ecc5..a80ad5b7d0208 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -14,6 +14,7 @@ from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, Py_INCREF, PyTuple_SET_ITEM, PyList_Check, PyFloat_Check, PyString_Check, + PyBytes_Check, PyTuple_SetItem, PyTuple_New, PyObject_SetAttrString) @@ -762,7 +763,7 @@ def max_len_string_array(ndarray[object, ndim=1] arr): m = 0 for i from 0 <= i < length: v = arr[i] - if PyString_Check(v): + if PyString_Check(v) or PyBytes_Check(v): l = len(v) if l > m: @@ -772,11 +773,10 @@ def max_len_string_array(ndarray[object, ndim=1] arr): @cython.boundscheck(False) @cython.wraparound(False) -def array_replace_from_nan_rep(ndarray[object, ndim=1] arr, object nan_rep, object replace = None): +def string_array_replace_from_nan_rep(ndarray[object, ndim=1] arr, object nan_rep, object replace = None): """ replace the values in the array with replacement if they are nan_rep; return the same array """ - cdef int length = arr.shape[0] - cdef int i = 0 + cdef int length = arr.shape[0], i = 0 if replace is None: replace = np.nan @@ -788,7 +788,6 @@ def array_replace_from_nan_rep(ndarray[object, ndim=1] arr, object nan_rep, obje 
@cython.boundscheck(False) @cython.wraparound(False) - def write_csv_rows(list data, list data_index, int nlevels, list cols, object writer): cdef int N, j, i, ncols From 8bbfb2e3e18f59860d845c48d6fdf29d22f42877 Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 4 Jun 2013 22:04:28 -0400 Subject: [PATCH 4/7] ENH: added TermValue, and do readWhere with condvars --- pandas/io/pytables.py | 248 ++++++++++++++++++------------- pandas/io/tests/test_pytables.py | 6 +- 2 files changed, 152 insertions(+), 102 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 87590fe65b5bb..b36985d65569a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -19,7 +19,7 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel from pandas.sparse.array import BlockIndex, IntIndex from pandas.tseries.api import PeriodIndex, DatetimeIndex -from pandas.core.common import adjoin, isnull +from pandas.core.common import adjoin, isnull, is_list_like from pandas.core.algorithms import match, unique, factorize from pandas.core.categorical import Categorical from pandas.core.common import _asarray_tuplesafe, _try_sort @@ -42,6 +42,18 @@ # PY3 encoding if we don't specify _default_encoding = 'UTF-8' +def _ensure_decoded(s): + """ if we have bytes, decode them to unicde """ + if isinstance(s, np.bytes_): + s = s.decode('UTF-8') + return s +def _ensure_encoding(encoding): + # set the encoding if we need + if encoding is None: + if py3compat.PY3: + encoding = _default_encoding + return encoding + class IncompatibilityWarning(Warning): pass incompatibility_doc = """ where criteria is being ignored as this version [%s] is too old (or not-defined), @@ -768,8 +780,8 @@ def error(t): raise TypeError("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" % (t,group,type(value),table,append,kwargs)) - pt = getattr(group._v_attrs,'pandas_type',None) - tt = getattr(group._v_attrs,'table_type',None) + pt = _ensure_decoded(getattr(group._v_attrs,'pandas_type',None)) + tt = _ensure_decoded(getattr(group._v_attrs,'table_type',None)) # infer the pt from the passed value if pt is None: @@ -833,7 +845,7 @@ def error(t): except: error('_TABLE_MAP') - def _write_to_group(self, key, value, index=True, table=False, append=False, complib=None, **kwargs): + def _write_to_group(self, key, value, index=True, table=False, append=False, complib=None, encoding=None, **kwargs): group = self.get_node(key) # remove the node if we are not appending @@ -858,7 +870,7 @@ def _write_to_group(self, key, value, index=True, table=False, append=False, com group = self._handle.createGroup(path, p) path = new_path - s = self._create_storer(group, value, table=table, append=append, **kwargs) + s = self._create_storer(group, value, table=table, append=append, encoding=encoding, **kwargs) if append: # raise if we are trying to append to a non-table, # or a table that exists (and we are putting) @@ -1075,7 +1087,7 @@ def __iter__(self): def maybe_set_size(self, min_itemsize=None, **kwargs): """ maybe set a string col itemsize: min_itemsize can be an interger or a dict with this columns name with an integer size """ - if self.kind == u'string': + if _ensure_decoded(self.kind) == u'string': if isinstance(min_itemsize, dict): min_itemsize = min_itemsize.get(self.name) @@ -1095,7 +1107,7 @@ def validate_col(self, itemsize=None): # validate this column for string truncation (or reset to the max size) dtype = getattr(self, 'dtype', None) - if self.kind == u'string': + if 
_ensure_decoded(self.kind) == u'string': c = self.col if c is not None: @@ -1225,7 +1237,7 @@ def __init__(self, values=None, kind=None, typ=None, cname=None, data=None, bloc super(DataCol, self).__init__( values=values, kind=kind, typ=typ, cname=cname, **kwargs) self.dtype = None - self.dtype_attr = "%s_dtype" % self.name + self.dtype_attr = u"%s_dtype" % self.name self.set_data(data) def __repr__(self): @@ -1253,7 +1265,7 @@ def take_data(self): def set_kind(self): # set my kind if we can if self.dtype is not None: - dtype = self.dtype + dtype = _ensure_decoded(self.dtype) if dtype.startswith(u'string') or dtype.startswith(u'bytes'): self.kind = 'string' elif dtype.startswith(u'float'): @@ -1427,9 +1439,10 @@ def convert(self, values, nan_rep, encoding): # convert to the correct dtype if self.dtype is not None: + dtype = _ensure_decoded(self.dtype) # reverse converts - if self.dtype == u'datetime64': + if dtype == u'datetime64': # recreate the timezone if self.tz is not None: @@ -1442,22 +1455,22 @@ def convert(self, values, nan_rep, encoding): else: self.data = np.asarray(self.data, dtype='M8[ns]') - elif self.dtype == u'date': + elif dtype == u'date': self.data = np.array( [date.fromtimestamp(v) for v in self.data], dtype=object) - elif self.dtype == u'datetime': + elif dtype == u'datetime': self.data = np.array( [datetime.fromtimestamp(v) for v in self.data], dtype=object) else: try: - self.data = self.data.astype(self.dtype) + self.data = self.data.astype(dtype) except: self.data = self.data.astype('O') # convert nans / decode - if self.kind == u'string': + if _ensure_decoded(self.kind) == u'string': self.data = _unconvert_string_array(self.data, nan_rep=nan_rep, encoding=encoding) return self @@ -1481,7 +1494,7 @@ class DataIndexableCol(DataCol): @property def is_searchable(self): - return self.kind == u'string' + return _ensure_decoded(self.kind) == u'string' def get_atom_string(self, block, itemsize): return _tables().StringCol(itemsize=itemsize) @@ -1514,9 +1527,10 @@ class Storer(object): ndim = None is_table = False - def __init__(self, parent, group, **kwargs): + def __init__(self, parent, group, encoding=None, **kwargs): self.parent = parent self.group = group + self.encoding = _ensure_encoding(encoding) self.set_version() @property @@ -1525,7 +1539,7 @@ def is_old_version(self): def set_version(self): """ compute and set our version """ - version = getattr(self.group._v_attrs,'pandas_version',None) + version = _ensure_decoded(getattr(self.group._v_attrs,'pandas_version',None)) try: self.version = tuple([int(x) for x in version.split('.')]) if len(self.version) == 2: @@ -1535,7 +1549,7 @@ def set_version(self): @property def pandas_type(self): - return getattr(self.group._v_attrs, 'pandas_type', None) + return _ensure_decoded(getattr(self.group._v_attrs, 'pandas_type', None)) def __repr__(self): """ return a pretty representatgion of myself """ @@ -1684,11 +1698,19 @@ def validate_read(self, kwargs): def is_exists(self): return True + def set_attrs(self): + """ set our object attributes """ + self.attrs.encoding = self.encoding + def get_attrs(self): """ retrieve our attributes """ + self.encoding = _ensure_encoding(getattr(self.attrs,'encoding',None)) for n in self.attributes: setattr(self,n,getattr(self.attrs, n, None)) + def write(self, obj, **kwargs): + self.set_attrs() + def read_array(self, key): """ read an array for the specified node (off of group """ import tables @@ -1719,7 +1741,7 @@ def read_array(self, key): return ret def read_index(self, key): - variety = 
getattr(self.attrs, '%s_variety' % key) + variety = _ensure_decoded(getattr(self.attrs, '%s_variety' % key)) if variety == u'multi': return self.read_multi_index(key) @@ -1745,7 +1767,7 @@ def write_index(self, key, index): self.write_sparse_intindex(key, index) else: setattr(self.attrs, '%s_variety' % key, 'regular') - converted = _convert_index(index).set_name('index') + converted = _convert_index(index,self.encoding).set_name('index') self.write_array(key, converted.values) node = getattr(self.group, key) node._v_attrs.kind = converted.kind @@ -1792,7 +1814,7 @@ def write_multi_index(self, key, index): index.names)): # write the level level_key = '%s_level%d' % (key, i) - conv_level = _convert_index(lev).set_name(level_key) + conv_level = _convert_index(lev, self.encoding).set_name(level_key) self.write_array(level_key, conv_level.values) node = getattr(self.group, level_key) node._v_attrs.kind = conv_level.kind @@ -1843,16 +1865,15 @@ def read_index_node(self, node): kwargs['tz'] = node._v_attrs['tz'] if kind in ('date', 'datetime'): - index = factory(_unconvert_index(data, kind), dtype=object, + index = factory(_unconvert_index(data, kind, encoding=self.encoding), dtype=object, **kwargs) else: - index = factory(_unconvert_index(data, kind), **kwargs) + index = factory(_unconvert_index(data, kind, encoding=self.encoding), **kwargs) index.name = name return name, index - def write_array_empty(self, key, value): """ write a 0-len array """ @@ -1932,7 +1953,7 @@ def read_index_legacy(self, key): node = getattr(self.group,key) data = node[:] kind = node._v_attrs.kind - return _unconvert_index_legacy(data, kind) + return _unconvert_index_legacy(data, kind, encoding=self.encoding) class LegacySeriesStorer(LegacyStorer): @@ -1952,7 +1973,7 @@ def read(self, **kwargs): return DataFrame(values, index=index, columns=columns) class SeriesStorer(GenericStorer): - pandas_kind = 'series' + pandas_kind = u'series' attributes = ['name'] @property @@ -1973,12 +1994,13 @@ def read(self, **kwargs): return Series(values, index=index, name=self.name) def write(self, obj, **kwargs): + super(SeriesStorer, self).write(obj, **kwargs) self.write_index('index', obj.index) self.write_array('values', obj.values) self.attrs.name = obj.name class SparseSeriesStorer(GenericStorer): - pandas_kind = 'sparse_series' + pandas_kind = u'sparse_series' attributes = ['name','fill_value','kind'] def read(self, **kwargs): @@ -1991,6 +2013,7 @@ def read(self, **kwargs): name=self.name) def write(self, obj, **kwargs): + super(SparseSeriesStorer, self).write(obj, **kwargs) self.write_index('index', obj.index) self.write_index('sp_index', obj.sp_index) self.write_array('sp_values', obj.sp_values) @@ -1999,7 +2022,7 @@ def write(self, obj, **kwargs): self.attrs.kind = obj.kind class SparseFrameStorer(GenericStorer): - pandas_kind = 'sparse_frame' + pandas_kind = u'sparse_frame' attributes = ['default_kind','default_fill_value'] def read(self, **kwargs): @@ -2017,6 +2040,7 @@ def read(self, **kwargs): def write(self, obj, **kwargs): """ write it as a collection of individual sparse series """ + super(SparseFrameStorer, self).write(obj, **kwargs) for name, ss in obj.iteritems(): key = 'sparse_series_%s' % name if key not in self.group._v_children: @@ -2030,7 +2054,7 @@ def write(self, obj, **kwargs): self.write_index('columns', obj.columns) class SparsePanelStorer(GenericStorer): - pandas_kind = 'sparse_panel' + pandas_kind = u'sparse_panel' attributes = ['default_kind','default_fill_value'] def read(self, **kwargs): @@ -2048,6 
+2072,7 @@ def read(self, **kwargs): default_fill_value=self.default_fill_value) def write(self, obj, **kwargs): + super(SparsePanelStorer, self).write(obj, **kwargs) self.attrs.default_fill_value = obj.default_fill_value self.attrs.default_kind = obj.default_kind self.write_index('items', obj.items) @@ -2115,6 +2140,7 @@ def read(self, **kwargs): return self.obj_type(BlockManager(blocks, axes)) def write(self, obj, **kwargs): + super(BlockManagerStorer, self).write(obj, **kwargs) data = obj._data if not data.is_consolidated(): data = data.consolidate() @@ -2132,11 +2158,11 @@ def write(self, obj, **kwargs): self.write_index('block%d_items' % i, blk.items) class FrameStorer(BlockManagerStorer): - pandas_kind = 'frame' + pandas_kind = u'frame' obj_type = DataFrame class PanelStorer(BlockManagerStorer): - pandas_kind = 'wide' + pandas_kind = u'wide' obj_type = Panel is_shape_reversed = True @@ -2161,7 +2187,7 @@ class Table(Storer): levels : the names of levels """ - pandas_kind = 'wide_table' + pandas_kind = u'wide_table' table_type = None levels = 1 is_table = True @@ -2175,7 +2201,6 @@ def __init__(self, *args, **kwargs): self.data_columns = [] self.info = dict() self.nan_rep = None - self.encoding = None self.selection = None @property @@ -2312,7 +2337,7 @@ def get_attrs(self): self.data_columns = getattr(self.attrs,'data_columns',None) or [] self.info = getattr(self.attrs,'info',None) or dict() self.nan_rep = getattr(self.attrs,'nan_rep',None) - self.encoding = getattr(self.attrs,'encoding',None) + self.encoding = _ensure_encoding(getattr(self.attrs,'encoding',None)) self.levels = getattr(self.attrs,'levels',None) or [] t = self.table self.index_axes = [ a.infer(t) for a in self.indexables if a.is_an_indexable ] @@ -2475,7 +2500,7 @@ def validate_data_columns(self, data_columns, min_itemsize): # return valid columns in the order of our axis return [c for c in data_columns if c in axis_labels] - def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, min_itemsize=None, encoding=None, **kwargs): + def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, min_itemsize=None, **kwargs): """ create and return the axes leagcy tables create an indexable column, indexable index, non-indexable fields @@ -2506,11 +2531,11 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, if self.infer_axes(): existing_table = self.copy() existing_table.infer_axes() - axes = [ a.axis for a in existing_table.index_axes] - data_columns = existing_table.data_columns - nan_rep = existing_table.nan_rep - encoding = existing_table.encoding - self.info = copy.copy(existing_table.info) + axes = [ a.axis for a in existing_table.index_axes] + data_columns = existing_table.data_columns + nan_rep = existing_table.nan_rep + self.encoding = existing_table.encoding + self.info = copy.copy(existing_table.info) else: existing_table = None @@ -2522,12 +2547,6 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, self.non_index_axes = [] self.data_columns = [] - # encoding - if encoding is None: - if py3compat.PY3: - encoding = _default_encoding - self.encoding = encoding - # nan_representation if nan_rep is None: nan_rep = 'nan' @@ -2617,7 +2636,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, existing_col=existing_col, min_itemsize=min_itemsize, nan_rep=nan_rep, - encoding=encoding, + encoding=self.encoding, info=self.info, **kwargs) col.set_pos(j) @@ -2748,7 +2767,7 @@ class 
WORMTable(Table): table. writing is a one-time operation the data are stored in a format that allows for searching the data on disk """ - table_type = 'worm' + table_type = u'worm' def read(self, **kwargs): """ read the indicies and the indexing array, calculate offset rows and @@ -2773,7 +2792,7 @@ class LegacyTable(Table): IndexCol(name='column', axis=2, pos=1, index_kind='columns_kind'), DataCol(name='fields', cname='values', kind_attr='fields', pos=2)] - table_type = 'legacy' + table_type = u'legacy' ndim = 3 def write(self, **kwargs): @@ -2863,8 +2882,8 @@ def read(self, where=None, columns=None, **kwargs): class LegacyFrameTable(LegacyTable): """ support the legacy frame table """ - pandas_kind = 'frame_table' - table_type = 'legacy_frame' + pandas_kind = u'frame_table' + table_type = u'legacy_frame' obj_type = Panel def read(self, *args, **kwargs): @@ -2873,25 +2892,25 @@ def read(self, *args, **kwargs): class LegacyPanelTable(LegacyTable): """ support the legacy panel table """ - table_type = 'legacy_panel' + table_type = u'legacy_panel' obj_type = Panel class AppendableTable(LegacyTable): """ suppor the new appendable table formats """ _indexables = None - table_type = 'appendable' + table_type = u'appendable' def write(self, obj, axes=None, append=False, complib=None, complevel=None, fletcher32=None, min_itemsize=None, chunksize=None, - expectedrows=None, encoding=None, **kwargs): + expectedrows=None, **kwargs): if not append and self.is_exists: self._handle.removeNode(self.group, 'table') # create the axes self.create_axes(axes=axes, obj=obj, validate=append, - min_itemsize=min_itemsize, encoding=encoding, + min_itemsize=min_itemsize, **kwargs) if not self.is_exists: @@ -3043,8 +3062,8 @@ def delete(self, where=None, **kwargs): class AppendableFrameTable(AppendableTable): """ suppor the new appendable table formats """ - pandas_kind = 'frame_table' - table_type = 'appendable_frame' + pandas_kind = u'frame_table' + table_type = u'appendable_frame' ndim = 2 obj_type = DataFrame @@ -3098,8 +3117,8 @@ def read(self, where=None, columns=None, **kwargs): class GenericTable(AppendableFrameTable): """ a table that read/writes the generic pytables table format """ - pandas_kind = 'frame_table' - table_type = 'generic_table' + pandas_kind = u'frame_table' + table_type = u'generic_table' ndim = 2 obj_type = DataFrame @@ -3143,13 +3162,13 @@ def write(self, **kwargs): class AppendableMultiFrameTable(AppendableFrameTable): """ a frame with a multi-index """ - table_type = 'appendable_multiframe' + table_type = u'appendable_multiframe' obj_type = DataFrame ndim = 2 @property def table_type_short(self): - return 'appendable_multi' + return u'appendable_multi' def write(self, obj, data_columns=None, **kwargs): if data_columns is None: @@ -3174,7 +3193,7 @@ def read(self, columns=None, **kwargs): class AppendablePanelTable(AppendableTable): """ suppor the new appendable table formats """ - table_type = 'appendable_panel' + table_type = u'appendable_panel' ndim = 3 obj_type = Panel @@ -3191,7 +3210,7 @@ def is_transposed(self): class AppendableNDimTable(AppendablePanelTable): """ suppor the new appendable table formats """ - table_type = 'appendable_ndim' + table_type = u'appendable_ndim' ndim = 4 obj_type = Panel4D @@ -3257,7 +3276,8 @@ def _convert_index(index, encoding=None): return IndexCol(np.asarray(values, dtype='O'), 'object', atom, index_name=index_name) -def _unconvert_index(data, kind): +def _unconvert_index(data, kind, encoding=None): + kind = _ensure_decoded(kind) if kind == 
u'datetime64': index = DatetimeIndex(data) elif kind == u'datetime': @@ -3265,19 +3285,24 @@ def _unconvert_index(data, kind): dtype=object) elif kind == u'date': index = np.array([date.fromtimestamp(v) for v in data], dtype=object) - elif kind in (u'string', u'integer', u'float'): + elif kind in (u'integer', u'float'): index = np.array(data) + elif kind in (u'string'): + index = _unconvert_string_array(data, nan_rep=None, encoding=encoding) elif kind == u'object': index = np.array(data[0]) else: # pragma: no cover raise ValueError('unrecognized index type %s' % kind) return index -def _unconvert_index_legacy(data, kind, legacy=False): +def _unconvert_index_legacy(data, kind, legacy=False, encoding=None): + kind = _ensure_decoded(kind) if kind == u'datetime': index = lib.time64_to_datetime(data) - elif kind in (u'string', u'integer'): + elif kind in (u'integer'): index = np.array(data, dtype=object) + elif kind in (u'string'): + index = _unconvert_string_array(data, nan_rep=None, encoding=encoding) else: # pragma: no cover raise ValueError('unrecognized index type %s' % kind) return index @@ -3300,6 +3325,10 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None): """ deserialize a string array, possibly decoding """ shape = data.shape data = np.array(data.ravel(),dtype=object) + + # guard against a None encoding in PY3 (because of a legacy + # where the passed encoding is actually None) + encoding = _ensure_encoding(encoding) if encoding is not None: f = np.vectorize(lambda x: x.decode(encoding),otypes=[np.object]) data = f(data) @@ -3318,6 +3347,7 @@ def _maybe_convert(values, val_kind, encoding): return values def _get_converter(kind, encoding): + kind = _ensure_decoded(kind) if kind == 'datetime64': return lambda x: np.array(x, dtype='M8[ns]') elif kind == 'datetime': @@ -3343,7 +3373,8 @@ class Term(object): value : a value or list of values (required) queryables : a kinds map (dict of column name -> kind), or None i column is non-indexable encoding : an encoding that will encode the query terms - + i : my term id number + Returns ------- a Term object @@ -3363,18 +3394,18 @@ class Term(object): _search = re.compile("^\s*(?P\w+)\s*(?P%s)\s*(?P.+)\s*$" % '|'.join(_ops)) _max_selectors = 31 - def __init__(self, field, op=None, value=None, queryables=None, encoding=None): + def __init__(self, field, op=None, value=None, queryables=None, i=None, encoding=None): self.field = None self.op = None self.value = None self.q = queryables or dict() self.filter = None - self.condition = None - - if py3compat.PY3: - if encoding is None: - encoding = _default_encoding + if i is None: + i = 0 + self.i = i + self.condition = None + self.condvars = dict() self.encoding = encoding # unpack lists/tuples in field @@ -3427,7 +3458,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): if self.field is None or self.op is None or self.value is None: raise ValueError("Could not create this term [%s]" % str(self)) - # = vs == + # = vs == if self.op == '=': self.op = '==' @@ -3436,7 +3467,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): if hasattr(self.value, '__iter__') and len(self.value) > 1: raise ValueError("an inequality condition cannot have multiple values [%s]" % str(self)) - if not hasattr(self.value, '__iter__'): + if not is_list_like(self.value): self.value = [self.value] if len(self.q): @@ -3462,6 +3493,16 @@ def kind(self): """ the kind of my field """ return self.q.get(self.field) + def generate(self, v, i=None): + """ 
create and return the op string for this TermValue + add the variable to condvars """ + if i is None: + i = 0 + + cv = "_%s_%s_%s" % (self.field,self.i,i) + self.condvars[cv] = v.converted + return "(%s %s %s)" % (self.field, self.op, cv) + def eval(self): """ set the numexpr expression for this term """ @@ -3472,39 +3513,38 @@ def eval(self): if self.is_in_table: values = [self.convert_value(v) for v in self.value] else: - values = [[v, v] for v in self.value] + values = [TermValue(v,v,self.kind) for v in self.value] # equality conditions if self.op in ['==', '!=']: # our filter op expression if self.op == '!=': - filter_op = lambda axis, values: not axis.isin(values) + filter_op = lambda axis, vals: not axis.isin(vals) else: - filter_op = lambda axis, values: axis.isin(values) + filter_op = lambda axis, vals: axis.isin(vals) if self.is_in_table: # too many values to create the expression? if len(values) <= self._max_selectors: - self.condition = "(%s)" % ' | '.join( - ["(%s %s %s)" % (self.field, self.op, v[0]) for v in values]) + vs = [ self.generate(v, i) for i, v in enumerate(values) ] + self.condition = "(%s)" % ' | '.join(vs) # use a filter after reading else: - self.filter = (self.field, filter_op, Index([v[1] for v in values])) + self.filter = (self.field, filter_op, Index([v.value for v in values])) else: - self.filter = (self.field, filter_op, Index([v[1] for v in values])) + self.filter = (self.field, filter_op, Index([v.value for v in values])) else: if self.is_in_table: - self.condition = '(%s %s %s)' % ( - self.field, self.op, values[0][0]) + self.condition = self.generate(values[0]) else: @@ -3524,29 +3564,36 @@ def stringify(value): v = lib.Timestamp(v) if v.tz is not None: v = v.tz_convert('UTC') - return [v.value, v] + return TermValue(v,v.value,kind) elif isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u'date': v = time.mktime(v.timetuple()) - return [v, Timestamp(v) ] + return TermValue(v,Timestamp(v),kind) elif kind == u'integer': v = int(float(v)) - return [v, v] + return TermValue(v,v,kind) elif kind == u'float': v = float(v) - return [v, v] + return TermValue(v,v,kind) elif kind == u'bool': if isinstance(v, basestring): v = not stringify(v).strip().lower() in [u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u''] else: v = bool(v) - return [v, v] + return TermValue(v,v,kind) elif not isinstance(v, basestring): v = stringify(v) - return [v, v] + return TermValue(v,stringify(v),u'string') # string quoting - return [stringify("'" + v + "'"), stringify(v)] + return TermValue(v,stringify(v),u'string') +class TermValue(object): + """ hold a term value the we use to construct a condition/filter """ + + def __init__(self, value, converted, kind): + self.value = value + self.converted = converted + self.kind = kind class Coordinates(object): """ holds a returned coordinates list, useful to select the same rows from different tables @@ -3585,6 +3632,7 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs): self.start = start self.stop = stop self.condition = None + self.condvars = dict() self.filter = None self.terms = None self.coordinates = None @@ -3596,11 +3644,11 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs): # create the numexpr & the filter if self.terms: - conds = [t.condition for t in self.terms if t.condition is not None] - if len(conds): - self.condition = "(%s)" % ' & '.join(conds) - #if self.table.encoding is not None: - # self.condition = self.condition.encode(self.table.encoding) + terms = [ 
@@ -3623,14 +3671,14 @@ def generate(self, where):
             where = [where]

         queryables = self.table.queryables()
-        return [Term(c, queryables=queryables, encoding=self.table.encoding) for c in where]
+        return [Term(c, queryables=queryables, i=i, encoding=self.table.encoding) for i, c in enumerate(where)]

     def select(self):
         """ generate the selection """
         if self.condition is not None:
-            return self.table.table.readWhere(self.condition, start=self.start, stop=self.stop)
+            return self.table.table.readWhere(self.condition, condvars=self.condvars, start=self.start, stop=self.stop)
         elif self.coordinates is not None:
             return self.table.table.readCoordinates(self.coordinates)
         return self.table.table.read(start=self.start, stop=self.stop)

@@ -3642,7 +3690,7 @@ def select_coords(self):
         if self.condition is None:
             return np.arange(self.table.nrows)

-        return self.table.table.getWhereList(self.condition, start=self.start, stop=self.stop, sort=True)
+        return self.table.table.getWhereList(self.condition, condvars=self.condvars, start=self.start, stop=self.stop, sort=True)

 ### utilities ###

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 7a7bca02b1cd2..edaf905ce7b75 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -115,7 +115,7 @@ def roundtrip(key, obj,**kwargs):

             o = tm.makeTimeSeries()
             assert_series_equal(o, roundtrip('series',o))
-
+
             o = tm.makeStringSeries()
             assert_series_equal(o, roundtrip('string_series',o))

@@ -570,7 +570,7 @@ def test_append_some_nans(self):

     def test_append_frame_column_oriented(self):
         with ensure_clean(self.path) as store:
-            import pdb; pdb.set_trace()
+
             # column oriented
             df = tm.makeTimeDataFrame()
             _maybe_remove(store, 'df1')

@@ -2560,6 +2560,7 @@ def test_legacy_0_10_read(self):
         # legacy from 0.10
         try:
             store = HDFStore(tm.get_data_path('legacy_hdf/legacy_0.10.h5'), 'r')
+            str(store)
             for k in store.keys():
                 store.select(k)
         finally:

@@ -2569,6 +2570,7 @@ def test_legacy_0_11_read(self):
         # legacy from 0.11
         try:
             store = HDFStore(tm.get_data_path('legacy_hdf/legacy_table_0.11.h5'), 'r')
+            str(store)
             df = store.select('df')
             df1 = store.select('df1')
             mi = store.select('mi')
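The next patch leans on a decode-once helper for values read back under py3; a sketch of the assumed behavior (modeled on the _ensure_decoded calls in the hunks below, not a verbatim copy of the pandas implementation)::

    import numpy as np

    def _ensure_decoded(s, encoding='UTF-8'):
        # attribute values written under py2 come back as bytes under py3;
        # decode once so downstream comparisons can use unicode literals
        if isinstance(s, (np.bytes_, bytes)):
            s = s.decode(encoding)
        return s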
From cd7b115d53df8cb43e2cc76b62e58cd101114344 Mon Sep 17 00:00:00 2001
From: jreback
Date: Wed, 5 Jun 2013 13:23:14 -0400
Subject: [PATCH 5/7] TST: fixed do_copy testing
 BUG: more encoding/decoding issues

---
 pandas/io/pytables.py            | 33 ++++++++++++++++----------------
 pandas/io/tests/test_pytables.py | 24 +++++++++++++++--------
 2 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index b36985d65569a..86edb7a43ec47 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -765,9 +765,9 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None
                     index = False
                     if propindexes:
                         index = [ a.name for a in s.axes if a.is_indexed ]
-                    new_store.append(k,data, index=index, data_columns=getattr(s,'data_columns',None))
+                    new_store.append(k, data, index=index, data_columns=getattr(s,'data_columns',None), encoding=s.encoding)
                 else:
-                    new_store.put(k,data)
+                    new_store.put(k, data, encoding=s.encoding)

         return new_store

@@ -1043,13 +1043,13 @@ def convert(self, values, nan_rep, encoding):
             kwargs = dict()
             if self.freq is not None:
-                kwargs['freq'] = self.freq
+                kwargs['freq'] = _ensure_decoded(self.freq)
             if self.tz is not None:
-                kwargs['tz'] = self.tz
+                kwargs['tz'] = _ensure_decoded(self.tz)
             if self.index_name is not None:
-                kwargs['name'] = self.index_name
+                kwargs['name'] = _ensure_decoded(self.index_name)
             try:
-                self.values = Index(_maybe_convert(values, self.kind, encoding), **kwargs)
+                self.values = Index(_maybe_convert(values, self.kind, self.encoding), **kwargs)
             except:

                 # if the output freq is different that what we recorded, then infer it

@@ -1706,7 +1706,7 @@ def get_attrs(self):
         """ retrieve our attributes """
         self.encoding = _ensure_encoding(getattr(self.attrs,'encoding',None))
         for n in self.attributes:
-            setattr(self,n,getattr(self.attrs, n, None))
+            setattr(self,n,_ensure_decoded(getattr(self.attrs, n, None)))

     def write(self, obj, **kwargs):
         self.set_attrs()

@@ -1847,7 +1847,7 @@ def read_multi_index(self, key):

     def read_index_node(self, node):
         data = node[:]
-        kind = node._v_attrs.kind
+        kind = _ensure_decoded(node._v_attrs.kind)
         name = None

         if 'name' in node._v_attrs:

@@ -1858,13 +1858,13 @@ def read_index_node(self, node):
         factory = self._get_index_factory(index_class)

         kwargs = {}
-        if 'freq' in node._v_attrs:
+        if u'freq' in node._v_attrs:
             kwargs['freq'] = node._v_attrs['freq']

-        if 'tz' in node._v_attrs:
+        if u'tz' in node._v_attrs:
             kwargs['tz'] = node._v_attrs['tz']

-        if kind in ('date', 'datetime'):
+        if kind in (u'date', u'datetime'):
             index = factory(_unconvert_index(data, kind, encoding=self.encoding), dtype=object,
                             **kwargs)
         else:

@@ -2077,7 +2077,7 @@ def write(self, obj, **kwargs):
         self.attrs.default_kind = obj.default_kind
         self.write_index('items', obj.items)

-        for name, sdf in obj.iteritems():
+        for name, sdf in obj.iterkv():
             key = 'sparse_frame_%s' % name
             if key not in self.group._v_children:
                 node = self._handle.createGroup(self.group, key)

@@ -3358,7 +3358,8 @@ def _get_converter(kind, encoding):
         raise ValueError('invalid kind %s' % kind)

 def _need_convert(kind):
-    if kind in ('datetime', 'datetime64', 'string'):
+    kind = _ensure_decoded(kind)
+    if kind in (u'datetime', u'datetime64', u'string'):
         return True
     return False

@@ -3464,7 +3465,7 @@ def __init__(self, field, op=None, value=None, queryables=None, i=None, encoding

         # we have valid conditions
         if self.op in ['>', '>=', '<', '<=']:

-            if hasattr(self.value, '__iter__') and len(self.value) > 1:
+            if hasattr(self.value, '__iter__') and len(self.value) > 1 and not isinstance(self.value,basestring):
                 raise ValueError("an inequality condition cannot have multiple values [%s]" % str(self))

         if not is_list_like(self.value):

@@ -3559,7 +3560,7 @@ def stringify(value):
                 value = value.encode(self.encoding)
             return value

-        kind = self.kind
+        kind = _ensure_decoded(self.kind)
         if kind == u'datetime64' or kind == u'datetime' :
             v = lib.Timestamp(v)
             if v.tz is not None:

@@ -3576,7 +3577,7 @@ def stringify(value):
             return TermValue(v,v,kind)
         elif kind == u'bool':
             if isinstance(v, basestring):
-                v = not stringify(v).strip().lower() in [u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u'']
+                v = not v.strip().lower() in [u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u'']
             else:
                 v = bool(v)
             return TermValue(v,v,kind)
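The hunks above follow one pattern: decode once, then compare against unicode literals so that py2 and py3 take the same branch. In isolation::

    raw = b'datetime64'                     # as read back from a py2-written HDF5 file
    kind = raw.decode('UTF-8') if isinstance(raw, bytes) else raw
    assert kind in (u'datetime', u'datetime64')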
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index edaf905ce7b75..8b3d4a475d952 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -17,6 +17,7 @@
 from pandas.tests.test_series import assert_series_equal
 from pandas.tests.test_frame import assert_frame_equal
 from pandas import concat, Timestamp
+from pandas.util import py3compat
 from numpy.testing.decorators import slow

@@ -1276,8 +1277,14 @@ def test_unimplemented_dtypes_table_columns(self):

         with ensure_clean(self.path) as store:

+            l = [('date', datetime.date(2001, 1, 2))]
+
+            # py3 ok for unicode
+            if not py3compat.PY3:
+                l.append(('unicode', u'\u03c3'))
+
             ### currently not supported dtypes ####
-            for n, f in [('unicode', u'\u03c3'), ('date', datetime.date(2001, 1, 2))]:
+            for n, f in l:
                 df = tm.makeDataFrame()
                 df[n] = f
                 self.assertRaises(

@@ -2602,24 +2609,25 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs):

                 # check indicies & nrows
                 for k in tstore.keys():
-                    if tstore.is_table(k):
+                    if tstore.get_storer(k).is_table:
                         new_t = tstore.get_storer(k)
                         orig_t = store.get_storer(k)

                         self.assert_(orig_t.nrows == new_t.nrows)
-                        for a in orig_t.axes:
-                            if a.is_indexed:
-                                self.assert_(new_t[a.name].is_indexed == True)
-        except (Exception), detail:
-            pass
+                        # check propindexes
+                        if propindexes:
+                            for a in orig_t.axes:
+                                if a.is_indexed:
+                                    self.assert_(new_t[a.name].is_indexed == True)
+
         finally:
             safe_close(store)
             safe_close(tstore)
             safe_remove(new_f)

         do_copy()
-        do_copy(keys = ['df'])
+        do_copy(keys = ['/a','/b','/df1_mixed'])
         do_copy(propindexes = False)

         # new table
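The corrected do_copy checks amount to this usage sketch (file names and frame contents are illustrative, not part of the patch)::

    import pandas as pd

    store = pd.HDFStore('orig.h5', mode='w')
    store.append('df', pd.DataFrame({'A': range(5)}))
    tstore = store.copy('copy.h5', propindexes=True)

    for k in tstore.keys():
        new_t = tstore.get_storer(k)
        if new_t.is_table:                  # storer-level check, per the fix above
            orig_t = store.get_storer(k)
            assert orig_t.nrows == new_t.nrows
            for a in orig_t.axes:
                if a.is_indexed:            # indexes propagate when propindexes=True
                    assert new_t[a.name].is_indexed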
From 2f7f9bd162b018e56871e9f17308c82894c8e5ff Mon Sep 17 00:00:00 2001
From: jreback
Date: Wed, 5 Jun 2013 14:09:07 -0400
Subject: [PATCH 6/7] DOC: release notes update

---
 RELEASE.rst           | 27 +++------------------------
 pandas/io/pytables.py | 14 +++++++++-----
 2 files changed, 12 insertions(+), 29 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index c05bb526ab715..3a347246be8dd 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -63,14 +63,7 @@ pandas 0.11.1
     to append an index with a different name than the existing
   - support datelike columns with a timezone as data_columns (GH2852_)
   - table writing performance improvements.
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-  - support python3 (via ``PyTables 3.0.0``)
->>>>>>> 116ab91... DOC: docstring/release notes updates for py3k
-=======
-  - support py3 (via ``PyTables 3.0.0``)
->>>>>>> ab16d43... ENH: partial py3k support
+  - support python3 (via ``PyTables 3.0.0``) (GH3750_)
   - Add modulo operator to Series, DataFrame
   - Add ``date`` method to DatetimeIndex
   - Simplified the API and added a describe method to Categorical

@@ -87,29 +80,14 @@ pandas 0.11.1

 **API Changes**

-<<<<<<< HEAD
-<<<<<<< HEAD
-  - When removing an object from a ``HDFStore``, ``remove(key)`` raises
-    ``KeyError`` if the key is not a valid store object.
-  - In an ``HDFStore``, raise a ``TypeError`` on passing ``where`` or ``columns``
-    to select with a Storer; these are invalid parameters at this time
-=======
-=======
->>>>>>> ab16d43... ENH: partial py3k support
   - ``HDFStore``

     - When removing an object, ``remove(key)`` raises
      ``KeyError`` if the key is not a valid store object.
    - raise a ``TypeError`` on passing ``where`` or ``columns``
      to select with a Storer; these are invalid parameters at this time
-<<<<<<< HEAD
-    - can now specify an ``encoding`` option to ``append/put``
-      to enable alternate encodings
->>>>>>> 116ab91... DOC: docstring/release notes updates for py3k
-=======
-    - can now specify an ``encoding`` option to ``append`` and ``select``
-      to enable alternate encodings
->>>>>>> ab16d43... ENH: partial py3k support
+    - can now specify an ``encoding`` option to ``append`` and ``select``
+      to enable alternate encodings (GH3750_)
   - The repr() for (Multi)Index now obeys display.max_seq_items
     rather then numpy threshold print options. (GH3426_, GH3466_)
   - Added mangle_dupe_cols option to read_table/csv, allowing users

@@ -315,6 +293,7 @@ pandas 0.11.1
 .. _GH3740: https://github.com/pydata/pandas/issues/3740
 .. _GH3748: https://github.com/pydata/pandas/issues/3748
 .. _GH3741: https://github.com/pydata/pandas/issues/3741
+.. _GH3750: https://github.com/pydata/pandas/issues/3750

 pandas 0.11.0
 =============

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 86edb7a43ec47..e772d95d04ee7 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -218,7 +218,7 @@ class HDFStore(object):
     complevel : int, 1-9, default 0
         If a complib is specified compression will be applied
         where possible
-    complib : {'zliu', 'bzip2', 'lzo', 'blosc', None}, default None
+    complib : {'zlib', 'bzip2', 'lzo', 'blosc', None}, default None
         If complevel is > 0 apply compression to objects written
         in the store wherever possible
     fletcher32 : bool, default False

@@ -711,7 +711,8 @@ def create_table_index(self, key, **kwargs):
     def groups(self):
         """ return a list of all the top-level nodes (that are not themselves a pandas storage object) """
         _tables()
-        return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr(g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != u'table') ]
+        return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr(
+            g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != u'table') ]

     def get_node(self, key):
         """ return the node with the key or None if it does not exist """

@@ -731,7 +732,8 @@ def get_storer(self, key):
         s.infer_axes()
         return s

-    def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None, complevel = None, fletcher32 = False, overwrite = True):
+    def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None, complevel = None,
+             fletcher32 = False, overwrite = True):
         """ copy the existing store to a new file, upgrading in place

             Parameters
             ----------

@@ -845,7 +847,8 @@ def error(t):
         except:
             error('_TABLE_MAP')

-    def _write_to_group(self, key, value, index=True, table=False, append=False, complib=None, encoding=None, **kwargs):
+    def _write_to_group(self, key, value, index=True, table=False, append=False,
+                        complib=None, encoding=None, **kwargs):
         group = self.get_node(key)

         # remove the node if we are not appending

@@ -870,7 +873,8 @@ def _write_to_group(self, key, value, index=True, table=False, append=False, com
             group = self._handle.createGroup(path, p)
             path = new_path

-        s = self._create_storer(group, value, table=table, append=append, encoding=encoding, **kwargs)
+        s = self._create_storer(group, value, table=table, append=append,
+                                encoding=encoding, **kwargs)
         if append:
             # raise if we are trying to append to a non-table,
             # or a table that exists (and we are putting)
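A usage sketch for the corrected ``complib`` values and the ``encoding`` option from the API changes above (file name and data are illustrative)::

    import pandas as pd

    store = pd.HDFStore('data.h5', mode='w', complevel=9, complib='zlib')
    df = pd.DataFrame({'A': ['foo', 'bar']})
    store.append('df', df, encoding='latin-1')   # alternate encoding, per GH3750
    store.close()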
From fb25ac1e4573de4ffb61ea4ac6f5ab3ced000837 Mon Sep 17 00:00:00 2001
From: jreback
Date: Wed, 5 Jun 2013 19:21:47 -0400
Subject: [PATCH 7/7] BUG: fix numpy 1.6.1 issues; remove need for condvars
 and use literals in the numexpr expressions

---
 pandas/io/pytables.py | 47 +++++++++++++++++++------------------------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index e772d95d04ee7..b1b7b80e5fd23 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -3314,8 +3314,8 @@ def _unconvert_index_legacy(data, kind, legacy=False, encoding=None):

 def _convert_string_array(data, encoding, itemsize=None):

     # encode if needed
-    if encoding is not None:
-        f = np.vectorize(lambda x: x.encode(encoding))
+    if encoding is not None and len(data):
+        f = np.vectorize(lambda x: x.encode(encoding), otypes=[np.object])
         data = f(data)

     # create the sized dtype

@@ -3333,7 +3333,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None):
     # guard against a None encoding in PY3 (because of a legacy
     # where the passed encoding is actually None)
     encoding = _ensure_encoding(encoding)
-    if encoding is not None:
+    if encoding is not None and len(data):
         f = np.vectorize(lambda x: x.decode(encoding),otypes=[np.object])
         data = f(data)

@@ -3378,7 +3378,6 @@ class Term(object):
     value : a value or list of values (required)
     queryables : a kinds map (dict of column name -> kind), or None if column is non-indexable
     encoding : an encoding that will encode the query terms
-    i : my term id number

     Returns
     -------

     _search = re.compile("^\s*(?P<field>\w+)\s*(?P<op>%s)\s*(?P<value>.+)\s*$" % '|'.join(_ops))
     _max_selectors = 31

-    def __init__(self, field, op=None, value=None, queryables=None, i=None, encoding=None):
+    def __init__(self, field, op=None, value=None, queryables=None, encoding=None):
         self.field = None
         self.op = None
         self.value = None
         self.q = queryables or dict()
         self.filter = None
-
-        if i is None:
-            i = 0
-        self.i = i
         self.condition = None
-        self.condvars = dict()
         self.encoding = encoding

         # unpack lists/tuples in field

@@ -3498,15 +3492,10 @@ def kind(self):
         """ the kind of my field """
         return self.q.get(self.field)

-    def generate(self, v, i=None):
-        """ create and return the op string for this TermValue
-            add the variable to condvars """
-        if i is None:
-            i = 0
-
-        cv = "_%s_%s_%s" % (self.field,self.i,i)
-        self.condvars[cv] = v.converted
-        return "(%s %s %s)" % (self.field, self.op, cv)
+    def generate(self, v):
+        """ create and return the op string for this TermValue """
+        val = v.tostring(self.encoding)
+        return "(%s %s %s)" % (self.field, self.op, val)

     def eval(self):
         """ set the numexpr expression for this term """
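The literal-based generation that replaces condvars can be exercised in isolation (this mirrors the ``tostring`` method added below; names and values are illustrative)::

    class TermValue(object):
        def __init__(self, value, converted, kind):
            self.value, self.converted, self.kind = value, converted, kind

        def tostring(self, encoding):
            if self.kind == u'string':
                if encoding is not None:
                    return self.converted        # already encoded; inline as-is
                return '"%s"' % self.converted   # quote raw strings for numexpr
            return self.converted

    tv = TermValue('bar', 'bar', u'string')
    print("(column == %s)" % tv.tostring(None))  # -> (column == "bar")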
@@ -3534,7 +3523,7 @@ def eval(self):

                 # too many values to create the expression?
                 if len(values) <= self._max_selectors:
-                    vs = [ self.generate(v, i) for i, v in enumerate(values) ]
+                    vs = [ self.generate(v) for v in values ]
                     self.condition = "(%s)" % ' | '.join(vs)

                 # use a filter after reading

@@ -3600,6 +3589,15 @@ def __init__(self, value, converted, kind):
         self.converted = converted
         self.kind = kind

+    def tostring(self, encoding):
+        """ quote the string if not encoded
+            else encode and return """
+        if self.kind == u'string':
+            if encoding is not None:
+                return self.converted
+            return '"%s"' % self.converted
+        return self.converted
+
 class Coordinates(object):
     """ holds a returned coordinates list, useful to select the same rows from different tables

@@ -3637,7 +3635,6 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs):
         self.start = start
         self.stop = stop
         self.condition = None
-        self.condvars = dict()
         self.filter = None
         self.terms = None
         self.coordinates = None

@@ -3652,8 +3649,6 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs):
             terms = [ t for t in self.terms if t.condition is not None ]
             if len(terms):
                 self.condition = "(%s)" % ' & '.join([ t.condition for t in terms ])
-                for t in terms:
-                    self.condvars.update(t.condvars)
             self.filter = []
             for t in self.terms:
                 if t.filter is not None:

@@ -3676,14 +3671,14 @@ def generate(self, where):
             where = [where]

         queryables = self.table.queryables()
-        return [Term(c, queryables=queryables, i=i, encoding=self.table.encoding) for i, c in enumerate(where)]
+        return [Term(c, queryables=queryables, encoding=self.table.encoding) for c in where]

     def select(self):
         """ generate the selection """
         if self.condition is not None:
-            return self.table.table.readWhere(self.condition, condvars=self.condvars, start=self.start, stop=self.stop)
+            return self.table.table.readWhere(self.condition, start=self.start, stop=self.stop)
         elif self.coordinates is not None:
             return self.table.table.readCoordinates(self.coordinates)
         return self.table.table.read(start=self.start, stop=self.stop)

@@ -3695,7 +3690,7 @@ def select_coords(self):
         if self.condition is None:
             return np.arange(self.table.nrows)

-        return self.table.table.getWhereList(self.condition, condvars=self.condvars, start=self.start, stop=self.stop, sort=True)
+        return self.table.table.getWhereList(self.condition, start=self.start, stop=self.stop, sort=True)

 ### utilities ###
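Taken together, the series makes this py3 round trip work end to end (usage sketch; file name and data are illustrative)::

    import pandas as pd
    from pandas.io.pytables import Term

    df = pd.DataFrame({'A': range(20)},
                      index=pd.date_range('1/1/2013', periods=20))
    store = pd.HDFStore('py3.h5', mode='w')
    store.append('df', df, data_columns=['A'])

    # the Selection machinery now inlines literals into the numexpr condition
    result = store.select('df', [Term('A', '>', 10)])
    store.close()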