pandas-dev · jreback · Dec 10, 2019 · Dec 8, 2019 · Dec 8, 2019 · Dec 8, 2019
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -2247,16 +2247,7 @@ def set_data(self, data: Union[np.ndarray, ABCExtensionArray]):
         assert data is not None
         assert self.dtype is None
 
-        if is_categorical_dtype(data.dtype):
-            data = data.codes
-
-        # For datetime64tz we need to drop the TZ in tests TODO: why?
-        dtype_name = data.dtype.name.split("[")[0]
-
-        if data.dtype.kind in ["m", "M"]:
-            data = np.asarray(data.view("i8"))
-            # TODO: we used to reshape for the dt64tz case, but no longer
-            #  doing that doesnt seem to break anything.  why?
+        data, dtype_name = _get_data_and_dtype_name(data)
 
         self.data = data
         self.dtype = dtype_name
@@ -2318,6 +2309,9 @@ def get_atom_coltype(cls, kind: str) -> Type["Col"]:
         if kind.startswith("uint"):
             k4 = kind[4:]
             col_name = f"UInt{k4}Col"
+        elif kind.startswith("period"):
+            # we store as integer
+            col_name = "Int64Col"
         else:
             kcap = kind.capitalize()
             col_name = f"{kcap}Col"
@@ -4612,36 +4606,45 @@ def _convert_index(name: str, index: Index, encoding=None, errors="strict"):
     assert isinstance(name, str)
 
     index_name = index.name
+    converted, dtype_name = _get_data_and_dtype_name(index)
+    kind = _dtype_to_kind(dtype_name)
+    atom = DataIndexableCol._get_atom(converted)
 
     if isinstance(index, DatetimeIndex):
-        converted = index.asi8
+        assert isinstance(converted, np.ndarray) and converted.dtype == "i8"
+        assert kind == "datetime64", kind
+        assert isinstance(atom, _tables().Int64Col), atom.dtype
         return IndexCol(
             name,
-            converted,
-            "datetime64",
-            _tables().Int64Col(),
+            values=converted,
+            kind=kind,
+            typ=atom,
             freq=index.freq,
             tz=index.tz,
             index_name=index_name,
         )
     elif isinstance(index, TimedeltaIndex):
-        converted = index.asi8
+        assert isinstance(converted, np.ndarray) and converted.dtype == "i8"
+        assert kind == "timedelta64", kind
+        assert isinstance(atom, _tables().Int64Col), atom.dtype
         return IndexCol(
             name,
-            converted,
-            "timedelta64",
-            _tables().Int64Col(),
+            values=converted,
+            kind=kind,
+            typ=atom,
             freq=index.freq,
             index_name=index_name,
         )
     elif isinstance(index, (Int64Index, PeriodIndex)):
-        atom = _tables().Int64Col()
         # avoid to store ndarray of Period objects
+        assert isinstance(converted, np.ndarray) and converted.dtype == "i8"
+        assert kind == "integer", kind
+        assert isinstance(atom, _tables().Int64Col), atom.dtype
         return IndexCol(
             name,
-            index._ndarray_values,
-            "integer",
-            atom,
+            values=converted,
+            kind=kind,
+            typ=atom,
             freq=getattr(index, "freq", None),
             index_name=index_name,
         )
@@ -4661,8 +4664,6 @@ def _convert_index(name: str, index: Index, encoding=None, errors="strict"):
             name, converted, "date", _tables().Time32Col(), index_name=index_name,
         )
     elif inferred_type == "string":
-        # atom = _tables().ObjectAtom()
-        # return np.asarray(values, dtype='O'), 'object', atom
 
         converted = _convert_string_array(values, encoding, errors)
         itemsize = converted.dtype.itemsize
@@ -4676,28 +4677,24 @@ def _convert_index(name: str, index: Index, encoding=None, errors="strict"):
 
     elif inferred_type == "integer":
         # take a guess for now, hope the values fit
-        atom = _tables().Int64Col()
+        assert isinstance(converted, np.ndarray) and converted.dtype == "i8"
+        assert kind == "integer", kind
+        assert isinstance(atom, _tables().Int64Col), atom.dtype
         return IndexCol(
-            name,
-            np.asarray(values, dtype=np.int64),
-            "integer",
-            atom,
-            index_name=index_name,
+            name, values=converted, kind=kind, typ=atom, index_name=index_name,
         )
     elif inferred_type == "floating":
-        atom = _tables().Float64Col()
+        assert isinstance(converted, np.ndarray) and converted.dtype == "f8"
+        assert kind == "float", kind
+        assert isinstance(atom, _tables().Float64Col), atom.dtype
         return IndexCol(
-            name,
-            np.asarray(values, dtype=np.float64),
-            "float",
-            atom,
-            index_name=index_name,
+            name, values=converted, kind=kind, typ=atom, index_name=index_name,
         )
     else:
+        assert isinstance(converted, np.ndarray) and converted.dtype == object
+        assert kind == "object", kind
         atom = _tables().ObjectAtom()
-        return IndexCol(
-            name, np.asarray(values, dtype="O"), "object", atom, index_name=index_name,
-        )
+        return IndexCol(name, converted, kind, atom, index_name=index_name,)
 
 
 def _unconvert_index(data, kind: str, encoding=None, errors="strict"):
@@ -4924,21 +4921,47 @@ def _dtype_to_kind(dtype_str: str) -> str:
         kind = "complex"
     elif dtype_str.startswith("int") or dtype_str.startswith("uint"):
         kind = "integer"
-    elif dtype_str.startswith("date"):
-        # in tests this is always "datetime64"
-        kind = "datetime"
+    elif dtype_str.startswith("datetime64"):
+        kind = "datetime64"
     elif dtype_str.startswith("timedelta"):
-        kind = "timedelta"
+        kind = "timedelta64"
     elif dtype_str.startswith("bool"):
         kind = "bool"
     elif dtype_str.startswith("category"):
         kind = "category"
+    elif dtype_str.startswith("period"):
+        # We store the `freq` attr so we can restore from integers
+        kind = "integer"
+    elif dtype_str == "object":
+        kind = "object"
     else:
         raise ValueError(f"cannot interpret dtype of [{dtype_str}]")
 
     return kind
 
 
+def _get_data_and_dtype_name(data: Union[np.ndarray, ABCExtensionArray]):
+    """
+    Return the data as a storable ndarray along with its dtype-name string.
+    """
+    if is_categorical_dtype(data.dtype):
+        data = data.codes
+
+    # For datetime64tz we need to drop the TZ in tests TODO: why?
+    dtype_name = data.dtype.name.split("[")[0]
+
+    if data.dtype.kind in ["m", "M"]:
+        data = np.asarray(data.view("i8"))
+        # TODO: we used to reshape for the dt64tz case, but no longer
+        #  doing that doesn't seem to break anything.  why?
+
+    elif isinstance(data, PeriodIndex):
+        data = data.asi8
+
+    data = np.asarray(data)
+    return data, dtype_name
+
+
 class Selection:
     """
     Carries out a selection operation on a tables.Table object.