|
| 1 | +""" |
| 2 | +data hash pandas / numpy objects |
| 3 | +""" |
| 4 | +import itertools |
| 5 | + |
| 6 | +import numpy as np |
| 7 | +from pandas._libs import hashing |
| 8 | +from pandas._libs.lib import is_bool_array |
| 9 | +from pandas.core.dtypes.generic import ( |
| 10 | + ABCMultiIndex, |
| 11 | + ABCIndexClass, |
| 12 | + ABCSeries, |
| 13 | + ABCDataFrame) |
| 14 | +from pandas.core.dtypes.common import ( |
| 15 | + is_categorical_dtype, is_numeric_dtype, |
| 16 | + is_datetime64_dtype, is_timedelta64_dtype, |
| 17 | + is_list_like) |
| 18 | + |
| 19 | +# 16 byte long hashing key |
| 20 | +_default_hash_key = '0123456789123456' |
| 21 | + |
| 22 | + |
| 23 | +def _combine_hash_arrays(arrays, num_items): |
| 24 | + """ |
| 25 | + Parameters |
| 26 | + ---------- |
| 27 | + arrays : generator |
| 28 | + num_items : int |
| 29 | +
|
| 30 | + Should be the same as CPython's tupleobject.c |
| 31 | + """ |
| 32 | + try: |
| 33 | + first = next(arrays) |
| 34 | + except StopIteration: |
| 35 | + return np.array([], dtype=np.uint64) |
| 36 | + |
| 37 | + arrays = itertools.chain([first], arrays) |
| 38 | + |
| 39 | + mult = np.uint64(1000003) |
| 40 | + out = np.zeros_like(first) + np.uint64(0x345678) |
| 41 | + for i, a in enumerate(arrays): |
| 42 | + inverse_i = num_items - i |
| 43 | + out ^= a |
| 44 | + out *= mult |
| 45 | + mult += np.uint64(82520 + inverse_i + inverse_i) |
| 46 | + assert i + 1 == num_items, 'Fed in wrong num_items' |
| 47 | + out += np.uint64(97531) |
| 48 | + return out |
| 49 | + |
| 50 | + |
def hash_pandas_object(obj, index=True, encoding='utf8', hash_key=None,
                       categorize=True):
    """
    Return a data hash of the Index/Series/DataFrame

    .. versionadded:: 0.19.2

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : boolean, default True
        include the index in the hash (if Series/DataFrame)
    encoding : string, default 'utf8'
        encoding for data & key when strings
    hash_key : string key to encode, default to _default_hash_key
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

        .. versionadded:: 0.20.0

    Returns
    -------
    Series of uint64, same length as the object

    Raises
    ------
    TypeError
        If obj is not an Index, Series, or DataFrame.
    """
    from pandas import Series
    if hash_key is None:
        hash_key = _default_hash_key

    if isinstance(obj, ABCMultiIndex):
        return Series(hash_tuples(obj, encoding, hash_key),
                      dtype='uint64', copy=False)

    if isinstance(obj, ABCIndexClass):
        h = hash_array(obj.values, encoding, hash_key,
                       categorize).astype('uint64', copy=False)
        h = Series(h, index=obj, dtype='uint64', copy=False)
    elif isinstance(obj, ABCSeries):
        h = hash_array(obj.values, encoding, hash_key,
                       categorize).astype('uint64', copy=False)
        if index:
            # hash the index lazily and fold it into the data hash
            index_iter = (hash_pandas_object(obj.index,
                                             index=False,
                                             encoding=encoding,
                                             hash_key=hash_key,
                                             categorize=categorize).values
                          for _ in [None])
            arrays = itertools.chain([h], index_iter)
            h = _combine_hash_arrays(arrays, 2)

        h = Series(h, index=obj.index, dtype='uint64', copy=False)

    elif isinstance(obj, ABCDataFrame):
        # BUG FIX: previously the per-column hashes ignored the
        # caller-supplied encoding/hash_key/categorize (the arguments were
        # not forwarded), so a custom hash_key had no effect on DataFrame
        # data columns.
        hashes = (hash_array(series.values, encoding, hash_key, categorize)
                  for _, series in obj.iteritems())
        num_items = len(obj.columns)
        if index:
            index_hash_generator = (hash_pandas_object(obj.index,
                                                       index=False,
                                                       encoding=encoding,
                                                       hash_key=hash_key,
                                                       categorize=categorize).values  # noqa
                                    for _ in [None])
            num_items += 1
            hashes = itertools.chain(hashes, index_hash_generator)
        h = _combine_hash_arrays(hashes, num_items)

        h = Series(h, index=obj.index, dtype='uint64', copy=False)
    else:
        raise TypeError("Unexpected type for hashing %s" % type(obj))
    return h
| 121 | + |
| 122 | + |
def hash_tuples(vals, encoding='utf8', hash_key=None):
    """
    Hash a MultiIndex / list-of-tuples efficiently

    .. versionadded:: 0.20.0

    Parameters
    ----------
    vals : MultiIndex, list-of-tuples, or single tuple
    encoding : string, default 'utf8'
    hash_key : string key to encode, default to _default_hash_key

    Returns
    -------
    ndarray of hashed values array (a single uint64 if a lone tuple
    was passed)
    """

    single = isinstance(vals, tuple)
    if single:
        vals = [vals]
    elif not is_list_like(vals):
        raise TypeError("must be convertible to a list-of-tuples")

    from pandas import Categorical, MultiIndex

    if not isinstance(vals, ABCMultiIndex):
        vals = MultiIndex.from_tuples(vals)

    # one Categorical per level, built directly from the level's
    # codes ("labels") and categories to avoid re-factorizing
    cats = [Categorical(vals.labels[lev],
                        vals.levels[lev],
                        ordered=False,
                        fastpath=True)
            for lev in range(vals.nlevels)]

    # hash each level lazily, then mix the per-level hashes together
    level_hashes = (_hash_categorical(cat,
                                      encoding=encoding,
                                      hash_key=hash_key)
                    for cat in cats)
    out = _combine_hash_arrays(level_hashes, len(cats))

    return out[0] if single else out
| 169 | + |
| 170 | + |
def _hash_categorical(c, encoding, hash_key):
    """
    Hash a Categorical by hashing its categories, and then mapping the codes
    to the hashes

    Parameters
    ----------
    c : Categorical
    encoding : string, default 'utf8'
    hash_key : string key to encode, default to _default_hash_key

    Returns
    -------
    ndarray of hashed values array, same size as len(c)
    """
    # hash the (unique) categories once; categorize=False since they are
    # already known to be unique
    cat_hashes = hash_array(c.categories.values, encoding, hash_key,
                            categorize=False)

    # uint64 has no native missing-value representation and take_nd would
    # coerce to float, so construct the result directly and mark missing
    # entries with max(np.uint64) as the sentinel
    #
    # TODO: GH 15362
    na_mask = c.isnull()
    if len(cat_hashes):
        out = cat_hashes.take(c.codes)
    else:
        # no categories at all -> every position is missing
        out = np.zeros(len(na_mask), dtype='uint64')

    if na_mask.any():
        out[na_mask] = np.iinfo(np.uint64).max

    return out
| 206 | + |
| 207 | + |
def hash_array(vals, encoding='utf8', hash_key=None, categorize=True):
    """
    Given a 1d array, return an array of deterministic integers.

    .. versionadded:: 0.19.2

    Parameters
    ----------
    vals : ndarray, Categorical
    encoding : string, default 'utf8'
        encoding for data & key when strings
    hash_key : string key to encode, default to _default_hash_key
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

        .. versionadded:: 0.20.0

    Returns
    -------
    1d uint64 numpy array of hash values, same length as the vals

    Raises
    ------
    TypeError
        If vals is not ndarray-like (has no dtype).
    """

    if not hasattr(vals, 'dtype'):
        raise TypeError("must pass a ndarray-like")

    if hash_key is None:
        hash_key = _default_hash_key

    # For categoricals, we hash the categories, then remap the codes to the
    # hash values. (This check is above the complex check so that we don't ask
    # numpy if categorical is a subdtype of complex, as it will choke.
    if is_categorical_dtype(vals.dtype):
        return _hash_categorical(vals, encoding, hash_key)

    # we'll be working with everything as 64-bit values, so handle this
    # 128-bit value early
    if np.issubdtype(vals.dtype, np.complex128):
        # BUG FIX: forward encoding/hash_key/categorize to the recursive
        # calls; previously they silently fell back to the defaults.
        return (hash_array(vals.real, encoding, hash_key, categorize) +
                23 * hash_array(vals.imag, encoding, hash_key, categorize))

    # First, turn whatever array this is into unsigned 64-bit ints, if we can
    # manage it.
    if is_bool_array(vals):
        vals = vals.astype('u8')
    elif (is_datetime64_dtype(vals) or
          is_timedelta64_dtype(vals)):
        # reinterpret as integer nanoseconds
        vals = vals.view('i8').astype('u8', copy=False)
    elif (is_numeric_dtype(vals) and vals.dtype.itemsize <= 8):
        # reinterpret the raw bits as an unsigned int of the same width
        vals = vals.view('u{}'.format(vals.dtype.itemsize)).astype('u8')
    else:
        # With repeated values, its MUCH faster to categorize object dtypes,
        # then hash and rename categories. We allow skipping the categorization
        # when the values are known/likely to be unique.
        if categorize:
            from pandas import factorize, Categorical, Index
            codes, categories = factorize(vals, sort=False)
            cat = Categorical(codes, Index(categories),
                              ordered=False, fastpath=True)
            return _hash_categorical(cat, encoding, hash_key)

        try:
            vals = hashing.hash_object_array(vals, hash_key, encoding)
        except TypeError:
            # we have mixed types; stringify everything and retry
            vals = hashing.hash_object_array(vals.astype(str).astype(object),
                                             hash_key, encoding)

    # Then, redistribute these 64-bit ints within the space of 64-bit ints
    # (splitmix64-style finalizer; a bijection on uint64)
    vals ^= vals >> 30
    vals *= np.uint64(0xbf58476d1ce4e5b9)
    vals ^= vals >> 27
    vals *= np.uint64(0x94d049bb133111eb)
    vals ^= vals >> 31
    return vals
0 commit comments