From 6d31057d0b1b9bb6f2840771a9aff3860f2b9b00 Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17@gmail.com>
Date: Tue, 20 Dec 2016 12:39:09 -0500
Subject: [PATCH] DOC, TST, BUG: Improve uint64 core/algos behavior

1) duplicated()

Updates documentation to describe the "values"
parameter in the signature, adds tests for uint64,
and refactors to use duplicated_uint64.

2) mode()

Updates documentation to describe the "values"
parameter in the signature, adds tests for uint64,
and refactors to use mode_uint64.

3) unique()

Uses UInt64HashTable to patch a uint64 overflow bug
analogous to that seen in Series.unique (patched in
gh-14915).

4) Types API

Introduces "is_signed_integer_dtype" and
"is_unsigned_integer_dtype" to the public API.
Used in refactoring/patching of 1-3.
---
 doc/source/whatsnew/v0.20.0.txt         |   1 +
 pandas/api/tests/test_api.py            |   6 +-
 pandas/core/algorithms.py               |  50 ++++--
 pandas/hashtable.pyx                    | 161 +-----------------
 pandas/src/hashtable_func_helper.pxi.in | 209 +++++++++++++++++++++---
 pandas/tests/series/test_analytics.py   | 132 ++++++++++-----
 pandas/tests/test_algos.py              | 121 +++++++++++++-
 pandas/types/api.py                     |   2 +
 pandas/types/common.py                  |  12 ++
 9 files changed, 457 insertions(+), 237 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index b32b9fbbab04e..ffd2fa90dc9e6 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -298,5 +298,6 @@ Bug Fixes
 
 
 - Bug in ``Series.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14721`)
+- Bug in ``pd.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14915`)
 - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
 - Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`)
diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py
index 49aa31c375e25..bc126447213ca 100644
--- a/pandas/api/tests/test_api.py
+++ b/pandas/api/tests/test_api.py
@@ -153,10 +153,10 @@ class TestTypes(Base, tm.TestCase):
                'is_floating_dtype', 'is_int64_dtype', 'is_integer',
                'is_integer_dtype', 'is_number', 'is_numeric_dtype',
                'is_object_dtype', 'is_scalar', 'is_sparse',
-               'is_string_dtype',
+               'is_string_dtype', 'is_signed_integer_dtype',
                'is_timedelta64_dtype', 'is_timedelta64_ns_dtype',
-               'is_period', 'is_period_dtype',
-               'is_re', 'is_re_compilable',
+               'is_unsigned_integer_dtype', 'is_period',
+               'is_period_dtype', 'is_re', 'is_re_compilable',
                'is_dict_like', 'is_iterator',
                'is_list_like', 'is_hashable',
                'is_named_tuple', 'is_sequence',
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index e51774ce4d9b4..1a967bdd7a1a3 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -9,7 +9,9 @@
 from pandas import compat, lib, tslib, _np_version_under1p8
 from pandas.types.cast import _maybe_promote
 from pandas.types.generic import ABCSeries, ABCIndex
-from pandas.types.common import (is_integer_dtype,
+from pandas.types.common import (is_unsigned_integer_dtype,
+                                 is_signed_integer_dtype,
+                                 is_integer_dtype,
                                  is_int64_dtype,
                                  is_categorical_dtype,
                                  is_extension_type,
@@ -479,8 +481,9 @@ def _value_counts_arraylike(values, dropna=True):
         keys, counts = htable.value_count_float64(values, dropna)
     else:
         values = _ensure_object(values)
+        keys, counts = htable.value_count_object(values, dropna)
+
         mask = isnull(values)
-        keys, counts = htable.value_count_object(values, mask)
         if not dropna and mask.any():
             keys = np.insert(keys, 0, np.NaN)
             counts = np.insert(counts, 0, mask.sum())
@@ -490,12 +493,14 @@ def _value_counts_arraylike(values, dropna=True):
 
 def duplicated(values, keep='first'):
     """
-    Return boolean ndarray denoting duplicate values
+    Return boolean ndarray denoting duplicate values.
 
     .. versionadded:: 0.19.0
 
     Parameters
     ----------
+    values : ndarray-like
+        Array over which to check for duplicate values.
     keep : {'first', 'last', False}, default 'first'
         - ``first`` : Mark duplicates as ``True`` except for the first
           occurrence.
@@ -521,9 +526,12 @@ def duplicated(values, keep='first'):
     elif isinstance(values, (ABCSeries, ABCIndex)):
         values = values.values
 
-    if is_integer_dtype(dtype):
+    if is_signed_integer_dtype(dtype):
         values = _ensure_int64(values)
         duplicated = htable.duplicated_int64(values, keep=keep)
+    elif is_unsigned_integer_dtype(dtype):
+        values = _ensure_uint64(values)
+        duplicated = htable.duplicated_uint64(values, keep=keep)
     elif is_float_dtype(dtype):
         values = _ensure_float64(values)
         duplicated = htable.duplicated_float64(values, keep=keep)
@@ -535,7 +543,19 @@ def duplicated(values, keep='first'):
 
 
 def mode(values):
-    """Returns the mode or mode(s) of the passed Series or ndarray (sorted)"""
+    """
+    Returns the mode(s) of an array.
+
+    Parameters
+    ----------
+    values : array-like
+        Array over which to compute the mode(s).
+
+    Returns
+    -------
+    mode : Series
+    """
+
     # must sort because hash order isn't necessarily defined.
     from pandas.core.series import Series
 
@@ -547,23 +567,23 @@ def mode(values):
         constructor = Series
 
     dtype = values.dtype
-    if is_integer_dtype(values):
+    if is_signed_integer_dtype(values):
         values = _ensure_int64(values)
-        result = constructor(sorted(htable.mode_int64(values)), dtype=dtype)
-
+        result = constructor(np.sort(htable.mode_int64(values)), dtype=dtype)
+    elif is_unsigned_integer_dtype(values):
+        values = _ensure_uint64(values)
+        result = constructor(np.sort(htable.mode_uint64(values)), dtype=dtype)
     elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
         dtype = values.dtype
         values = values.view(np.int64)
-        result = constructor(sorted(htable.mode_int64(values)), dtype=dtype)
-
+        result = constructor(np.sort(htable.mode_int64(values)), dtype=dtype)
     elif is_categorical_dtype(values):
         result = constructor(values.mode())
     else:
-        mask = isnull(values)
         values = _ensure_object(values)
-        res = htable.mode_object(values, mask)
+        res = htable.mode_object(values)
         try:
-            res = sorted(res)
+            res = np.sort(res)
         except TypeError as e:
             warn("Unable to sort modes: %s" % e)
         result = constructor(res, dtype=dtype)
@@ -893,8 +913,10 @@ def _hashtable_algo(f, values, return_dtype=None):
     dtype = values.dtype
     if is_float_dtype(dtype):
         return f(htable.Float64HashTable, _ensure_float64)
-    elif is_integer_dtype(dtype):
+    elif is_signed_integer_dtype(dtype):
         return f(htable.Int64HashTable, _ensure_int64)
+    elif is_unsigned_integer_dtype(dtype):
+        return f(htable.UInt64HashTable, _ensure_uint64)
     elif is_datetime64_dtype(dtype):
         return_dtype = return_dtype or 'M8[ns]'
         return f(htable.Int64HashTable, _ensure_int64).view(return_dtype)
diff --git a/pandas/hashtable.pyx b/pandas/hashtable.pyx
index ce760b49fabc0..276b0679070dc 100644
--- a/pandas/hashtable.pyx
+++ b/pandas/hashtable.pyx
@@ -22,6 +22,8 @@ cdef extern from "numpy/npy_math.h":
 cimport cython
 cimport numpy as cnp
 
+from pandas.lib import checknull
+
 cnp.import_array()
 cnp.import_ufunc()
 
@@ -117,165 +119,6 @@ cdef class Int64Factorizer:
         return labels
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-cdef build_count_table_object(ndarray[object] values,
-                              ndarray[uint8_t, cast=True] mask,
-                              kh_pymap_t *table):
-    cdef:
-        khiter_t k
-        Py_ssize_t i, n = len(values)
-        int ret = 0
-
-    kh_resize_pymap(table, n // 10)
-
-    for i in range(n):
-        if mask[i]:
-            continue
-
-        val = values[i]
-        k = kh_get_pymap(table, <PyObject*> val)
-        if k != table.n_buckets:
-            table.vals[k] += 1
-        else:
-            k = kh_put_pymap(table, <PyObject*> val, &ret)
-            table.vals[k] = 1
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-cpdef value_count_object(ndarray[object] values,
-                         ndarray[uint8_t, cast=True] mask):
-    cdef:
-        Py_ssize_t i
-        kh_pymap_t *table
-        int k
-
-    table = kh_init_pymap()
-    build_count_table_object(values, mask, table)
-
-    i = 0
-    result_keys = np.empty(table.n_occupied, dtype=object)
-    result_counts = np.zeros(table.n_occupied, dtype=np.int64)
-    for k in range(table.n_buckets):
-        if kh_exist_pymap(table, k):
-            result_keys[i] = <object> table.keys[k]
-            result_counts[i] = table.vals[k]
-            i += 1
-    kh_destroy_pymap(table)
-
-    return result_keys, result_counts
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def mode_object(ndarray[object] values, ndarray[uint8_t, cast=True] mask):
-    cdef:
-        int count, max_count = 2
-        int j = -1 # so you can do +=
-        int k
-        ndarray[object] modes
-        kh_pymap_t *table
-
-    table = kh_init_pymap()
-    build_count_table_object(values, mask, table)
-
-    modes = np.empty(table.n_buckets, dtype=np.object_)
-    for k in range(table.n_buckets):
-        if kh_exist_pymap(table, k):
-            count = table.vals[k]
-
-            if count == max_count:
-                j += 1
-            elif count > max_count:
-                max_count = count
-                j = 0
-            else:
-                continue
-            modes[j] = <object> table.keys[k]
-
-    kh_destroy_pymap(table)
-
-    return modes[:j + 1]
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def mode_int64(int64_t[:] values):
-    cdef:
-        int count, max_count = 2
-        int j = -1 # so you can do +=
-        int k
-        kh_int64_t *table
-        ndarray[int64_t] modes
-
-    table = kh_init_int64()
-
-    build_count_table_int64(values, table, 0)
-
-    modes = np.empty(table.n_buckets, dtype=np.int64)
-
-    with nogil:
-        for k in range(table.n_buckets):
-            if kh_exist_int64(table, k):
-                count = table.vals[k]
-
-                if count == max_count:
-                    j += 1
-                elif count > max_count:
-                    max_count = count
-                    j = 0
-                else:
-                    continue
-                modes[j] = table.keys[k]
-
-    kh_destroy_int64(table)
-
-    return modes[:j + 1]
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def duplicated_object(ndarray[object] values, object keep='first'):
-    cdef:
-        Py_ssize_t i, n
-        dict seen = dict()
-        object row
-
-    n = len(values)
-    cdef ndarray[uint8_t] result = np.zeros(n, dtype=np.uint8)
-
-    if keep == 'last':
-        for i from n > i >= 0:
-            row = values[i]
-            if row in seen:
-                result[i] = 1
-            else:
-                seen[row] = i
-                result[i] = 0
-    elif keep == 'first':
-        for i from 0 <= i < n:
-            row = values[i]
-            if row in seen:
-                result[i] = 1
-            else:
-                seen[row] = i
-                result[i] = 0
-    elif keep is False:
-        for i from 0 <= i < n:
-            row = values[i]
-            if row in seen:
-                result[i] = 1
-                result[seen[row]] = 1
-            else:
-                seen[row] = i
-                result[i] = 0
-    else:
-        raise ValueError('keep must be either "first", "last" or False')
-
-    return result.view(np.bool_)
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def unique_label_indices(ndarray[int64_t, ndim=1] labels):
diff --git a/pandas/src/hashtable_func_helper.pxi.in b/pandas/src/hashtable_func_helper.pxi.in
index f3e16cfd32963..c292256767315 100644
--- a/pandas/src/hashtable_func_helper.pxi.in
+++ b/pandas/src/hashtable_func_helper.pxi.in
@@ -10,105 +10,272 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 
 {{py:
 
-# name
-dtypes = ['float64', 'int64', 'uint64']
+# dtype, ttype
+dtypes = [('float64', 'float64'),
+          ('uint64', 'uint64'),
+          ('object', 'pymap'),
+          ('int64', 'int64')]
 
 }}
 
-{{for dtype in dtypes}}
+{{for dtype, ttype in dtypes}}
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
+{{if dtype == 'object'}}
+cdef build_count_table_{{dtype}}(ndarray[{{dtype}}] values,
+                                 kh_{{ttype}}_t *table, bint dropna):
+{{else}}
 cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
-                                 kh_{{dtype}}_t *table, bint dropna):
+                                 kh_{{ttype}}_t *table, bint dropna):
+{{endif}}
     cdef:
         khiter_t k
         Py_ssize_t i, n = len(values)
+
+        {{if dtype != 'object'}}
         {{dtype}}_t val
+        {{endif}}
+
         int ret = 0
 
+    {{if dtype == 'object'}}
+    kh_resize_{{ttype}}(table, n // 10)
+
+    for i in range(n):
+        val = values[i]
+
+        if not checknull(val) or not dropna:
+            k = kh_get_{{ttype}}(table, <PyObject*> val)
+            if k != table.n_buckets:
+                table.vals[k] += 1
+            else:
+                k = kh_put_{{ttype}}(table, <PyObject*> val, &ret)
+                table.vals[k] = 1
+    {{else}}
     with nogil:
-        kh_resize_{{dtype}}(table, n)
+        kh_resize_{{ttype}}(table, n)
 
         for i in range(n):
             val = values[i]
             if val == val or not dropna:
-                k = kh_get_{{dtype}}(table, val)
+                k = kh_get_{{ttype}}(table, val)
                 if k != table.n_buckets:
                     table.vals[k] += 1
                 else:
-                    k = kh_put_{{dtype}}(table, val, &ret)
+                    k = kh_put_{{ttype}}(table, val, &ret)
                     table.vals[k] = 1
+    {{endif}}
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
+{{if dtype == 'object'}}
+cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
+{{else}}
 cpdef value_count_{{dtype}}({{dtype}}_t[:] values, bint dropna):
+{{endif}}
     cdef:
         Py_ssize_t i=0
-        kh_{{dtype}}_t *table
+        kh_{{ttype}}_t *table
+
+        {{if dtype != 'object'}}
         {{dtype}}_t[:] result_keys
         int64_t[:] result_counts
+        {{endif}}
+
         int k
 
-    table = kh_init_{{dtype}}()
+    table = kh_init_{{ttype}}()
+    {{if dtype == 'object'}}
+    build_count_table_{{dtype}}(values, table, 1)
+    {{else}}
     build_count_table_{{dtype}}(values, table, dropna)
+    {{endif}}
 
     result_keys = np.empty(table.n_occupied, dtype=np.{{dtype}})
     result_counts = np.zeros(table.n_occupied, dtype=np.int64)
 
+    {{if dtype == 'object'}}
+    for k in range(table.n_buckets):
+        if kh_exist_{{ttype}}(table, k):
+            result_keys[i] = <{{dtype}}> table.keys[k]
+            result_counts[i] = table.vals[k]
+            i += 1
+    {{else}}
     with nogil:
         for k in range(table.n_buckets):
-            if kh_exist_{{dtype}}(table, k):
+            if kh_exist_{{ttype}}(table, k):
                 result_keys[i] = table.keys[k]
                 result_counts[i] = table.vals[k]
                 i += 1
-    kh_destroy_{{dtype}}(table)
+    {{endif}}
 
+    kh_destroy_{{ttype}}(table)
+
+    {{if dtype == 'object'}}
+    return result_keys, result_counts
+    {{else}}
     return np.asarray(result_keys), np.asarray(result_counts)
+    {{endif}}
 
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def duplicated_{{dtype}}({{dtype}}_t[:] values,
-                         object keep='first'):
+{{if dtype == 'object'}}
+
+
+def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'):
+{{else}}
+
+
+def duplicated_{{dtype}}({{dtype}}_t[:] values, object keep='first'):
+{{endif}}
     cdef:
         int ret = 0, k
+        {{if dtype != 'object'}}
         {{dtype}}_t value
+        {{endif}}
         Py_ssize_t i, n = len(values)
-        kh_{{dtype}}_t * table = kh_init_{{dtype}}()
+        kh_{{ttype}}_t * table = kh_init_{{ttype}}()
         ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool')
 
-    kh_resize_{{dtype}}(table, min(n, _SIZE_HINT_LIMIT))
+    kh_resize_{{ttype}}(table, min(n, _SIZE_HINT_LIMIT))
 
     if keep not in ('last', 'first', False):
         raise ValueError('keep must be either "first", "last" or False')
 
     if keep == 'last':
+        {{if dtype == 'object'}}
+        for i from n > i >= 0:
+            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            out[i] = ret == 0
+        {{else}}
         with nogil:
-            for i from n > i >=0:
-                kh_put_{{dtype}}(table, values[i], &ret)
+            for i from n > i >= 0:
+                kh_put_{{ttype}}(table, values[i], &ret)
                 out[i] = ret == 0
+        {{endif}}
     elif keep == 'first':
+        {{if dtype == 'object'}}
+        for i from 0 <= i < n:
+            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            out[i] = ret == 0
+        {{else}}
         with nogil:
             for i from 0 <= i < n:
-                kh_put_{{dtype}}(table, values[i], &ret)
+                kh_put_{{ttype}}(table, values[i], &ret)
                 out[i] = ret == 0
+        {{endif}}
     else:
+        {{if dtype == 'object'}}
+        for i from 0 <= i < n:
+            value = values[i]
+            k = kh_get_{{ttype}}(table, <PyObject*> value)
+            if k != table.n_buckets:
+                out[table.vals[k]] = 1
+                out[i] = 1
+            else:
+                k = kh_put_{{ttype}}(table, <PyObject*> value, &ret)
+                table.keys[k] = <PyObject*> value
+                table.vals[k] = i
+                out[i] = 0
+        {{else}}
         with nogil:
             for i from 0 <= i < n:
                 value = values[i]
-                k = kh_get_{{dtype}}(table, value)
+                k = kh_get_{{ttype}}(table, value)
                 if k != table.n_buckets:
                     out[table.vals[k]] = 1
                     out[i] = 1
                 else:
-                    k = kh_put_{{dtype}}(table, value, &ret)
+                    k = kh_put_{{ttype}}(table, value, &ret)
                     table.keys[k] = value
                     table.vals[k] = i
                     out[i] = 0
-    kh_destroy_{{dtype}}(table)
+        {{endif}}
+    kh_destroy_{{ttype}}(table)
     return out
 
 {{endfor}}
+
+#----------------------------------------------------------------------
+# Mode Computations
+#----------------------------------------------------------------------
+
+{{py:
+
+# dtype, ctype, table_type, npy_dtype
+dtypes = [('int64', 'int64_t', 'int64', 'int64'),
+          ('uint64', 'uint64_t', 'uint64', 'uint64'),
+          ('object', 'object', 'pymap', 'object_')]
+}}
+
+{{for dtype, ctype, table_type, npy_dtype in dtypes}}
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+
+{{if dtype == 'object'}}
+
+
+def mode_{{dtype}}(ndarray[{{ctype}}] values):
+{{else}}
+
+
+def mode_{{dtype}}({{ctype}}[:] values):
+{{endif}}
+    cdef:
+        int count, max_count = 2
+        int j = -1 # so you can do +=
+        int k
+        kh_{{table_type}}_t *table
+        ndarray[{{ctype}}] modes
+
+    table = kh_init_{{table_type}}()
+    {{if dtype == 'object'}}
+    build_count_table_{{dtype}}(values, table, 1)
+    {{else}}
+    build_count_table_{{dtype}}(values, table, 0)
+    {{endif}}
+
+    modes = np.empty(table.n_buckets, dtype=np.{{npy_dtype}})
+
+    {{if dtype != 'object'}}
+    with nogil:
+        for k in range(table.n_buckets):
+            if kh_exist_{{table_type}}(table, k):
+                count = table.vals[k]
+
+                if count == max_count:
+                    j += 1
+                elif count > max_count:
+                    max_count = count
+                    j = 0
+                else:
+                    continue
+
+                modes[j] = table.keys[k]
+    {{else}}
+    for k in range(table.n_buckets):
+        if kh_exist_{{table_type}}(table, k):
+            count = table.vals[k]
+
+            if count == max_count:
+                j += 1
+            elif count > max_count:
+                max_count = count
+                j = 0
+            else:
+                continue
+
+            modes[j] = <object> table.keys[k]
+    {{endif}}
+
+    kh_destroy_{{table_type}}(table)
+
+    return modes[:j + 1]
+
+{{endfor}}
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index d4c209d4532e4..3896e255f0c2f 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -10,8 +10,8 @@
 import numpy as np
 import pandas as pd
 
-from pandas import (Series, DataFrame, isnull, notnull, bdate_range,
-                    date_range, _np_version_under1p10)
+from pandas import (Series, Categorical, DataFrame, isnull, notnull,
+                    bdate_range, date_range, _np_version_under1p10)
 from pandas.core.index import MultiIndex
 from pandas.tseries.index import Timestamp
 from pandas.tseries.tdi import Timedelta
@@ -128,45 +128,99 @@ def test_median(self):
         self.assertAlmostEqual(np.median(int_ts), int_ts.median())
 
     def test_mode(self):
-        s = Series([12, 12, 11, 10, 19, 11])
-        exp = Series([11, 12])
-        assert_series_equal(s.mode(), exp)
-
-        assert_series_equal(
-            Series([1, 2, 3]).mode(), Series(
-                [], dtype='int64'))
-
-        lst = [5] * 20 + [1] * 10 + [6] * 25
-        np.random.shuffle(lst)
-        s = Series(lst)
-        assert_series_equal(s.mode(), Series([6]))
-
-        s = Series([5] * 10)
-        assert_series_equal(s.mode(), Series([5]))
-
-        s = Series(lst)
-        s[0] = np.nan
-        assert_series_equal(s.mode(), Series([6.]))
-
-        s = Series(list('adfasbasfwewefwefweeeeasdfasnbam'))
-        assert_series_equal(s.mode(), Series(['e']))
-
-        s = Series(['2011-01-03', '2013-01-02', '1900-05-03'], dtype='M8[ns]')
-        assert_series_equal(s.mode(), Series([], dtype="M8[ns]"))
-        s = Series(['2011-01-03', '2013-01-02', '1900-05-03', '2011-01-03',
-                    '2013-01-02'], dtype='M8[ns]')
-        assert_series_equal(s.mode(), Series(['2011-01-03', '2013-01-02'],
-                                             dtype='M8[ns]'))
-
-        # GH 5986
-        s = Series(['1 days', '-1 days', '0 days'], dtype='timedelta64[ns]')
-        assert_series_equal(s.mode(), Series([], dtype='timedelta64[ns]'))
+        # No mode should be found.
+        exp = Series([], dtype=np.float64)
+        tm.assert_series_equal(Series([]).mode(), exp)
+
+        exp = Series([], dtype=np.int64)
+        tm.assert_series_equal(Series([1]).mode(), exp)
+
+        exp = Series([], dtype=np.object)
+        tm.assert_series_equal(Series(['a', 'b', 'c']).mode(), exp)
+
+        # Test numerical data types.
+        exp_single = [1]
+        data_single = [1] * 5 + [2] * 3
+
+        exp_multi = [1, 3]
+        data_multi = [1] * 5 + [2] * 3 + [3] * 5
+
+        for dt in np.typecodes['AllInteger'] + np.typecodes['Float']:
+            s = Series(data_single, dtype=dt)
+            exp = Series(exp_single, dtype=dt)
+            tm.assert_series_equal(s.mode(), exp)
+
+            s = Series(data_multi, dtype=dt)
+            exp = Series(exp_multi, dtype=dt)
+            tm.assert_series_equal(s.mode(), exp)
+
+        # Test string and object types.
+        exp = ['b']
+        data = ['a'] * 2 + ['b'] * 3
+
+        s = Series(data, dtype='c')
+        exp = Series(exp, dtype='c')
+        tm.assert_series_equal(s.mode(), exp)
+
+        exp = ['bar']
+        data = ['foo'] * 2 + ['bar'] * 3
+
+        for dt in [str, object]:
+            s = Series(data, dtype=dt)
+            exp = Series(exp, dtype=dt)
+            tm.assert_series_equal(s.mode(), exp)
+
+        # Test datetime types.
+        exp = Series([], dtype="M8[ns]")
+        s = Series(['2011-01-03', '2013-01-02',
+                    '1900-05-03'], dtype='M8[ns]')
+        tm.assert_series_equal(s.mode(), exp)
+
+        exp = Series(['2011-01-03', '2013-01-02'], dtype='M8[ns]')
+        s = Series(['2011-01-03', '2013-01-02', '1900-05-03',
+                    '2011-01-03', '2013-01-02'], dtype='M8[ns]')
+        tm.assert_series_equal(s.mode(), exp)
+
+        # gh-5986: Test timedelta types.
+        exp = Series([], dtype='timedelta64[ns]')
+        s = Series(['1 days', '-1 days', '0 days'],
+                   dtype='timedelta64[ns]')
+        tm.assert_series_equal(s.mode(), exp)
 
+        exp = Series(['2 min', '1 day'], dtype='timedelta64[ns]')
         s = Series(['1 day', '1 day', '-1 day', '-1 day 2 min',
-                    '2 min', '2 min'],
-                   dtype='timedelta64[ns]')
-        assert_series_equal(s.mode(), Series(['2 min', '1 day'],
-                                             dtype='timedelta64[ns]'))
+                    '2 min', '2 min'], dtype='timedelta64[ns]')
+        tm.assert_series_equal(s.mode(), exp)
+
+        # Test mixed dtype.
+        exp = Series(['foo'])
+        s = Series([1, 'foo', 'foo'])
+        tm.assert_series_equal(s.mode(), exp)
+
+        # Test for uint64 overflow.
+        exp = Series([2**63], dtype=np.uint64)
+        s = Series([1, 2**63, 2**63], dtype=np.uint64)
+        tm.assert_series_equal(s.mode(), exp)
+
+        exp = Series([], dtype=np.uint64)
+        s = Series([1, 2**63], dtype=np.uint64)
+        tm.assert_series_equal(s.mode(), exp)
+
+        # Test category dtype.
+        c = Categorical([1, 2])
+        exp = Categorical([], categories=[1, 2])
+        exp = Series(exp, dtype='category')
+        tm.assert_series_equal(Series(c).mode(), exp)
+
+        c = Categorical([1, 'a', 'a'])
+        exp = Categorical(['a'], categories=[1, 'a'])
+        exp = Series(exp, dtype='category')
+        tm.assert_series_equal(Series(c).mode(), exp)
+
+        c = Categorical([1, 1, 2, 3, 3])
+        exp = Categorical([1, 3], categories=[1, 2, 3])
+        exp = Series(exp, dtype='category')
+        tm.assert_series_equal(Series(c).mode(), exp)
 
     def test_prod(self):
         self._check_stat_op('prod', np.prod)
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 7f1745edbb816..e360089928000 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -365,6 +365,11 @@ def test_timedelta64_dtype_array_returned(self):
         tm.assert_numpy_array_equal(result, expected)
         self.assertEqual(result.dtype, expected.dtype)
 
+    def test_uint64_overflow(self):
+        s = pd.Series([1, 2, 2**63, 2**63], dtype=np.uint64)
+        exp = np.array([1, 2, 2**63], dtype=np.uint64)
+        tm.assert_numpy_array_equal(algos.unique(s), exp)
+
 
 class TestIsin(tm.TestCase):
     _multiprocess_can_split_ = True
@@ -672,7 +677,9 @@ def test_numeric_object_likes(self):
                  np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j,
                            2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]),
                  np.array(['a', 'b', 'a', 'e', 'c',
-                           'b', 'd', 'a', 'e', 'f'], dtype=object)]
+                           'b', 'd', 'a', 'e', 'f'], dtype=object),
+                 np.array([1, 2**63, 1, 3**5, 10,
+                           2**63, 39, 1, 3**5, 7], dtype=np.uint64)]
 
         exp_first = np.array([False, False, True, False, False,
                               True, False, True, True, False])
@@ -1202,6 +1209,118 @@ def test_int64_add_overflow():
                                        b_mask=np.array([False, True]))
 
 
+class TestMode(tm.TestCase):
+
+    def test_no_mode(self):
+        exp = Series([], dtype=np.float64)
+        tm.assert_series_equal(algos.mode([]), exp)
+
+        exp = Series([], dtype=np.int)
+        tm.assert_series_equal(algos.mode([1]), exp)
+
+        exp = Series([], dtype=np.object)
+        tm.assert_series_equal(algos.mode(['a', 'b', 'c']), exp)
+
+    def test_number_mode(self):
+        exp_single = [1]
+        data_single = [1] * 5 + [2] * 3
+
+        exp_multi = [1, 3]
+        data_multi = [1] * 5 + [2] * 3 + [3] * 5
+
+        for dt in np.typecodes['AllInteger'] + np.typecodes['Float']:
+            s = Series(data_single, dtype=dt)
+            exp = Series(exp_single, dtype=dt)
+            tm.assert_series_equal(algos.mode(s), exp)
+
+            s = Series(data_multi, dtype=dt)
+            exp = Series(exp_multi, dtype=dt)
+            tm.assert_series_equal(algos.mode(s), exp)
+
+    def test_strobj_mode(self):
+        exp = ['b']
+        data = ['a'] * 2 + ['b'] * 3
+
+        s = Series(data, dtype='c')
+        exp = Series(exp, dtype='c')
+        tm.assert_series_equal(algos.mode(s), exp)
+
+        exp = ['bar']
+        data = ['foo'] * 2 + ['bar'] * 3
+
+        for dt in [str, object]:
+            s = Series(data, dtype=dt)
+            exp = Series(exp, dtype=dt)
+            tm.assert_series_equal(algos.mode(s), exp)
+
+    def test_datelike_mode(self):
+        exp = Series([], dtype="M8[ns]")
+        s = Series(['2011-01-03', '2013-01-02',
+                    '1900-05-03'], dtype='M8[ns]')
+        tm.assert_series_equal(algos.mode(s), exp)
+
+        exp = Series(['2011-01-03', '2013-01-02'], dtype='M8[ns]')
+        s = Series(['2011-01-03', '2013-01-02', '1900-05-03',
+                    '2011-01-03', '2013-01-02'], dtype='M8[ns]')
+        tm.assert_series_equal(algos.mode(s), exp)
+
+    def test_timedelta_mode(self):
+        exp = Series([], dtype='timedelta64[ns]')
+        s = Series(['1 days', '-1 days', '0 days'],
+                   dtype='timedelta64[ns]')
+        tm.assert_series_equal(algos.mode(s), exp)
+
+        exp = Series(['2 min', '1 day'], dtype='timedelta64[ns]')
+        s = Series(['1 day', '1 day', '-1 day', '-1 day 2 min',
+                    '2 min', '2 min'], dtype='timedelta64[ns]')
+        tm.assert_series_equal(algos.mode(s), exp)
+
+    def test_mixed_dtype(self):
+        exp = Series(['foo'])
+        s = Series([1, 'foo', 'foo'])
+        tm.assert_series_equal(algos.mode(s), exp)
+
+    def test_uint64_overflow(self):
+        exp = Series([2**63], dtype=np.uint64)
+        s = Series([1, 2**63, 2**63], dtype=np.uint64)
+        tm.assert_series_equal(algos.mode(s), exp)
+
+        exp = Series([], dtype=np.uint64)
+        s = Series([1, 2**63], dtype=np.uint64)
+        tm.assert_series_equal(algos.mode(s), exp)
+
+    def test_categorical(self):
+        c = Categorical([1, 2])
+        exp = Series([], dtype=np.int64)
+        tm.assert_series_equal(algos.mode(c), exp)
+
+        c = Categorical([1, 'a', 'a'])
+        exp = Series(['a'], dtype=object)
+        tm.assert_series_equal(algos.mode(c), exp)
+
+        c = Categorical([1, 1, 2, 3, 3])
+        exp = Series([1, 3], dtype=np.int64)
+        tm.assert_series_equal(algos.mode(c), exp)
+
+    def test_index(self):
+        idx = Index([1, 2, 3])
+        exp = Series([], dtype=np.int64)
+        tm.assert_series_equal(algos.mode(idx), exp)
+
+        idx = Index([1, 'a', 'a'])
+        exp = Series(['a'], dtype=object)
+        tm.assert_series_equal(algos.mode(idx), exp)
+
+        idx = Index([1, 1, 2, 3, 3])
+        exp = Series([1, 3], dtype=np.int64)
+        tm.assert_series_equal(algos.mode(idx), exp)
+
+        exp = Series(['2 min', '1 day'], dtype='timedelta64[ns]')
+        idx = Index(['1 day', '1 day', '-1 day', '-1 day 2 min',
+                     '2 min', '2 min'], dtype='timedelta64[ns]')
+        tm.assert_series_equal(algos.mode(idx), exp)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/types/api.py b/pandas/types/api.py
index 096dc2f84aa67..c809cb3614a8c 100644
--- a/pandas/types/api.py
+++ b/pandas/types/api.py
@@ -44,6 +44,8 @@
                      is_floating_dtype,
                      is_bool_dtype,
                      is_complex_dtype,
+                     is_signed_integer_dtype,
+                     is_unsigned_integer_dtype,
 
                      # like
                      is_re,
diff --git a/pandas/types/common.py b/pandas/types/common.py
index 06c8ef6e35cd7..96eb6d6968bfb 100644
--- a/pandas/types/common.py
+++ b/pandas/types/common.py
@@ -155,6 +155,18 @@ def is_integer_dtype(arr_or_dtype):
             not issubclass(tipo, (np.datetime64, np.timedelta64)))
 
 
+def is_signed_integer_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return (issubclass(tipo, np.signedinteger) and
+            not issubclass(tipo, (np.datetime64, np.timedelta64)))
+
+
+def is_unsigned_integer_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return (issubclass(tipo, np.unsignedinteger) and
+            not issubclass(tipo, (np.datetime64, np.timedelta64)))
+
+
 def is_int64_dtype(arr_or_dtype):
     tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.int64)