diff --git a/asv_bench/benchmarks/parser_vb.py b/asv_bench/benchmarks/parser_vb.py
index 04f25034638cd..6dc8bffd6dac9 100644
--- a/asv_bench/benchmarks/parser_vb.py
+++ b/asv_bench/benchmarks/parser_vb.py
@@ -114,6 +114,27 @@ def teardown(self):
         os.remove('test.csv')
 
 
+class read_csv_categorical(object):
+    goal_time = 0.2
+
+    def setup(self):
+        N = 100000
+        group1 = ['aaaaaaaa', 'bbbbbbb', 'cccccccc', 'dddddddd', 'eeeeeeee']
+        df = DataFrame({'a': np.random.choice(group1, N).astype('object'),
+                        'b': np.random.choice(group1, N).astype('object'),
+                        'c': np.random.choice(group1, N).astype('object')})
+        df.to_csv('strings.csv', index=False)
+
+    def time_read_csv_categorical_post(self):
+        read_csv('strings.csv').apply(pd.Categorical)
+
+    def time_read_csv_categorical_direct(self):
+        read_csv('strings.csv', dtype='category')
+
+    def teardown(self):
+        os.remove('strings.csv')
+
+
 class read_table_multiple_date(object):
     goal_time = 0.2
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 2866371cce61a..7917e6b4cdfce 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -500,6 +500,43 @@ worth trying.
    data that was read in. It is important to note that the overall column will be
    marked with a ``dtype`` of ``object``, which is used for columns with mixed dtypes.
 
+.. _io.categorical:
+
+Specifying Categorical dtype
+''''''''''''''''''''''''''''
+
+.. versionadded:: 0.19.0
+
+``Categorical`` columns can be parsed directly by specifying ``dtype='category'``:
+
+.. ipython:: python
+
+   data = 'col1,col2,col3\na,b,1\na,b,2\nc,d,3'
+
+   pd.read_csv(StringIO(data))
+   pd.read_csv(StringIO(data)).dtypes
+   pd.read_csv(StringIO(data), dtype='category').dtypes
+
+Individual columns can be parsed as a ``Categorical`` using a dict specification:
+
+.. ipython:: python
+
+   pd.read_csv(StringIO(data), dtype={'col1': 'category'}).dtypes
+
+.. note::
+
+   The resulting categories will always be parsed as strings (object dtype).
+   If the categories are numeric they can be converted using the
+   :func:`to_numeric` function, or as appropriate, another converter
+   such as :func:`to_datetime`.
+
+   .. ipython:: python
+
+      df = pd.read_csv(StringIO(data), dtype='category')
+      df.dtypes
+      df['col3']
+      df['col3'].cat.categories = pd.to_numeric(df['col3'].cat.categories)
+      df['col3']
 
 Naming and Using Columns
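Side note on the ``.cat.categories`` conversion the io.rst note above demonstrates: because the conversion runs over the few unique categories rather than over every row, the same pattern works for datetime-like columns too. A minimal sketch, not part of the patch (the ``ts`` column and its values are invented for illustration):

```python
import pandas as pd
from pandas.compat import StringIO

data = 'ts,value\n2016-01-01,1\n2016-01-02,2\n2016-01-01,3'

df = pd.read_csv(StringIO(data), dtype={'ts': 'category'})

# categories come back as strings; converting them touches only the
# unique values, not every row of the column
df['ts'].cat.categories = pd.to_datetime(df['ts'].cat.categories)
```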
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 59a106291dad8..6c995a6989a38 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -12,6 +12,7 @@ Highlights include:
 - :func:`merge_asof` for asof-style time-series joining, see :ref:`here `
 - ``.rolling()`` is now time-series aware, see :ref:`here `
 - pandas development api, see :ref:`here `
+- :func:`read_csv` now supports parsing ``Categorical`` data, see :ref:`here <whatsnew_0190.enhancements.read_csv_categorical>`
 
 .. contents:: What's new in v0.19.0
     :local:
@@ -195,6 +196,14 @@ default of the index) in a DataFrame.
 :func:`read_csv` has improved support for duplicate column names
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+.. ipython:: python
+   :suppress:
+
+   from pandas.compat import StringIO
+
+.. _whatsnew_0190.enhancements.read_csv_dupe_col_names_support:
+
+
 :ref:`Duplicate column names ` are now supported in :func:`read_csv` whether
 they are in the file or passed in as the ``names`` parameter (:issue:`7160`, :issue:`9424`)
@@ -222,6 +231,46 @@ New behaviour:
 
    In [2]: pd.read_csv(StringIO(data), names=names)
 
+
+.. _whatsnew_0190.enhancements.read_csv_categorical:
+
+:func:`read_csv` supports parsing ``Categorical`` directly
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The :func:`read_csv` function now supports parsing a ``Categorical`` column when
+specified as a dtype (:issue:`10153`). Depending on the structure of the data,
+this can result in a faster parse time and lower memory usage compared to
+converting to ``Categorical`` after parsing. See the io :ref:`docs here <io.categorical>`.
+
+.. ipython:: python
+
+   data = 'col1,col2,col3\na,b,1\na,b,2\nc,d,3'
+
+   pd.read_csv(StringIO(data))
+   pd.read_csv(StringIO(data)).dtypes
+   pd.read_csv(StringIO(data), dtype='category').dtypes
+
+Individual columns can be parsed as a ``Categorical`` using a dict specification:
+
+.. ipython:: python
+
+   pd.read_csv(StringIO(data), dtype={'col1': 'category'}).dtypes
+
+.. note::
+
+   The resulting categories will always be parsed as strings (object dtype).
+   If the categories are numeric they can be converted using the
+   :func:`to_numeric` function, or as appropriate, another converter
+   such as :func:`to_datetime`.
+
+   .. ipython:: python
+
+      df = pd.read_csv(StringIO(data), dtype='category')
+      df.dtypes
+      df['col3']
+      df['col3'].cat.categories = pd.to_numeric(df['col3'].cat.categories)
+      df['col3']
+
 .. _whatsnew_0190.enhancements.semi_month_offsets:
 
 Semi-Month Offsets
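The whatsnew entry above claims lower memory usage for the direct parse. A quick way to sanity-check that claim in a session; a sketch, not part of the patch:

```python
import numpy as np
import pandas as pd
from pandas.compat import StringIO

# a single column with many repeats of a few distinct strings
words = np.random.choice(['aaaaaaaa', 'bbbbbbb', 'cccccccc'], 100000)
csv = 'col\n' + '\n'.join(words)

as_object = pd.read_csv(StringIO(csv))                      # object column
as_category = pd.read_csv(StringIO(csv), dtype='category')  # parsed directly

# object storage boxes every row; a categorical stores each distinct
# string once plus a small integer code per row
print(as_object.memory_usage(deep=True).sum())
print(as_category.memory_usage(deep=True).sum())
```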
diff --git a/pandas/io/tests/parser/c_parser_only.py b/pandas/io/tests/parser/c_parser_only.py
index 103c9fa2b7ce8..4cea9e1d6b595 100644
--- a/pandas/io/tests/parser/c_parser_only.py
+++ b/pandas/io/tests/parser/c_parser_only.py
@@ -12,9 +12,10 @@
 
 import pandas as pd
 import pandas.util.testing as tm
 
-from pandas import DataFrame, Series, Index, MultiIndex
+from pandas import DataFrame, Series, Index, MultiIndex, Categorical
 from pandas import compat
 from pandas.compat import StringIO, range, lrange
+from pandas.types.dtypes import CategoricalDtype
 
 
 class CParserTests(object):
@@ -135,6 +136,11 @@ def test_passing_dtype(self):
                           dtype={'A': 'timedelta64', 'B': 'float64'},
                           index_col=0)
 
+        # valid but unsupported - fixed width unicode string
+        self.assertRaises(TypeError, self.read_csv, path,
+                          dtype={'A': 'U8'},
+                          index_col=0)
+
         # see gh-12048: empty frame
         actual = self.read_csv(StringIO('A,B'), dtype=str)
         expected = DataFrame({'A': [], 'B': []}, index=[], dtype=str)
@@ -184,6 +190,92 @@ def test_pass_dtype(self):
         self.assertEqual(result['one'].dtype, 'u1')
         self.assertEqual(result['two'].dtype, 'object')
 
+    def test_categorical_dtype(self):
+        # GH 10153
+        data = """a,b,c
+1,a,3.4
+1,a,3.4
+2,b,4.5"""
+        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
+                                 'b': Categorical(['a', 'a', 'b']),
+                                 'c': Categorical(['3.4', '3.4', '4.5'])})
+        actual = self.read_csv(StringIO(data), dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype=CategoricalDtype())
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype={'a': 'category',
+                                                      'b': 'category',
+                                                      'c': CategoricalDtype()})
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype={'b': 'category'})
+        expected = pd.DataFrame({'a': [1, 1, 2],
+                                 'b': Categorical(['a', 'a', 'b']),
+                                 'c': [3.4, 3.4, 4.5]})
+        tm.assert_frame_equal(actual, expected)
+
+        actual = self.read_csv(StringIO(data), dtype={1: 'category'})
+        tm.assert_frame_equal(actual, expected)
+
+        # unsorted
+        data = """a,b,c
+1,b,3.4
+1,b,3.4
+2,a,4.5"""
+        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
+                                 'b': Categorical(['b', 'b', 'a']),
+                                 'c': Categorical(['3.4', '3.4', '4.5'])})
+        actual = self.read_csv(StringIO(data), dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+        # missing
+        data = """a,b,c
+1,b,3.4
+1,nan,3.4
+2,a,4.5"""
+        expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
+                                 'b': Categorical(['b', np.nan, 'a']),
+                                 'c': Categorical(['3.4', '3.4', '4.5'])})
+        actual = self.read_csv(StringIO(data), dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+    def test_categorical_dtype_encoding(self):
+        # GH 10153
+        pth = tm.get_data_path('unicode_series.csv')
+        encoding = 'latin-1'
+        expected = self.read_csv(pth, header=None, encoding=encoding)
+        expected[1] = Categorical(expected[1])
+        actual = self.read_csv(pth, header=None, encoding=encoding,
+                               dtype={1: 'category'})
+        tm.assert_frame_equal(actual, expected)
+
+        pth = tm.get_data_path('utf16_ex.txt')
+        encoding = 'utf-16'
+        expected = self.read_table(pth, encoding=encoding)
+        expected = expected.apply(Categorical)
+        actual = self.read_table(pth, encoding=encoding, dtype='category')
+        tm.assert_frame_equal(actual, expected)
+
+    def test_categorical_dtype_chunksize(self):
+        # GH 10153
+        data = """a,b
+1,a
+1,b
+1,b
+2,c"""
+        expecteds = [pd.DataFrame({'a': [1, 1],
+                                   'b': Categorical(['a', 'b'])}),
+                     pd.DataFrame({'a': [1, 2],
+                                   'b': Categorical(['b', 'c'])},
+                                  index=[2, 3])]
+        actuals = self.read_csv(StringIO(data), dtype={'b': 'category'},
+                                chunksize=2)
+
+        for actual, expected in zip(actuals, expecteds):
+            tm.assert_frame_equal(actual, expected)
+
     def test_pass_dtype_as_recarray(self):
         if compat.is_platform_windows() and self.low_memory:
             raise nose.SkipTest(
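The chunksize test above relies on each chunk carrying only the categories it actually saw. A sketch of how a caller might stitch chunks back together without falling back to object dtype (assumes the 0.19-era internal import path for ``union_categoricals``; it later moved to ``pandas.api.types``):

```python
import pandas as pd
from pandas.compat import StringIO
from pandas.types.concat import union_categoricals

data = 'a,b\n1,a\n1,b\n1,b\n2,c'
chunks = pd.read_csv(StringIO(data), dtype={'b': 'category'}, chunksize=2)

# each chunk's 'b' is a Categorical over only that chunk's values;
# union_categoricals merges the pieces and unions their categories
pieces = [chunk['b'].values for chunk in chunks]
combined = union_categoricals(pieces, sort_categories=True)
```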
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index e72e2f90a5213..5af82be5b741b 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -25,6 +25,7 @@ cdef extern from "Python.h":
 cdef extern from "stdlib.h":
     void memcpy(void *dst, void *src, size_t n)
 
+cimport cython
 cimport numpy as cnp
 
 from numpy cimport ndarray, uint8_t, uint64_t
@@ -33,6 +34,15 @@ import numpy as np
 cimport util
 
 import pandas.lib as lib
+from pandas.types.common import (is_categorical_dtype, CategoricalDtype,
+                                 is_integer_dtype, is_float_dtype,
+                                 is_bool_dtype, is_object_dtype,
+                                 is_string_dtype, is_datetime64_dtype,
+                                 pandas_dtype)
+from pandas.core.categorical import Categorical
+from pandas.core.algorithms import take_1d
+from pandas.types.concat import union_categoricals
+from pandas import Index
 
 import time
 import os
@@ -399,11 +409,12 @@ cdef class TextReader:
 
         self._set_quoting(quotechar, quoting)
 
-        # TODO: endianness just a placeholder?
+
+        dtype_order = ['int64', 'float64', 'bool', 'object']
         if quoting == QUOTE_NONNUMERIC:
-            self.dtype_cast_order = ['<f8', '<f8', '|b1', '|O8']
-        else:
-            self.dtype_cast_order = ['<i8', '<f8', '|b1', '|O8']
+            # consistent with csv module semantics, cast all to float
+            dtype_order = dtype_order[1:]
+        self.dtype_cast_order = [np.dtype(x) for x in dtype_order]
 
         if comment is not None:
             if len(comment) > 1:
@@ -472,15 +483,10 @@ cdef class TextReader:
             self.encoding = encoding
 
         if isinstance(dtype, dict):
-            conv = {}
-            for k in dtype:
-                v = dtype[k]
-                if isinstance(v, basestring):
-                    v = np.dtype(v)
-                conv[k] = v
-            dtype = conv
+            dtype = {k: pandas_dtype(dtype[k])
+                     for k in dtype}
         elif dtype is not None:
-            dtype = np.dtype(dtype)
+            dtype = pandas_dtype(dtype)
 
         self.dtype = dtype
@@ -689,6 +695,7 @@ cdef class TextReader:
             int status
             Py_ssize_t size
             char *errors = "strict"
+            cdef StringPath path = _string_path(self.c_encoding)
 
         header = []
@@ -718,20 +725,18 @@ cdef class TextReader:
                 field_count = self.parser.line_fields[hr]
                 start = self.parser.line_start[hr]
 
-                # TODO: Py3 vs. Py2
                 counts = {}
                 unnamed_count = 0
                 for i in range(field_count):
                     word = self.parser.words[start + i]
 
-                    if self.c_encoding == NULL and not PY3:
+                    if path == CSTRING:
                         name = PyBytes_FromString(word)
-                    else:
-                        if self.c_encoding == NULL or self.c_encoding == b'utf-8':
-                            name = PyUnicode_FromString(word)
-                        else:
-                            name = PyUnicode_Decode(word, strlen(word),
-                                                    self.c_encoding, errors)
+                    elif path == UTF8:
+                        name = PyUnicode_FromString(word)
+                    elif path == ENCODED:
+                        name = PyUnicode_Decode(word, strlen(word),
+                                                self.c_encoding, errors)
 
                     if name == '':
                         if self.has_mi_columns:
@@ -1076,17 +1081,12 @@ cdef class TextReader:
                 col_dtype = self.dtype[i]
             else:
                 if self.dtype.names:
-                    col_dtype = self.dtype.descr[i][1]
+                    # structured array
+                    col_dtype = np.dtype(self.dtype.descr[i][1])
                 else:
                     col_dtype = self.dtype
 
         if col_dtype is not None:
-            if not isinstance(col_dtype, basestring):
-                if isinstance(col_dtype, np.dtype):
-                    col_dtype = col_dtype.str
-                else:
-                    col_dtype = np.dtype(col_dtype).str
-
             col_res, na_count = self._convert_with_dtype(col_dtype, i, start,
                                                          end, na_filter, 1,
                                                          na_hashset, na_flist)
@@ -1104,7 +1104,7 @@ cdef class TextReader:
                     dt, i, start, end, na_filter, 0, na_hashset, na_flist)
             except OverflowError:
                 col_res, na_count = self._convert_with_dtype(
-                    '|O8', i, start, end, na_filter, 0, na_hashset, na_flist)
+                    np.dtype('object'), i, start, end, na_filter, 0, na_hashset, na_flist)
 
             if col_res is not None:
                 break
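The hunks above replace string-level dtype munging (``'|O8'``, ``dtype[1] == 'i'``, ...) with real dtype objects, so the conversion code can dispatch through pandas' own introspection helpers. Roughly what that normalization does with user input, an illustrative sketch using the same internal module paths this patch imports from:

```python
import numpy as np
from pandas.types.common import (pandas_dtype, is_categorical_dtype,
                                 is_integer_dtype)

pandas_dtype('int64')     # -> dtype('int64'), a real numpy dtype object
pandas_dtype('category')  # -> CategoricalDtype(), not a numpy dtype

is_integer_dtype(np.dtype('int64'))  # True
is_categorical_dtype('category')     # True; the string form also matches
```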
@@ -1136,90 +1136,88 @@ cdef class TextReader:
                           bint user_dtype,
                           kh_str_t *na_hashset,
                           object na_flist):
-        if dtype[1] == 'i' or dtype[1] == 'u':
-            result, na_count = _try_int64(self.parser, i, start, end,
-                                          na_filter, na_hashset)
+        if is_integer_dtype(dtype):
+            result, na_count = _try_int64(self.parser, i, start, end, na_filter,
+                                          na_hashset)
             if user_dtype and na_count is not None:
                 if na_count > 0:
                     raise ValueError("Integer column has NA values in "
-                                     "column {column}".format(column=i))
+                                     "column {column}".format(column=i))
 
-            if result is not None and dtype[1:] != 'i8':
+            if result is not None and dtype != 'int64':
                 result = result.astype(dtype)
 
             return result, na_count
 
-        elif dtype[1] == 'f':
+        elif is_float_dtype(dtype):
             result, na_count = _try_double(self.parser, i, start, end,
                                            na_filter, na_hashset, na_flist)
 
-            if result is not None and dtype[1:] != 'f8':
+            if result is not None and dtype != 'float64':
                 result = result.astype(dtype)
             return result, na_count
 
-        elif dtype[1] == 'b':
+        elif is_bool_dtype(dtype):
             result, na_count = _try_bool_flex(self.parser, i, start, end,
                                               na_filter, na_hashset,
                                               self.true_set, self.false_set)
             return result, na_count
-        elif dtype[1] == 'c':
-            raise NotImplementedError("the dtype %s is not supported for parsing" % dtype)
-
-        elif dtype[1] == 'S':
+        elif dtype.kind == 'S':
             # TODO: na handling
-            width = int(dtype[2:])
+            width = dtype.itemsize
             if width > 0:
                 result = _to_fw_string(self.parser, i, start, end, width)
                 return result, 0
 
             # treat as a regular string parsing
             return self._string_convert(i, start, end, na_filter,
-                                        na_hashset)
-        elif dtype[1] == 'U':
-            width = int(dtype[2:])
+                                        na_hashset)
+        elif dtype.kind == 'U':
+            width = dtype.itemsize
             if width > 0:
-                raise NotImplementedError("the dtype %s is not supported for parsing" % dtype)
+                raise TypeError("the dtype %s is not supported for parsing" % dtype)
 
             # unicode variable width
             return self._string_convert(i, start, end, na_filter,
                                         na_hashset)
-
-
-        elif dtype[1] == 'O':
+        elif is_categorical_dtype(dtype):
+            codes, cats, na_count = _categorical_convert(self.parser, i, start,
+                                                         end, na_filter, na_hashset,
+                                                         self.c_encoding)
+            # sort categories and recode if necessary
+            cats = Index(cats)
+            if not cats.is_monotonic_increasing:
+                unsorted = cats.copy()
+                cats = cats.sort_values()
+                indexer = cats.get_indexer(unsorted)
+                codes = take_1d(indexer, codes, fill_value=-1)
+
+            return Categorical(codes, categories=cats, ordered=False,
+                               fastpath=True), na_count
+        elif is_object_dtype(dtype):
             return self._string_convert(i, start, end, na_filter,
                                         na_hashset)
+        elif is_datetime64_dtype(dtype):
+            raise TypeError("the dtype %s is not supported for parsing, "
+                            "pass this column using parse_dates instead" % dtype)
         else:
-            if dtype[1] == 'M':
-                raise TypeError("the dtype %s is not supported for parsing, "
-                                "pass this column using parse_dates instead" % dtype)
             raise TypeError("the dtype %s is not supported for parsing" % dtype)
 
     cdef _string_convert(self, Py_ssize_t i, int start, int end,
                          bint na_filter, kh_str_t *na_hashset):
-        if PY3:
-            if self.c_encoding != NULL:
-                if self.c_encoding == b"utf-8":
-                    return _string_box_utf8(self.parser, i, start, end,
-                                            na_filter, na_hashset)
-                else:
-                    return _string_box_decode(self.parser, i, start, end,
-                                              na_filter, na_hashset,
-                                              self.c_encoding)
-            else:
-                return _string_box_utf8(self.parser, i, start, end,
-                                        na_filter, na_hashset)
-        else:
-            if self.c_encoding != NULL:
-                if self.c_encoding == b"utf-8":
-                    return _string_box_utf8(self.parser, i, start, end,
-                                            na_filter, na_hashset)
-                else:
-                    return _string_box_decode(self.parser, i, start, end,
-                                              na_filter, na_hashset,
-                                              self.c_encoding)
-            else:
-                return _string_box_factorize(self.parser, i, start, end,
-                                             na_filter, na_hashset)
+
+        cdef StringPath path = _string_path(self.c_encoding)
+
+        if path == UTF8:
+            return _string_box_utf8(self.parser, i, start, end, na_filter,
+                                    na_hashset)
+        elif path == ENCODED:
+            return _string_box_decode(self.parser, i, start, end,
+                                      na_filter, na_hashset, self.c_encoding)
+        elif path == CSTRING:
+            return _string_box_factorize(self.parser, i, start, end,
+                                         na_filter, na_hashset)
+
 
     def _get_converter(self, i, name):
         if self.converters is None:
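The category-sorting dance in the ``is_categorical_dtype`` branch above is easier to see in pure Python. A sketch with hand-rolled inputs mirroring what the tokenizer hands back (categories in first-seen order, using the same 0.19-era helpers the patch imports):

```python
import numpy as np
import pandas as pd
from pandas.core.algorithms import take_1d

# tokenizer output: categories in first-seen order, codes index into them
cats = pd.Index(['b', 'a'])
codes = np.array([0, 0, 1], dtype=np.int64)   # i.e. ['b', 'b', 'a']

if not cats.is_monotonic_increasing:
    unsorted = cats.copy()
    cats = cats.sort_values()                       # Index(['a', 'b'])
    indexer = cats.get_indexer(unsorted)            # old code -> new code
    codes = take_1d(indexer, codes, fill_value=-1)  # NA codes (-1) pass through

result = pd.Categorical(codes, categories=cats, ordered=False, fastpath=True)
# [b, b, a] with categories [a, b] -- same values, sorted categories
```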
@@ -1331,6 +1329,19 @@ def _maybe_upcast(arr):
 
     return arr
 
+cdef enum StringPath:
+    CSTRING
+    UTF8
+    ENCODED
+
+# factored out logic to pick string converter
+cdef inline StringPath _string_path(char *encoding):
+    if encoding != NULL and encoding != b"utf-8":
+        return ENCODED
+    elif PY3 or encoding != NULL:
+        return UTF8
+    else:
+        return CSTRING
 
 # ----------------------------------------------------------------------
 # Type conversions / inference support code
@@ -1500,6 +1511,77 @@ cdef _string_box_decode(parser_t *parser, int col,
 
     return result, na_count
 
+
+@cython.boundscheck(False)
+cdef _categorical_convert(parser_t *parser, int col,
+                          int line_start, int line_end,
+                          bint na_filter, kh_str_t *na_hashset,
+                          char *encoding):
+    "Convert column data into codes, categories"
+    cdef:
+        int error, na_count = 0
+        Py_ssize_t i, size
+        size_t lines
+        coliter_t it
+        const char *word = NULL
+
+        int64_t NA = -1
+        int64_t[:] codes
+        int64_t current_category = 0
+
+        char *errors = "strict"
+        cdef StringPath path = _string_path(encoding)
+
+        int ret = 0
+        kh_str_t *table
+        khiter_t k
+
+    lines = line_end - line_start
+    codes = np.empty(lines, dtype=np.int64)
+
+    # factorize parsed values, creating a hash table
+    # bytes -> category code
+    with nogil:
+        table = kh_init_str()
+        coliter_setup(&it, parser, col, line_start)
+
+        for i in range(lines):
+            COLITER_NEXT(it, word)
+
+            if na_filter:
+                k = kh_get_str(na_hashset, word)
+                # is in NA values
+                if k != na_hashset.n_buckets:
+                    na_count += 1
+                    codes[i] = NA
+                    continue
+
+            k = kh_get_str(table, word)
+            # not in the hash table
+            if k == table.n_buckets:
+                k = kh_put_str(table, word, &ret)
+                table.vals[k] = current_category
+                current_category += 1
+
+            codes[i] = table.vals[k]
+
+    # parse and box categories to python strings
+    result = np.empty(table.n_occupied, dtype=np.object_)
+    if path == ENCODED:
+        for k in range(table.n_buckets):
+            if kh_exist_str(table, k):
+                size = strlen(table.keys[k])
+                result[table.vals[k]] = PyUnicode_Decode(table.keys[k], size,
+                                                         encoding, errors)
+    elif path == UTF8:
+        for k in range(table.n_buckets):
+            if kh_exist_str(table, k):
+                result[table.vals[k]] = PyUnicode_FromString(table.keys[k])
+    elif path == CSTRING:
+        for k in range(table.n_buckets):
+            if kh_exist_str(table, k):
+                result[table.vals[k]] = PyBytes_FromString(table.keys[k])
+
+    kh_destroy_str(table)
+    return np.asarray(codes), result, na_count
 
 cdef _to_fw_string(parser_t *parser, int col, int line_start,
                    int line_end, size_t width):
@@ -1719,6 +1801,7 @@ cdef inline int _try_bool_nogil(parser_t *parser, int col, int line_start, int l
         const char *word = NULL
         khiter_t k
 
     na_count[0] = 0
+
     coliter_setup(&it, parser, col, line_start)
 
     if na_filter:
@@ -1836,6 +1919,7 @@ cdef inline int _try_bool_flex_nogil(parser_t *parser, int col, int line_start,
 
     return 0
 
+
 cdef kh_str_t* kset_from_list(list values) except NULL:
     # caller takes responsibility for freeing the hash table
     cdef:
@@ -1924,7 +2008,11 @@ def _concatenate_chunks(list chunks):
             common_type = np.find_common_type(dtypes, [])
             if common_type == np.object:
                 warning_columns.append(str(name))
-            result[name] = np.concatenate(arrs)
+
+            if is_categorical_dtype(dtypes.pop()):
+                result[name] = union_categoricals(arrs, sort_categories=True)
+            else:
+                result[name] = np.concatenate(arrs)
 
     if warning_columns:
         warning_names = ','.join(warning_columns)
diff --git a/pandas/tools/tests/test_concat.py b/pandas/tools/tests/test_concat.py
index 225ba533161b3..e3cc60e2856c2 100644
--- a/pandas/tools/tests/test_concat.py
+++ b/pandas/tools/tests/test_concat.py
@@ -850,6 +850,9 @@ def test_union_categorical(self):
             ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]),
             ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]),
 
+            (['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'],
+             ['b', 'b', np.nan, 'a', 'a', np.nan, 'c']),
+
             (pd.date_range('2014-01-01', '2014-01-05'),
              pd.date_range('2014-01-06', '2014-01-07'),
              pd.date_range('2014-01-01', '2014-01-07')),
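For reference, the behavior the new ``test_union_categorical`` case exercises: missing values ride along as code ``-1`` rather than becoming a category of their own. An illustrative session, assuming the 0.19-era internal import path:

```python
import numpy as np
import pandas as pd
from pandas.types.concat import union_categoricals

a = pd.Categorical(['b', 'b', np.nan, 'a'])
b = pd.Categorical(['a', np.nan, 'c'])

result = union_categoricals([a, b])
# values: ['b', 'b', NaN, 'a', 'a', NaN, 'c']
# NaN is never added to the categories themselves:
list(result.categories)  # ['a', 'b', 'c']
```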