diff --git a/doc/source/io.rst b/doc/source/io.rst
index baf684056e169..6e5d254d27b7f 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -3558,6 +3558,13 @@ read and used to create a ``Categorical`` variable from them. Value labels can
 also be retrieved by the function ``variable_labels``, which requires data to be
 called before (see ``pandas.io.stata.StataReader``).
 
+The parameter ``convert_missing`` indicates whether missing value
+representations in Stata should be preserved.  If ``False`` (the default),
+missing values are represented as ``np.nan``.  If ``True``, missing values are
+represented using ``StataMissingValue`` objects, and columns containing missing
+values will have ``dtype`` set to ``object``.
+
+
 The StataReader supports .dta Formats 104, 105, 108, 113-115 and 117.
 Alternatively, the function :func:`~pandas.io.stata.read_stata` can be used
 
diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
index 40a95ab103b0b..85f620fcd4b99 100644
--- a/doc/source/v0.15.0.txt
+++ b/doc/source/v0.15.0.txt
@@ -144,6 +144,11 @@ API changes
   strings must contain 244 or fewer characters.  Attempting to write Stata
   dta files with strings longer than 244 characters raises a ``ValueError``. (:issue:`7858`)
 
+- ``read_stata`` and ``StataReader`` can import missing data information into a
+  ``DataFrame`` by setting the argument ``convert_missing`` to ``True``. When
+  using this option, missing values are returned as ``StataMissingValue``
+  objects and columns containing missing values have ``object`` data type. (:issue:`8045`)
+
 - ``Index.isin`` now supports a ``level`` argument to specify which index level
   to use for membership tests (:issue:`7892`, :issue:`7890`)
 
@@ -414,6 +419,7 @@ Performance
 - Performance improvements in ``DatetimeIndex.__iter__`` to allow faster iteration (:issue:`7683`)
 - Performance improvements in ``Period`` creation (and ``PeriodIndex`` setitem) (:issue:`5155`)
 - Improvements in Series.transform for significant performance gains (revised) (:issue:`6496`)
+- Performance improvements in ``StataReader`` when reading large files (:issue:`8040`)
 
 
 
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 5b5ce3e59e16e..c9a3104eec3f0 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -9,7 +9,6 @@
 You can find more information on http://presbrey.mit.edu/PyDTA and
 http://statsmodels.sourceforge.net/devel/
 """
-# TODO: Fix this module so it can use cross-compatible zip, map, and range
 import numpy as np
 
 import sys
@@ -20,14 +19,16 @@
 from pandas.core.categorical import Categorical
 import datetime
 from pandas import compat
-from pandas.compat import long, lrange, lmap, lzip, text_type, string_types
+from pandas.compat import lrange, lmap, lzip, text_type, string_types, range, \
+    zip
 from pandas import isnull
 from pandas.io.common import get_filepath_or_buffer
 from pandas.lib import max_len_string_array, is_string_array
 from pandas.tslib import NaT
 
 def read_stata(filepath_or_buffer, convert_dates=True,
-               convert_categoricals=True, encoding=None, index=None):
+               convert_categoricals=True, encoding=None, index=None,
+               convert_missing=False):
     """
     Read Stata file into DataFrame
 
@@ -44,10 +45,19 @@ def read_stata(filepath_or_buffer, convert_dates=True,
         support unicode. None defaults to cp1252.
     index : identifier of index column
         identifier of column that should be used as index of the DataFrame
+    convert_missing : boolean, defaults to False
+        Flag indicating whether to convert missing values to their Stata
+        representations.  If False, missing values are replaced with nans.
+        If True, columns containing missing values are returned with
+        object data types and missing values are represented by
+        StataMissingValue objects.
     """
     reader = StataReader(filepath_or_buffer, encoding)
 
-    return reader.data(convert_dates, convert_categoricals, index)
+    return reader.data(convert_dates,
+                       convert_categoricals,
+                       index,
+                       convert_missing)
 
 _date_formats = ["%tc", "%tC", "%td", "%d", "%tw", "%tm", "%tq", "%th", "%ty"]
 
@@ -291,35 +301,76 @@ class StataMissingValue(StringMixin):
 
     Parameters
     -----------
-    offset
-    value
+    value : int8, int16, int32, float32 or float64
+        The Stata missing value code
 
     Attributes
     ----------
-    string
-    value
+    string : string
+        String representation of the Stata missing value
+    value : int8, int16, int32, float32 or float64
+        The original encoded missing value
 
     Notes
     -----
     More information: <http://www.stata.com/help.cgi?missing>
+
+    Integer missing values map the codes '.', '.a', ..., '.z' to the ranges
+    101 ... 127 (for int8), 32741 ... 32767  (for int16) and 2147483621 ...
+    2147483647 (for int32).  Missing values for floating point data types are
+    more complex but the pattern is simple to discern from the following table.
+
+    np.float32 missing values (float in Stata)
+    0000007f    .
+    0008007f    .a
+    0010007f    .b
+    ...
+    00c0007f    .x
+    00c8007f    .y
+    00d0007f    .z
+
+    np.float64 missing values (double in Stata)
+    000000000000e07f    .
+    000000000001e07f    .a
+    000000000002e07f    .b
+    ...
+    000000000018e07f    .x
+    000000000019e07f    .y
+    00000000001ae07f    .z
     """
-    # TODO: Needs test
-    def __init__(self, offset, value):
+
+    # Construct a dictionary of missing values
+    MISSING_VALUES = {}
+    bases = (101, 32741, 2147483621)
+    for b in bases:
+        MISSING_VALUES[b] = '.'
+        for i in range(1, 27):
+            MISSING_VALUES[i + b] = '.' + chr(96 + i)
+
+    base = b'\x00\x00\x00\x7f'
+    increment = struct.unpack('<i', b'\x00\x08\x00\x00')[0]
+    for i in range(27):
+        value = struct.unpack('<f', base)[0]
+        MISSING_VALUES[value] = '.'
+        if i > 0:
+            MISSING_VALUES[value] += chr(96 + i)
+        int_value = struct.unpack('<i', struct.pack('<f', value))[0] + increment
+        base = struct.pack('<i', int_value)
+
+    base = b'\x00\x00\x00\x00\x00\x00\xe0\x7f'
+    increment = struct.unpack('q', b'\x00\x00\x00\x00\x00\x01\x00\x00')[0]
+    for i in range(27):
+        value = struct.unpack('<d', base)[0]
+        MISSING_VALUES[value] = '.'
+        if i > 0:
+            MISSING_VALUES[value] += chr(96 + i)
+        int_value = struct.unpack('q', struct.pack('<d', value))[0] + increment
+        base = struct.pack('q', int_value)
+
+    def __init__(self, value):
         self._value = value
-        value_type = type(value)
-        if value_type in int:
-            loc = value - offset
-        elif value_type in (float, np.float32, np.float64):
-            if value <= np.finfo(np.float32).max:  # float32
-                conv_str, byte_loc, scale = '<f', 1, 8
-            else:
-                conv_str, byte_loc, scale = '<d', 5, 1
-            value_bytes = struct.pack(conv_str, value)
-            loc = (struct.unpack('<b', value_bytes[byte_loc])[0] / scale) + 0
-        else:
-            # Should never be hit
-            loc = 0
-        self._str = loc is 0 and '.' or ('.' + chr(loc + 96))
+        self._str = self.MISSING_VALUES[value]
+
     string = property(lambda self: self._str,
                       doc="The Stata representation of the missing value: "
                           "'.', '.a'..'.z'")
@@ -333,6 +384,10 @@ def __repr__(self):
         # not perfect :-/
         return "%s(%s)" % (self.__class__, self)
 
+    def __eq__(self, other):
+        return (isinstance(other, self.__class__)
+                and self.string == other.string and self.value == other.value)
+
 
 class StataParser(object):
     _default_encoding = 'cp1252'
@@ -711,15 +766,7 @@ def _col_size(self, k=None):
             return self.col_sizes[k]
 
     def _unpack(self, fmt, byt):
-        d = struct.unpack(self.byteorder + fmt, byt)[0]
-        if fmt[-1] in self.VALID_RANGE:
-            nmin, nmax = self.VALID_RANGE[fmt[-1]]
-            if d < nmin or d > nmax:
-                if self._missing_values:
-                    return StataMissingValue(nmax, d)
-                else:
-                    return None
-        return d
+        return struct.unpack(self.byteorder + fmt, byt)[0]
 
     def _null_terminate(self, s):
         if compat.PY3 or self._encoding is not None:  # have bytes not strings,
@@ -752,16 +799,15 @@ def _next(self):
                     )
             return data
         else:
-            return list(
-                map(
+            return lmap(
                     lambda i: self._unpack(typlist[i],
                                            self.path_or_buf.read(
                                                self._col_size(i)
                                            )),
                     range(self.nvar)
-                )
             )
 
+
     def _dataset(self):
         """
         Returns a Python generator object for iterating over the dataset.
@@ -853,7 +899,8 @@ def _read_strls(self):
             self.GSO[v_o] = self.path_or_buf.read(length-1)
             self.path_or_buf.read(1)  # zero-termination
 
-    def data(self, convert_dates=True, convert_categoricals=True, index=None):
+    def data(self, convert_dates=True, convert_categoricals=True, index=None,
+             convert_missing=False):
         """
         Reads observations from Stata file, converting them into a dataframe
 
@@ -866,11 +913,18 @@ def data(self, convert_dates=True, convert_categoricals=True, index=None):
             variables
         index : identifier of index column
             identifier of column that should be used as index of the DataFrame
+        convert_missing : boolean, defaults to False
+            Flag indicating whether to convert missing values to their Stata
+            representation.  If False, missing values are replaced with
+            nans.  If True, columns containing missing values are returned with
+            object data types and missing values are represented by
+            StataMissingValue objects.
 
         Returns
         -------
         y : DataFrame instance
         """
+        self._missing_values = convert_missing
         if self._data_read:
             raise Exception("Data has already been read.")
         self._data_read = True
@@ -894,18 +948,62 @@ def data(self, convert_dates=True, convert_categoricals=True, index=None):
         if convert_categoricals:
             self._read_value_labels()
 
+        # TODO: Refactor to use a dictionary constructor and the correct dtype from the start?
         if len(data)==0:
             data = DataFrame(columns=self.varlist, index=index)
         else:
             data = DataFrame(data, columns=self.varlist, index=index)
 
         cols_ = np.where(self.dtyplist)[0]
+
+        # Convert columns (if needed) to match input type
+        index = data.index
+        requires_type_conversion = False
+        data_formatted = []
         for i in cols_:
             if self.dtyplist[i] is not None:
                 col = data.columns[i]
-                if data[col].dtype is not np.dtype(object):
-                    data[col] = Series(data[col], data[col].index,
-                                       self.dtyplist[i])
+                dtype = data[col].dtype
+                if (dtype != np.dtype(object)) and (dtype != self.dtyplist[i]):
+                    requires_type_conversion = True
+                    data_formatted.append((col, Series(data[col], index, self.dtyplist[i])))
+                else:
+                    data_formatted.append((col, data[col]))
+        if requires_type_conversion:
+            data = DataFrame.from_items(data_formatted)
+        del data_formatted
+
+        # Check for missing values, and replace if found
+        for i, colname in enumerate(data):
+            fmt = self.typlist[i]
+            if fmt not in self.VALID_RANGE:
+                continue
+
+            nmin, nmax = self.VALID_RANGE[fmt]
+            series = data[colname]
+            missing = np.logical_or(series < nmin, series > nmax)
+
+            if not missing.any():
+                continue
+
+            if self._missing_values:  # Replacement follows Stata notation
+                missing_loc = np.argwhere(missing)
+                umissing, umissing_loc = np.unique(series[missing],
+                                                   return_inverse=True)
+                replacement = Series(series, dtype=np.object)
+                for i, um in enumerate(umissing):
+                    missing_value = StataMissingValue(um)
+
+                    loc = missing_loc[umissing_loc == i]
+                    replacement.iloc[loc] = missing_value
+            else:  # All replacements are identical
+                dtype = series.dtype
+                if dtype not in (np.float32, np.float64):
+                    dtype = np.float64
+                replacement = Series(series, dtype=dtype)
+                replacement[missing] = np.nan
+
+            data[colname] = replacement
 
         if convert_dates:
             cols = np.where(lmap(lambda x: x in _date_formats,
diff --git a/pandas/io/tests/data/stata8_113.dta b/pandas/io/tests/data/stata8_113.dta
new file mode 100644
index 0000000000000..9b0831746025e
Binary files /dev/null and b/pandas/io/tests/data/stata8_113.dta differ
diff --git a/pandas/io/tests/data/stata8_115.dta b/pandas/io/tests/data/stata8_115.dta
new file mode 100644
index 0000000000000..bb78368b3462b
Binary files /dev/null and b/pandas/io/tests/data/stata8_115.dta differ
diff --git a/pandas/io/tests/data/stata8_117.dta b/pandas/io/tests/data/stata8_117.dta
new file mode 100644
index 0000000000000..fcfa7abd7b0d9
Binary files /dev/null and b/pandas/io/tests/data/stata8_117.dta differ
diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py
index 459a1fe6c0e89..9d630bf83ced7 100644
--- a/pandas/io/tests/test_stata.py
+++ b/pandas/io/tests/test_stata.py
@@ -5,16 +5,18 @@
 import os
 import warnings
 import nose
+import struct
 import sys
 from distutils.version import LooseVersion
 
 import numpy as np
 
 import pandas as pd
+from pandas.compat import iterkeys
 from pandas.core.frame import DataFrame, Series
 from pandas.io.parsers import read_csv
 from pandas.io.stata import (read_stata, StataReader, InvalidColumnName,
-    PossiblePrecisionLoss)
+    PossiblePrecisionLoss, StataMissingValue)
 import pandas.util.testing as tm
 from pandas.util.misc import is_little_endian
 from pandas import compat
@@ -71,6 +73,10 @@ def setUp(self):
         self.dta16_115 = os.path.join(self.dirpath, 'stata7_115.dta')
         self.dta16_117 = os.path.join(self.dirpath, 'stata7_117.dta')
 
+        self.dta17_113 = os.path.join(self.dirpath, 'stata8_113.dta')
+        self.dta17_115 = os.path.join(self.dirpath, 'stata8_115.dta')
+        self.dta17_117 = os.path.join(self.dirpath, 'stata8_117.dta')
+
     def read_dta(self, file):
         return read_stata(file, convert_dates=True)
 
@@ -589,6 +595,50 @@ def test_excessively_long_string(self):
             with tm.ensure_clean() as path:
                 original.to_stata(path)
 
+    def test_missing_value_generator(self):
+        types = ('b','h','l')
+        df = DataFrame([[0.0]],columns=['float_'])
+        with tm.ensure_clean() as path:
+            df.to_stata(path)
+            valid_range = StataReader(path).VALID_RANGE
+        expected_values = ['.' + chr(97 + i) for i in range(26)]
+        expected_values.insert(0, '.')
+        for t in types:
+            offset = valid_range[t][1]
+            for i in range(0,27):
+                val = StataMissingValue(offset+1+i)
+                self.assertTrue(val.string == expected_values[i])
+
+        # Test extremes for floats
+        val = StataMissingValue(struct.unpack('<f',b'\x00\x00\x00\x7f')[0])
+        self.assertTrue(val.string == '.')
+        val = StataMissingValue(struct.unpack('<f',b'\x00\xd0\x00\x7f')[0])
+        self.assertTrue(val.string == '.z')
+
+        # Test extremes for doubles
+        val = StataMissingValue(struct.unpack('<d',b'\x00\x00\x00\x00\x00\x00\xe0\x7f')[0])
+        self.assertTrue(val.string == '.')
+        val = StataMissingValue(struct.unpack('<d',b'\x00\x00\x00\x00\x00\x1a\xe0\x7f')[0])
+        self.assertTrue(val.string == '.z')
+
+    def test_missing_value_conversion(self):
+        columns = ['int8_', 'int16_', 'int32_', 'float32_', 'float64_']
+        smv = StataMissingValue(101)
+        keys = [key for key in iterkeys(smv.MISSING_VALUES)]
+        keys.sort()
+        data = []
+        for i in range(27):
+            row = [StataMissingValue(keys[i+(j*27)]) for j in range(5)]
+            data.append(row)
+        expected = DataFrame(data,columns=columns)
+
+        parsed_113 = read_stata(self.dta17_113, convert_missing=True)
+        parsed_115 = read_stata(self.dta17_115, convert_missing=True)
+        parsed_117 = read_stata(self.dta17_117, convert_missing=True)
+
+        tm.assert_frame_equal(expected, parsed_113)
+        tm.assert_frame_equal(expected, parsed_115)
+        tm.assert_frame_equal(expected, parsed_117)
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/vb_suite/packers.py b/vb_suite/packers.py
index 40227b3c9bc48..cb933746bef83 100644
--- a/vb_suite/packers.py
+++ b/vb_suite/packers.py
@@ -121,3 +121,25 @@ def remove(f):
 packers_write_json_date_index = Benchmark("df.to_json(f,orient='split')", setup, cleanup="remove(f)", start_date=start_date)
 setup = setup + setup_int_index
 packers_write_json = Benchmark("df.to_json(f,orient='split')", setup, cleanup="remove(f)", start_date=start_date)
+
+#----------------------------------------------------------------------
+# stata
+
+setup = common_setup + """
+df.to_stata(f, {'index': 'tc'})
+"""
+packers_read_stata = Benchmark("pd.read_stata(f)", setup, start_date=start_date)
+
+packers_write_stata = Benchmark("df.to_stata(f, {'index': 'tc'})", setup, cleanup="remove(f)", start_date=start_date)
+
+setup = common_setup + """
+df['int8_'] = [randint(-127,100) for _ in range(N)]
+df['int16_'] = [randint(-127,100) for _ in range(N)]
+df['int32_'] = [randint(-127,100) for _ in range(N)]
+df['float32_'] = np.array(randn(N), dtype=np.float32)
+df.to_stata(f, {'index': 'tc'})
+"""
+
+packers_read_stata_with_int = Benchmark("pd.read_stata(f)", setup, start_date=start_date)
+
+packers_write_stata_with_int = Benchmark("df.to_stata(f, {'index': 'tc'})", setup, cleanup="remove(f)", start_date=start_date)