reef-technologies
diff --git a/‎appveyor.yml
+1-1 b/‎appveyor.yml
+1-1
diff --git a/‎ci/requirements-3.6_WIN.run
+2-2 b/‎ci/requirements-3.6_WIN.run
+2-2
diff --git a/‎doc/source/whatsnew/v0.21.1.txt
+6-3 b/‎doc/source/whatsnew/v0.21.1.txt
+6-3
diff --git a/‎doc/source/whatsnew/v0.22.0.txt
+1-1 b/‎doc/source/whatsnew/v0.22.0.txt
+1-1
diff --git a/‎pandas/_libs/index.pyx
+52-2 b/‎pandas/_libs/index.pyx
+52-2
diff --git a/‎pandas/_libs/index_class_helper.pxi.in
+4-1 b/‎pandas/_libs/index_class_helper.pxi.in
+4-1
diff --git a/‎pandas/_libs/parsers.pyx
+19-11 b/‎pandas/_libs/parsers.pyx
+19-11
diff --git a/‎pandas/core/frame.py
+1-1 b/‎pandas/core/frame.py
+1-1
diff --git a/‎pandas/core/indexes/category.py
+2-1 b/‎pandas/core/indexes/category.py
+2-1
diff --git a/‎pandas/core/indexes/datetimelike.py
+1-1 b/‎pandas/core/indexes/datetimelike.py
+1-1
diff --git a/‎pandas/core/indexes/datetimes.py
+4-2 b/‎pandas/core/indexes/datetimes.py
+4-2
diff --git a/‎pandas/core/indexes/period.py
+7-1 b/‎pandas/core/indexes/period.py
+7-1
diff --git a/‎pandas/core/indexes/timedeltas.py
+2-1 b/‎pandas/core/indexes/timedeltas.py
+2-1
diff --git a/‎pandas/core/reshape/merge.py
+5-3 b/‎pandas/core/reshape/merge.py
+5-3
diff --git a/‎pandas/io/formats/format.py
+1-1 b/‎pandas/io/formats/format.py
+1-1
diff --git a/‎pandas/io/parsers.py
+4-5 b/‎pandas/io/parsers.py
+4-5
diff --git a/‎pandas/tests/frame/test_to_csv.py
+13 b/‎pandas/tests/frame/test_to_csv.py
+13
@@ -22,7 +22,7 @@ environment:
       PYTHON_VERSION: "3.6"
       PYTHON_ARCH: "64"
       CONDA_PY: "36"
-      CONDA_NPY: "112"
+      CONDA_NPY: "113"
 
     - CONDA_ROOT: "C:\\Miniconda3_64"
       PYTHON_VERSION: "2.7"
 
@@ -1,12 +1,12 @@
 python-dateutil
 pytz
-numpy=1.12*
+numpy=1.13*
 bottleneck
 openpyxl
 xlsxwriter
 xlrd
 xlwt
-# scipy
+scipy
 feather-format
 numexpr
 pytables
 
@@ -57,6 +57,8 @@ Documentation Changes
 Bug Fixes
 ~~~~~~~~~
 - Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`)
+- Bug in :class:`TimedeltaIndex` where subtraction could incorrectly raise an overflow error when ``NaT`` is present (:issue:`17791`)
+- Bug in :class:`DatetimeIndex` where subtracting a datetimelike from a ``DatetimeIndex`` could fail to raise on overflow (:issue:`18020`)
 
 Conversion
 ^^^^^^^^^^
@@ -76,7 +78,8 @@ I/O
 ^^^
 
 - Bug in class:`~pandas.io.stata.StataReader` not converting date/time columns with display formatting addressed (:issue:`17990`). Previously columns with display formatting were normally left as ordinal numbers and not converted to datetime objects.
-
+- Bug in :func:`read_csv` when reading a compressed UTF-16 encoded file (:issue:`18071`)
+- Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`)
 
 Plotting
 ^^^^^^^^
@@ -102,7 +105,7 @@ Sparse
 Reshaping
 ^^^^^^^^^
 
--
+- The error message raised by ``pd.merge_asof()`` on a key datatype mismatch now includes the datatypes of the left and right keys (:issue:`18068`)
 -
 -
 
@@ -119,7 +122,7 @@ Categorical
 - Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`)
 - Error messages in the testing module have been improved when items have
   different ``CategoricalDtype`` (:issue:`18069`)
--
+- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`)
 
 Other
 ^^^^^
 
@@ -101,7 +101,7 @@ Conversion
 Indexing
 ^^^^^^^^
 
--
+- Bug in :func:`PeriodIndex.truncate` which raises ``TypeError`` when ``PeriodIndex`` is monotonic (:issue:`17717`)
 -
 -
 
 
@@ -17,7 +17,7 @@ from tslib cimport _to_i8
 
 from hashtable cimport HashTable
 
-from pandas._libs import algos, hashtable as _hash
+from pandas._libs import algos, period as periodlib, hashtable as _hash
 from pandas._libs.tslib import Timestamp, Timedelta
 from datetime import datetime, timedelta
 
@@ -270,13 +270,16 @@ cdef class IndexEngine:
 
             values = self._get_index_values()
             self.mapping = self._make_hash_table(len(values))
-            self.mapping.map_locations(values)
+            self._call_map_locations(values)
 
             if len(self.mapping) == len(values):
                 self.unique = 1
 
         self.need_unique_check = 0
 
+    cpdef _call_map_locations(self, values):
+        self.mapping.map_locations(values)
+
     def clear_mapping(self):
         self.mapping = None
         self.need_monotonic_check = 1
@@ -490,6 +493,53 @@ cdef class TimedeltaEngine(DatetimeEngine):
     cdef _get_box_dtype(self):
         return 'm8[ns]'
 
+
+cdef class PeriodEngine(Int64Engine):
+
+    cdef _get_index_values(self):
+        return super(PeriodEngine, self).vgetter()
+
+    cpdef _call_map_locations(self, values):
+        super(PeriodEngine, self)._call_map_locations(values.view('i8'))
+
+    def _call_monotonic(self, values):
+        return super(PeriodEngine, self)._call_monotonic(values.view('i8'))
+
+    def get_indexer(self, values):
+        cdef ndarray[int64_t, ndim=1] ordinals
+
+        super(PeriodEngine, self)._ensure_mapping_populated()
+
+        freq = super(PeriodEngine, self).vgetter().freq
+        ordinals = periodlib.extract_ordinals(values, freq)
+
+        return self.mapping.lookup(ordinals)
+
+    def get_pad_indexer(self, other, limit=None):
+        freq = super(PeriodEngine, self).vgetter().freq
+        ordinal = periodlib.extract_ordinals(other, freq)
+
+        return algos.pad_int64(self._get_index_values(),
+                               np.asarray(ordinal), limit=limit)
+
+    def get_backfill_indexer(self, other, limit=None):
+        freq = super(PeriodEngine, self).vgetter().freq
+        ordinal = periodlib.extract_ordinals(other, freq)
+
+        return algos.backfill_int64(self._get_index_values(),
+                                    np.asarray(ordinal), limit=limit)
+
+    def get_indexer_non_unique(self, targets):
+        freq = super(PeriodEngine, self).vgetter().freq
+        ordinal = periodlib.extract_ordinals(targets, freq)
+        ordinal_array = np.asarray(ordinal)
+
+        return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array)
+
+    cdef _get_index_values_for_bool_indexer(self):
+        return self._get_index_values().view('i8')
+
+
 cpdef convert_scalar(ndarray arr, object value):
     # we don't turn integers
     # into datetimes/timedeltas
 
@@ -66,7 +66,7 @@ cdef class {{name}}Engine(IndexEngine):
             raise KeyError(val)
         {{endif}}
 
-        values = self._get_index_values()
+        values = self._get_index_values_for_bool_indexer()
         n = len(values)
 
         result = np.empty(n, dtype=bool)
@@ -86,6 +86,9 @@ cdef class {{name}}Engine(IndexEngine):
             return last_true
 
         return result
+
+    cdef _get_index_values_for_bool_indexer(self):
+        return self._get_index_values()
     {{endif}}
 
 {{endfor}}
@@ -374,6 +374,17 @@ cdef class TextReader:
                   float_precision=None,
                   skip_blank_lines=True):
 
+        # set encoding for native Python and C library
+        if encoding is not None:
+            if not isinstance(encoding, bytes):
+                encoding = encoding.encode('utf-8')
+            encoding = encoding.lower()
+            self.c_encoding = <char*> encoding
+        else:
+            self.c_encoding = NULL
+
+        self.encoding = encoding
+
         self.parser = parser_new()
         self.parser.chunksize = tokenize_chunksize
 
@@ -495,17 +506,6 @@ cdef class TextReader:
             self.parser.double_converter_nogil = NULL
             self.parser.double_converter_withgil = round_trip
 
-        # encoding
-        if encoding is not None:
-            if not isinstance(encoding, bytes):
-                encoding = encoding.encode('utf-8')
-            encoding = encoding.lower()
-            self.c_encoding = <char*> encoding
-        else:
-            self.c_encoding = NULL
-
-        self.encoding = encoding
-
         if isinstance(dtype, dict):
             dtype = {k: pandas_dtype(dtype[k])
                      for k in dtype}
@@ -684,6 +684,14 @@ cdef class TextReader:
             else:
                 raise ValueError('Unrecognized compression type: %s' %
                                  self.compression)
+
+            if b'utf-16' in (self.encoding or b''):
+                # we need to read utf-16 through UTF8Recoder.
+                # if source is utf-16, convert source to utf-8 by UTF8Recoder.
+                source = com.UTF8Recoder(source, self.encoding.decode('utf-8'))
+                self.encoding = b'utf-8'
+                self.c_encoding = <char*> self.encoding
+
             self.handle = source
 
         if isinstance(source, basestring):
 
@@ -5105,7 +5105,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):
 
         >>> df = pd.DataFrame(columns=['A'])
         >>> for i in range(5):
-        ...     df = df.append({'A'}: i}, ignore_index=True)
+        ...     df = df.append({'A': i}, ignore_index=True)
         >>> df
            A
         0  0
 
@@ -79,7 +79,8 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
                 if data is not None or categories is None:
                     cls._scalar_data_error(data)
                 data = []
-            data = cls._create_categorical(cls, data, categories, ordered)
+            data = cls._create_categorical(cls, data, categories, ordered,
+                                           dtype)
 
         if copy:
             data = data.copy()
 
@@ -679,7 +679,7 @@ def __sub__(self, other):
                 return self._add_delta(-other)
             elif is_integer(other):
                 return self.shift(-other)
-            elif isinstance(other, datetime):
+            elif isinstance(other, (datetime, np.datetime64)):
                 return self._sub_datelike(other)
             elif isinstance(other, Period):
                 return self._sub_period(other)
 
@@ -29,6 +29,7 @@
 import pandas.core.dtypes.concat as _concat
 from pandas.errors import PerformanceWarning
 from pandas.core.common import _values_from_object, _maybe_box
+from pandas.core.algorithms import checked_add_with_arr
 
 from pandas.core.indexes.base import Index, _index_shared_docs
 from pandas.core.indexes.numeric import Int64Index, Float64Index
@@ -767,7 +768,7 @@ def _sub_datelike(self, other):
                 raise TypeError("DatetimeIndex subtraction must have the same "
                                 "timezones or no timezones")
             result = self._sub_datelike_dti(other)
-        elif isinstance(other, datetime):
+        elif isinstance(other, (datetime, np.datetime64)):
             other = Timestamp(other)
             if other is libts.NaT:
                 result = self._nat_new(box=False)
@@ -777,7 +778,8 @@ def _sub_datelike(self, other):
                                 "timezones or no timezones")
             else:
                 i8 = self.asi8
-                result = i8 - other.value
+                result = checked_add_with_arr(i8, -other.value,
+                                              arr_mask=self._isnan)
                 result = self._maybe_mask_results(result,
                                                   fill_value=libts.iNaT)
         else:
 
@@ -31,7 +31,7 @@
 import pandas.tseries.offsets as offsets
 
 from pandas._libs.lib import infer_dtype
-from pandas._libs import tslib, period
+from pandas._libs import tslib, period, index as libindex
 from pandas._libs.period import (Period, IncompatibleFrequency,
                                  get_period_field_arr, _validate_end_alias,
                                  _quarter_to_myear)
@@ -192,6 +192,8 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index):
 
     freq = None
 
+    _engine_type = libindex.PeriodEngine
+
     __eq__ = _period_index_cmp('__eq__')
     __ne__ = _period_index_cmp('__ne__', nat_result=True)
     __lt__ = _period_index_cmp('__lt__')
@@ -275,6 +277,10 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
         data = period.extract_ordinals(data, freq)
         return cls._from_ordinals(data, name=name, freq=freq)
 
+    @cache_readonly
+    def _engine(self):
+        return self._engine_type(lambda: self, len(self))
+
     @classmethod
     def _generate_range(cls, start, end, periods, freq, fields):
         if freq is not None:
 
@@ -361,7 +361,8 @@ def _add_datelike(self, other):
         else:
             other = Timestamp(other)
             i8 = self.asi8
-            result = checked_add_with_arr(i8, other.value)
+            result = checked_add_with_arr(i8, other.value,
+                                          arr_mask=self._isnan)
             result = self._maybe_mask_results(result, fill_value=iNaT)
         return DatetimeIndex(result, name=self.name, copy=False)
 
 
@@ -1253,10 +1253,12 @@ def _get_merge_keys(self):
          join_names) = super(_AsOfMerge, self)._get_merge_keys()
 
         # validate index types are the same
-        for lk, rk in zip(left_join_keys, right_join_keys):
+        for i, (lk, rk) in enumerate(zip(left_join_keys, right_join_keys)):
             if not is_dtype_equal(lk.dtype, rk.dtype):
-                raise MergeError("incompatible merge keys, "
-                                 "must be the same type")
+                raise MergeError("incompatible merge keys [{i}] {lkdtype} and "
+                                 "{rkdtype}, must be the same type"
+                                 .format(i=i, lkdtype=lk.dtype,
+                                         rkdtype=rk.dtype))
 
         # validate tolerance; must be a Timedelta if we have a DTI
         if self.tolerance is not None:
 
@@ -1695,7 +1695,7 @@ def _save_header(self):
             else:
                 encoded_labels = []
 
-        if not has_mi_columns:
+        if not has_mi_columns or has_aliases:
             encoded_labels += list(write_cols)
             writer.writerow(encoded_labels)
         else:
 
@@ -1431,7 +1431,6 @@ def ix(col):
             if not isinstance(col, compat.string_types):
                 return col
             raise ValueError('Index %s invalid' % col)
-        index = None
 
         to_remove = []
         index = []
@@ -1462,8 +1461,6 @@ def _get_name(icol):
                 if i == icol:
                     return c
 
-        index = None
-
         to_remove = []
         index = []
         for idx in self.index_col:
@@ -1484,7 +1481,7 @@ def _agg_index(self, index, try_parse_dates=True):
 
         for i, arr in enumerate(index):
 
-            if (try_parse_dates and self._should_parse_dates(i)):
+            if try_parse_dates and self._should_parse_dates(i):
                 arr = self._date_conv(arr)
 
             col_na_values = self.na_values
@@ -1671,7 +1668,9 @@ def __init__(self, src, **kwds):
 
         ParserBase.__init__(self, kwds)
 
-        if 'utf-16' in (kwds.get('encoding') or ''):
+        if (kwds.get('compression') is None
+           and 'utf-16' in (kwds.get('encoding') or '')):
+            # if source is utf-16 plain text, convert source to utf-8
             if isinstance(src, compat.string_types):
                 src = open(src, 'rb')
                 self.handles.append(src)
 
@@ -1203,3 +1203,16 @@ def test_period_index_date_overflow(self):
 
         expected = ',0\n1990-01-01,4\n,5\n3005-01-01,6\n'
         assert result == expected
+
+    def test_multi_index_header(self):
+        # see gh-5539
+        columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2),
+                                             ("b", 1), ("b", 2)])
+        df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]])
+        df.columns = columns
+
+        header = ["a", "b", "c", "d"]
+        result = df.to_csv(header=header)
+
+        expected = ",a,b,c,d\n0,1,2,3,4\n1,5,6,7,8\n"
+        assert result == expected
Original file line number	Diff line number	Diff line change
`@@ -101,7 +101,7 @@ Conversion`
`101`	`101`	`Indexing`
`102`	`102`	`^^^^^^^^`
`103`	`103`
`104`		`--`
	`104`	+- Bug in :func:`PeriodIndex.truncate` which raises ``TypeError`` when ``PeriodIndex`` is monotonic (:issue:`17717`)
`105`	`105`	`-`
`106`	`106`	`-`
`107`	`107`
Original file line number	Diff line number	Diff line change
`@@ -5105,7 +5105,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):`
`5105`	`5105`
`5106`	`5106`	`>>> df = pd.DataFrame(columns=['A'])`
`5107`	`5107`	`>>> for i in range(5):`
`5108`		`- ... df = df.append({'A'}: i}, ignore_index=True)`
	`5108`	`+ ... df = df.append({'A': i}, ignore_index=True)`
`5109`	`5109`	`>>> df`
`5110`	`5110`	`A`
`5111`	`5111`	`0 0`