Merge branch 'master' of https://github.com/pandas-dev/pandas into libwriters

jbrockmendel · jbrockmendel · commit f77d7ddb542a · 2018-01-30T10:17:21.000-08:00
diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py
@@ -44,15 +44,15 @@ class Convert(object):
 
     goal_time = 0.5
     params = (['DataFrame', 'Series'], ['Timestamp', 'Timedelta'])
-    param_names = ['contructor', 'replace_data']
+    param_names = ['constructor', 'replace_data']
 
-    def setup(self, contructor, replace_data):
+    def setup(self, constructor, replace_data):
         N = 10**3
         data = {'Series': pd.Series(np.random.randint(N, size=N)),
                 'DataFrame': pd.DataFrame({'A': np.random.randint(N, size=N),
                                            'B': np.random.randint(N, size=N)})}
         self.to_replace = {i: getattr(pd, replace_data) for i in range(N)}
-        self.data = data[contructor]
+        self.data = data[constructor]
 
-    def time_replace(self, contructor, replace_data):
+    def time_replace(self, constructor, replace_data):
         self.data.replace(self.to_replace)
diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
@@ -12,14 +12,14 @@ class Methods(object):
               ['int', 'float'],
               ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
                'sum', 'corr', 'cov'])
-    param_names = ['contructor', 'window', 'dtype', 'method']
+    param_names = ['constructor', 'window', 'dtype', 'method']
 
-    def setup(self, contructor, window, dtype, method):
+    def setup(self, constructor, window, dtype, method):
         N = 10**5
         arr = np.random.random(N).astype(dtype)
-        self.roll = getattr(pd, contructor)(arr).rolling(window)
+        self.roll = getattr(pd, constructor)(arr).rolling(window)
 
-    def time_rolling(self, contructor, window, dtype, method):
+    def time_rolling(self, constructor, window, dtype, method):
         getattr(self.roll, method)()
 
 
@@ -30,12 +30,12 @@ class Quantile(object):
               [10, 1000],
               ['int', 'float'],
               [0, 0.5, 1])
-    param_names = ['contructor', 'window', 'dtype', 'percentile']
+    param_names = ['constructor', 'window', 'dtype', 'percentile']
 
-    def setup(self, contructor, window, dtype, percentile):
+    def setup(self, constructor, window, dtype, percentile):
         N = 10**5
         arr = np.random.random(N).astype(dtype)
-        self.roll = getattr(pd, contructor)(arr).rolling(window)
+        self.roll = getattr(pd, constructor)(arr).rolling(window)
 
-    def time_quantile(self, contructor, window, dtype, percentile):
+    def time_quantile(self, constructor, window, dtype, percentile):
         self.roll.quantile(percentile)
diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run
@@ -13,7 +13,7 @@ lxml
 html5lib
 jinja2
 sqlalchemy
-pymysql
+pymysql<0.8.0
 feather-format
 pyarrow
 psycopg2
diff --git a/doc/source/10min.rst b/doc/source/10min.rst
@@ -154,7 +154,7 @@ Selection
    While standard Python / Numpy expressions for selecting and setting are
    intuitive and come in handy for interactive work, for production code, we
    recommend the optimized pandas data access methods, ``.at``, ``.iat``,
-   ``.loc``, ``.iloc`` and ``.ix``.
+   ``.loc`` and ``.iloc``.
 
 See the indexing documentation :ref:`Indexing and Selecting Data <indexing>` and :ref:`MultiIndex / Advanced Indexing <advanced>`.
 
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -2500,7 +2500,7 @@ Scalar introspection
 Extensions
 ----------
 
-These are primarily intented for library authors looking to extend pandas
+These are primarily intended for library authors looking to extend pandas
 objects.
 
 .. currentmodule:: pandas
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -2675,7 +2675,7 @@ file, and the ``sheet_name`` indicating which sheet to parse.
 +++++++++++++++++++
 
 To facilitate working with multiple sheets from the same file, the ``ExcelFile``
-class can be used to wrap the file and can be be passed into ``read_excel``
+class can be used to wrap the file and can be passed into ``read_excel``.
 There will be a performance benefit for reading multiple sheets as the file is
 read into memory only once.
 
@@ -4537,7 +4537,7 @@ See the documentation for `pyarrow <http://arrow.apache.org/docs/python/>`__ and
 .. note::
 
    These engines are very similar and should read/write nearly identical parquet format files.
-   Currently ``pyarrow`` does not support timedelta data, and ``fastparquet`` does not support timezone aware datetimes (they are coerced to UTC).
+   Currently ``pyarrow`` does not support timedelta data, and ``fastparquet>=0.1.4`` supports timezone aware datetimes.
    These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library).
 
 .. ipython:: python
diff --git a/doc/sphinxext/numpydoc/tests/test_docscrape.py b/doc/sphinxext/numpydoc/tests/test_docscrape.py
@@ -42,7 +42,7 @@
   -------
   out : ndarray
       The drawn samples, arranged according to `shape`.  If the
-      shape given is (m,n,...), then the shape of `out` is is
+      shape given is (m,n,...), then the shape of `out` is
       (m,n,...,N).
 
       In other words, each entry ``out[i,j,...,:]`` is an N-dimensional
@@ -222,7 +222,7 @@ def test_str():
 -------
 out : ndarray
     The drawn samples, arranged according to `shape`.  If the
-    shape given is (m,n,...), then the shape of `out` is is
+    shape given is (m,n,...), then the shape of `out` is
     (m,n,...,N).
 
     In other words, each entry ``out[i,j,...,:]`` is an N-dimensional
@@ -340,7 +340,7 @@ def test_sphinx_str():
     **out** : ndarray
 
         The drawn samples, arranged according to `shape`.  If the
-        shape given is (m,n,...), then the shape of `out` is is
+        shape given is (m,n,...), then the shape of `out` is
         (m,n,...,N).
 
         In other words, each entry ``out[i,j,...,:]`` is an N-dimensional
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -897,7 +897,7 @@ class Timedelta(_Timedelta):
     Represents a duration, the difference between two dates or times.
 
     Timedelta is the pandas equivalent of python's ``datetime.timedelta``
-    and is interchangable with it in most cases.
+    and is interchangeable with it in most cases.
 
     Parameters
     ----------
diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx
@@ -295,7 +295,7 @@ cpdef bint tz_compare(object start, object end):
     timezones. For example
     `<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>` and
     `<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>` are essentially same
-    timezones but aren't evaluted such, but the string representation
+    timezones but aren't evaluated as such, but the string representation
     for both of these is `'Europe/Paris'`.
 
     This exists only to add a notion of equality to pytz-style zones
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4115,7 +4115,7 @@ def combine(self, other, func, fill_value=None, overwrite=True):
                 series[this_mask] = fill_value
                 otherSeries[other_mask] = fill_value
 
-            # if we have different dtypes, possibily promote
+            # if we have different dtypes, possibly promote
             new_dtype = this_dtype
             if not is_dtype_equal(this_dtype, other_dtype):
                 new_dtype = find_common_type([this_dtype, other_dtype])
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -332,7 +332,7 @@ def freqstr(self):
     @cache_readonly
     def inferred_freq(self):
         """
-        Trys to return a string representing a frequency guess,
+        Tries to return a string representing a frequency guess,
         generated by infer_freq.  Returns None if it can't autodetect the
         frequency.
         """
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -224,12 +224,17 @@ def make_block_scalar(self, values):
         """
         return ScalarBlock(values)
 
-    def make_block_same_class(self, values, placement=None, ndim=None):
+    def make_block_same_class(self, values, placement=None, ndim=None,
+                              dtype=None):
         """ Wrap given values in a block of same type as self. """
+        if dtype is not None:
+            # issue 19431 fastparquet is passing this
+            warnings.warn("dtype argument is deprecated, will be removed "
+                          "in a future release.", DeprecationWarning)
         if placement is None:
             placement = self.mgr_locs
         return make_block(values, placement=placement, ndim=ndim,
-                          klass=self.__class__)
+                          klass=self.__class__, dtype=dtype)
 
     def __unicode__(self):
 
diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py
@@ -120,7 +120,7 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None,
             if dtype is not None:
                 mgr = mgr.astype(dtype)
         else:
-            msg = ('SparseDataFrame called with unkown type "{data_type}" '
+            msg = ('SparseDataFrame called with unknown type "{data_type}" '
                    'for data argument')
             raise TypeError(msg.format(data_type=type(data).__name__))
 
diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py
@@ -493,7 +493,7 @@ def _set_value(self, label, value, takeable=False):
         values = self.to_dense()
 
         # if the label doesn't exist, we will create a new object here
-        # and possibily change the index
+        # and possibly change the index
         new_values = values._set_value(label, value, takeable=takeable)
         if new_values is not None:
             values = new_values
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -1395,7 +1395,7 @@ def _validate(data):
         elif isinstance(data, Index):
             # can't use ABCIndex to exclude non-str
 
-            # see scc/inferrence.pyx which can contain string values
+            # see src/inference.pyx which can contain string values
             allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
             if data.inferred_type not in allowed_types:
                 message = ("Can only use .str accessor with string values "
diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
@@ -210,7 +210,7 @@ def _hash_categorical(c, encoding, hash_key):
 
     # we have uint64, as we don't directly support missing values
     # we don't want to use take_nd which will coerce to float
-    # instead, directly construt the result with a
+    # instead, directly construct the result with a
     # max(np.uint64) as the missing value indicator
     #
     # TODO: GH 15362
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1962,7 +1962,7 @@ def formatter(value):
     def get_result_as_array(self):
         """
         Returns the float values converted into strings using
-        the parameters given at initalisation, as a numpy array
+        the parameters given at initialisation, as a numpy array
         """
 
         if self.formatter is not None:
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -3763,7 +3763,7 @@ def write(self, **kwargs):
 class LegacyTable(Table):
 
     """ an appendable table: allow append/query/delete operations to a
-          (possibily) already existing appendable table this table ALLOWS
+          (possibly) already existing appendable table this table ALLOWS
           append (but doesn't require them), and stores the data in a format
           that can be easily searched
 
diff --git a/pandas/tests/categorical/test_constructors.py b/pandas/tests/categorical/test_constructors.py
@@ -382,7 +382,7 @@ def test_constructor_from_categorical_with_unknown_dtype(self):
                                ordered=True)
         tm.assert_categorical_equal(result, expected)
 
-    def test_contructor_from_categorical_string(self):
+    def test_constructor_from_categorical_string(self):
         values = Categorical(['a', 'b', 'd'])
         # use categories, ordered
         result = Categorical(values, categories=['a', 'b', 'c'], ordered=True,
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -543,7 +543,7 @@ def test_nested_dict_frame_constructor(self):
         tm.assert_frame_equal(result, df)
 
     def _check_basic_constructor(self, empty):
-        # mat: 2d matrix with shpae (3, 2) to input. empty - makes sized
+        # mat: 2d matrix with shape (3, 2) to input. empty - makes sized
         # objects
         mat = empty((2, 3), dtype=float)
         # 2-D input
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -285,6 +285,14 @@ def test_delete(self):
         with pytest.raises(Exception):
             newb.delete(3)
 
+    def test_make_block_same_class(self):
+        # issue 19431
+        block = create_block('M8[ns, US/Eastern]', [3])
+        with tm.assert_produces_warning(DeprecationWarning,
+                                        check_stacklevel=False):
+            block.make_block_same_class(block.values.values,
+                                        dtype=block.values.dtype)
+
 
 class TestDatetimeBlock(object):
 
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -2531,7 +2531,7 @@ def test_date_tz(self):
             [datetime(2013, 1, 1), pd.NaT], utc=True).format()
         assert formatted[0] == "2013-01-01 00:00:00+00:00"
 
-    def test_date_explict_date_format(self):
+    def test_date_explicit_date_format(self):
         formatted = pd.to_datetime([datetime(2003, 2, 1), pd.NaT]).format(
             date_format="%m-%d-%Y", na_rep="UT")
         assert formatted[0] == "02-01-2003"
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -71,6 +71,15 @@ def fp():
     return 'fastparquet'
 
 
+@pytest.fixture
+def fp_lt_014():
+    if not _HAVE_FASTPARQUET:
+        pytest.skip("fastparquet is not installed")
+    if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'):
+        pytest.skip("fastparquet is >= 0.1.4")
+    return 'fastparquet'
+
+
 @pytest.fixture
 def df_compat():
     return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'})
@@ -435,8 +444,10 @@ def test_basic(self, fp, df_full):
         df = df_full
 
         # additional supported types for fastparquet
+        if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'):
+            df['datetime_tz'] = pd.date_range('20130101', periods=3,
+                                              tz='US/Eastern')
         df['timedelta'] = pd.timedelta_range('1 day', periods=3)
-
         check_round_trip(df, fp)
 
     @pytest.mark.skip(reason="not supported")
@@ -468,14 +479,15 @@ def test_categorical(self, fp):
         df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
         check_round_trip(df, fp)
 
-    def test_datetime_tz(self, fp):
-        # doesn't preserve tz
+    def test_datetime_tz(self, fp_lt_014):
+
+        # fastparquet<0.1.4 doesn't preserve tz
         df = pd.DataFrame({'a': pd.date_range('20130101', periods=3,
                                               tz='US/Eastern')})
-
         # warns on the coercion
         with catch_warnings(record=True):
-            check_round_trip(df, fp, expected=df.astype('datetime64[ns]'))
+            check_round_trip(df, fp_lt_014,
+                             expected=df.astype('datetime64[ns]'))
 
     def test_filter_row_groups(self, fp):
         d = {'a': list(range(0, 3))}
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
@@ -43,7 +43,7 @@ def test_empty(self, method, unit, use_bottleneck):
             result = getattr(s, method)()
             assert result == unit
 
-            # Explict
+            # Explicit
             result = getattr(s, method)(min_count=0)
             assert result == unit
 
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
@@ -19,6 +19,7 @@
 from pandas.core.indexes.timedeltas import Timedelta
 import pandas.core.nanops as nanops
 
+from pandas.errors import PerformanceWarning
 from pandas.compat import range, zip
 from pandas import compat
 from pandas.util.testing import (assert_series_equal, assert_almost_equal,
@@ -871,8 +872,9 @@ def test_timedelta64_operations_with_DateOffset(self):
         expected = Series([timedelta(minutes=4, seconds=3)] * 3)
         assert_series_equal(result, expected)
 
-        result = td + Series([pd.offsets.Minute(1), pd.offsets.Second(3),
-                              pd.offsets.Hour(2)])
+        with tm.assert_produces_warning(PerformanceWarning):
+            result = td + Series([pd.offsets.Minute(1), pd.offsets.Second(3),
+                                  pd.offsets.Hour(2)])
         expected = Series([timedelta(minutes=6, seconds=3), timedelta(
             minutes=5, seconds=6), timedelta(hours=2, minutes=5, seconds=3)])
         assert_series_equal(result, expected)
@@ -1163,7 +1165,7 @@ def test_timedelta_floordiv(self, scalar_td):
                                        ('NCC1701D', 'NCC1701D', 'NCC1701D')])
     def test_td64_series_with_tdi(self, names):
         # GH#17250 make sure result dtype is correct
-        # GH#19043 make sure names are propogated correctly
+        # GH#19043 make sure names are propagated correctly
         tdi = pd.TimedeltaIndex(['0 days', '1 day'], name=names[0])
         ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1])
         expected = Series([Timedelta(hours=3), Timedelta(days=1, hours=4)],
diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
@@ -218,7 +218,7 @@ def test_constructor_from_unknown_type(self):
         class Unknown:
             pass
         with pytest.raises(TypeError,
-                           message='SparseDataFrame called with unkown type '
+                           message='SparseDataFrame called with unknown type '
                                    '"Unknown" for data argument'):
             SparseDataFrame(Unknown())
 
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
@@ -1611,13 +1611,13 @@ def test_pyint_engine(self):
             index = MultiIndex.from_tuples(keys)
             assert index.get_loc(keys[idx]) == idx
 
-            expected = np.arange(idx + 1, dtype='int64')
+            expected = np.arange(idx + 1, dtype=np.intp)
             result = index.get_indexer([keys[i] for i in expected])
             tm.assert_numpy_array_equal(result, expected)
 
         # With missing key:
         idces = range(len(keys))
-        expected = np.array([-1] + list(idces), dtype='int64')
+        expected = np.array([-1] + list(idces), dtype=np.intp)
         missing = tuple([0, 1] * 5 * N)
         result = index.get_indexer([missing] + [keys[i] for i in idces])
         tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -2401,7 +2401,7 @@ class for all warnings. To check that no warning is returned,
         into errors.
         Valid values are:
 
-        * "error" - turns matching warnings into exeptions
+        * "error" - turns matching warnings into exceptions
         * "ignore" - discard the warning
         * "always" - always emit a warning
         * "default" - print the warning the first time it is generated

Original file line number	Diff line number	Diff line change
`@@ -210,7 +210,7 @@ def _hash_categorical(c, encoding, hash_key):`
`210`	`210`
`211`	`211`	`# we have uint64, as we don't directly support missing values`
`212`	`212`	`# we don't want to use take_nd which will coerce to float`
`213`		`- # instead, directly construt the result with a`
	`213`	`+ # instead, directly construct the result with a`
`214`	`214`	`# max(np.uint64) as the missing value indicator`
`215`	`215`	`#`
`216`	`216`	`# TODO: GH 15362`