ENH: accept dict of column:dtype as dtype argument in DataFrame.astype #12086
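What this adds, in brief: DataFrame.astype accepts a dict mapping column names to dtypes, so individual columns can be cast without touching the rest. A minimal sketch of the intended usage (the frame and dtypes below are illustrative, not taken from the PR itself):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3],
                       'b': [4.0, 5.0, 6.0],
                       'c': ['x', 'y', 'z']})

    # Cast only column 'a'; 'b' and 'c' keep their existing dtypes.
    df = df.astype({'a': np.uint8})
    df.dtypes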

Closed
Changes from all commits

97 commits

0aeee8d
ENH: inplace dtype changes, df per-column dtype changes; GH7271
StephenKappel May 8, 2016
58dd71b
ENH: NDFrame astype() now accepts inplace arg and dtype arg can be a …
StephenKappel May 10, 2016
43989fd
DOC: xref #13112, add back lexsorting example
jreback May 10, 2016
f0e47a9
COMPAT: boto import issues
jreback May 11, 2016
d0734ba
BUG: Added checks for NaN in __call__ of EngFormatter
yaduart May 11, 2016
2a99394
TST: fix assert_categorical_equal message
sinhrks May 11, 2016
4aa6323
BUG: Series ops with object dtype may incorrectly fail
sinhrks May 3, 2016
4de83d2
PERF: quantile now operates per block boosting perf / fix quantile wi…
jreback May 12, 2016
c9ffd78
DOC: Fix delim_whitespace regex typo.
dsm054 May 13, 2016
e5c18b4
BUG: Correct KeyError from matplotlib when processing Series yerr
gliptak May 13, 2016
00d4ec3
BUG: Misc fixes for SparseSeries indexing with MI
sinhrks May 13, 2016
82f54bd
ENH/BUG: str.extractall doesn't support index
sinhrks May 13, 2016
01dd111
DOC: Fix additional join examples in "10 Minutes to pandas" #13029
Xndr7 May 13, 2016
feee089
BUG: Bug in .groupby(..).resample(..) when the same object is called …
jreback May 14, 2016
b385799
DOC: Clarify Categorical Crosstab Behaviour
gfyoung May 14, 2016
2de2884
BUG: GH12896 where extra elements are returned in MultiIndex slicing
kawochen May 14, 2016
f637aa3
TST: Use compatible time zones
neirbowj May 15, 2016
62bed0e
COMPAT: Add Pathlib, py.path support for read_hdf
quintusdias May 16, 2016
4e4a7d9
COMPAT/TST: sparse formatting test for platform, xref #13163
jreback May 16, 2016
62fc481
CLN: no return on init
max-sixty May 17, 2016
20ea406
BUG: fix to_records conflict with unicode_literals #13172
starplanet May 17, 2016
00e0f3e
BUG: Period and Series/Index comparison raises TypeError
sinhrks May 17, 2016
2429ec5
TST: change test comparison to work on older numpies, #13178
jreback May 17, 2016
009d1df
PERF: DataFrame transform
chris-b1 May 18, 2016
86f68e6
BUG: Sparse creation with object dtype may raise TypeError
sinhrks May 18, 2016
4b50149
TST: Test resampling with NaT
May 18, 2016
eeccd05
BUG: Fix #13213 json_normalize() and non-ascii characters in keys
May 19, 2016
070e877
BUG: Fix argument order in call to super
eddiejessup May 19, 2016
2a120cf
DOC: add v0.19.0 whatsnew doc
jreback May 19, 2016
fecb2ca
COMPAT: Further Expand Compatibility with fromnumeric.py
gfyoung May 20, 2016
123f2ee
BUG: Bug in .to_datetime() when passing integers or floats, no unit a…
jreback May 20, 2016
cc25040
BUG: GH12824 fixed apply() returns different result depending on whet…
adneu May 20, 2016
72164a8
API/COMPAT: add pydatetime-style positional args to Timestamp constru…
thejohnfreeman May 20, 2016
9d44e63
BUG: mpl fix to AutoDateFormatter to fix second/us-second formatters
tacaswell May 10, 2016
8e2f70b
TST: xref #13183, for windows compat
jreback May 20, 2016
f5c24d2
Reverse numpy compat changes to tslib.pyx
gfyoung May 21, 2016
d2b5819
BUG: Empty PeriodIndex issues
max-sixty May 21, 2016
6f90340
API: Use np.random's RandomState when seed is None in .sample
May 21, 2016
82bdc1d
TST: check internal Categorical
sinhrks May 21, 2016
b88eb35
TST/ERR: Add Period ops tests / fix error message
sinhrks May 22, 2016
19ebee5
ENH: support decimal option in PythonParser #12933
May 22, 2016
f8a11dd
ERR: Correct ValueError invalid type promotion exception
gliptak May 23, 2016
afde718
BUG: Fix #13149 and ENH: 'copy' param in Index.astype()
pijucha May 23, 2016
9a6ce07
BUG, ENH: Add support for parsing duplicate columns
gfyoung May 23, 2016
8662cb9
TST: assert_dict_equal to check input type
sinhrks May 24, 2016
75714de
BUG: remove_unused_categories dtype coerces to int64
sinhrks May 24, 2016
69ad08b
BUG: Bug in selection from a HDFStore with a fixed format and start a…
jreback May 24, 2016
e0a2e3b
DOC: fixed typos in GroupBy document
mortada May 24, 2016
b638f18
BUG: Properly validate and parse nrows in read_csv
gfyoung May 25, 2016
8749273
BUG: Fix for resampler for grouping kwarg bug
roycoding May 25, 2016
da5fc17
BUG, ENH: Improve infinity parsing for read_csv
gfyoung May 25, 2016
b4e2d34
TST: Remove imp and just use importlib to avoid memory error when sho…
nparley May 25, 2016
f2ce0ac
ERR: error in datetime conversion with non-convertibles
gliptak May 26, 2016
57ea76f
DOC: Improved documentation for DataFrame.join
edublancas May 26, 2016
9662d91
TST/CLN: remove np.assert_equal
sinhrks May 26, 2016
a67ac2a
COMPAT: extension dtypes (DatetimeTZ, Categorical) are now Singleton …
jreback May 25, 2016
5d67720
DOC: Added an example of pitfalls when using astype
pfrcks May 26, 2016
456dcae
TST: skip Fred / YahooOptions tests
jreback May 26, 2016
db43824
TST: split up test_merge
jreback May 26, 2016
40b4bb4
TST: reorg datetime with tz tests a bit
jreback May 26, 2016
4b05055
DOC: low_memory in read_csv
chris-b1 May 26, 2016
0f1666d
ENH: support decimal argument in read_html #12907
ccronca May 27, 2016
e8d9e79
BUG: preserve join keys dtype
jreback May 27, 2016
ae2ca83
COMPAT: windows test compat for merge, xref #13170
jreback May 27, 2016
c2ea8fb
TST: Make numpy_array test strict
sinhrks May 28, 2016
af4ed0f
DOC: remove references to deprecated numpy negation method
mortada May 28, 2016
70be8a9
DOC: Fix read_stata docstring
sinhrks May 29, 2016
721be62
BUG: Check for NaN after data conversion to numeric
gfyoung May 30, 2016
ed4cd3a
TST: Parser tests refactoring
gfyoung May 30, 2016
cc1025a
COMPAT: do not upcast results to float64 when float32 scalar *+/- flo…
jennolsen84 May 30, 2016
d6f814c
TST: remove tests_tseries.py and distribute to other tests files
jreback May 30, 2016
9e7bfdd
BLD: increase clone depth
jreback May 30, 2016
c0850ea
ENH: add support for na_filter in Python engine
gfyoung May 31, 2016
352ae44
TST: more strict testing in lint.sh
jreback May 31, 2016
132c1c5
BUG: Fix describe(): percentiles (#13104), col index (#13288)
pijucha May 31, 2016
d191640
ENH: Respect Key Ordering for OrderedDict List in DataFrame Init
gfyoung May 31, 2016
f3d7c18
BUG: Fix maybe_convert_numeric for unhashable objects
May 31, 2016
8bbd2bc
ENH: Series has gained the properties .is_monotonic*
jreback May 31, 2016
2e3c82e
TST: computation/test_eval.py tests (slow)
jreback May 31, 2016
45bab82
BUG: Parse trailing NaN values for the Python parser
gfyoung Jun 1, 2016
fcd73ad
BUG: GH13219 Fixed. Allow unicode values in usecols
hassanshamim May 19, 2016
99e78da
DOC: fix comment on previous versions cythonmagic
jorisvandenbossche Jun 2, 2016
ce56542
Fix #13306: Hour overflow in tz-aware datetime conversions.
uwedeportivo Jun 2, 2016
0c6226c
ENH: Add support for compact_ints and use_unsigned in Python engine
gfyoung Jun 2, 2016
2061e9e
BUG: Fix series comparison operators when dealing with zero rank nump…
gliptak Jun 3, 2016
103f7d3
DOC: Add example usage to DataFrame.filter
cswarth Jun 3, 2016
faf9b7d
DOC: Fixed a minor typo
Jun 5, 2016
eca7891
DOC: document doublequote in read_csv
gfyoung Jun 5, 2016
863cbc5
DEPR, DOC: Deprecate buffer_lines in read_csv
gfyoung Jun 5, 2016
5a9b498
BUG: Make pd.read_hdf('data.h5') work when pandas object stored conta…
chrish42 Jun 5, 2016
e90d411
DOC: remove obsolete cron job script (#13369)
Jun 5, 2016
b722222
CLN: remove old skiplist code
jreback Jun 5, 2016
3600bca
ENH: incorporate PR feedback; GH7271
StephenKappel Jun 5, 2016
29ecec0
ENH: inplace dtype changes, df per-column dtype changes; GH7271
StephenKappel May 8, 2016
95a029b
ENH: NDFrame astype() now accepts inplace arg and dtype arg can be a …
StephenKappel May 10, 2016
9d8e1b5
ENH: incorporate PR feedback; GH7271
StephenKappel Jun 5, 2016
c960523
resolve merge conflict in rebasing of 7271-df-astype-dict
StephenKappel Jun 5, 2016
2 changes: 1 addition & 1 deletion .travis.yml
@@ -14,7 +14,7 @@ env:

git:
  # for cloning
-  depth: 300
+  depth: 500

matrix:
  fast_finish: true
13 changes: 12 additions & 1 deletion asv_bench/benchmarks/frame_methods.py
@@ -423,7 +423,7 @@ class frame_get_dtype_counts(object):
    goal_time = 0.2

    def setup(self):
-        self.df = pandas.DataFrame(np.random.randn(10, 10000))
+        self.df = DataFrame(np.random.randn(10, 10000))

    def time_frame_get_dtype_counts(self):
        self.df.get_dtype_counts()
@@ -985,3 +985,14 @@ def setup(self):

    def time_series_string_vector_slice(self):
        self.s.str[:5]
+
+
+class frame_quantile_axis1(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.df = DataFrame(np.random.randn(1000, 3),
+                            columns=list('ABC'))
+
+    def time_frame_quantile_axis1(self):
+        self.df.quantile([0.1, 0.5], axis=1)
15 changes: 15 additions & 0 deletions asv_bench/benchmarks/groupby.py
@@ -773,6 +773,21 @@ def setup(self):
    def time_groupby_transform_series2(self):
        self.df.groupby('id')['val'].transform(np.mean)
+
+
+class groupby_transform_dataframe(object):
+    # GH 12737
+    goal_time = 0.2
+
+    def setup(self):
+        self.df = pd.DataFrame({'group': np.repeat(np.arange(1000), 10),
+                                'B': np.nan,
+                                'C': np.nan})
+        self.df.ix[4::10, 'B':'C'] = 5
+
+    def time_groupby_transform_dataframe(self):
+        self.df.groupby('group').transform('first')


class groupby_transform_cythonized(object):
    goal_time = 0.2
60 changes: 56 additions & 4 deletions asv_bench/benchmarks/parser_vb.py
@@ -23,18 +23,42 @@ class read_csv_default_converter(object):
    goal_time = 0.2

    def setup(self):
-        self.data = '0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n 0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n 0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n 0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n 0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n '
+        self.data = """0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n
+0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n
+0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n
+0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n
+0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n"""
        self.data = (self.data * 200)

    def time_read_csv_default_converter(self):
        read_csv(StringIO(self.data), sep=',', header=None, float_precision=None)
+
+
+class read_csv_default_converter_with_decimal(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.data = """0,1213700904466425978256438611;0,0525708283766902484401839501;0,4174092731488769913994474336\n
+0,4096341697147408700274695547;0,1587830198973579909349496119;0,1292545832485494372576795285\n
+0,8323255650024565799327547210;0,9694902427379478160318626578;0,6295047811546814475747169126\n
+0,4679375305798131323697930383;0,2963942381834381301075609371;0,5268936082160610157032465394\n
+0,6685382761849776311890991564;0,6721207066140679753374342908;0,6519975277021627935170045020\n"""
+        self.data = (self.data * 200)
+
+    def time_read_csv_default_converter_with_decimal(self):
+        read_csv(StringIO(self.data), sep=';', header=None,
+                 float_precision=None, decimal=',')


class read_csv_precise_converter(object):
    goal_time = 0.2

    def setup(self):
-        self.data = '0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n 0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n 0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n 0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n 0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n '
+        self.data = """0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n
+0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n
+0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n
+0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n
+0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n"""
        self.data = (self.data * 200)

    def time_read_csv_precise_converter(self):
@@ -45,7 +69,11 @@ class read_csv_roundtrip_converter(object):
    goal_time = 0.2

    def setup(self):
-        self.data = '0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n 0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n 0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n 0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n 0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n '
+        self.data = """0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n
+0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n
+0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n
+0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n
+0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n"""
        self.data = (self.data * 200)

    def time_read_csv_roundtrip_converter(self):
@@ -109,4 +137,28 @@ def setup(self):
        self.data = (self.data * 200)

    def time_read_table_multiple_date_baseline(self):
-        read_table(StringIO(self.data), sep=',', header=None, parse_dates=[1])
+        read_table(StringIO(self.data), sep=',', header=None, parse_dates=[1])
+
+
+class read_csv_default_converter_python_engine(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.data = '0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n 0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n 0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n 0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n 0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n '
+        self.data = (self.data * 200)
+
+    def time_read_csv_default_converter(self):
+        read_csv(StringIO(self.data), sep=',', header=None,
+                 float_precision=None, engine='python')
+
+
+class read_csv_default_converter_with_decimal_python_engine(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.data = '0,1213700904466425978256438611;0,0525708283766902484401839501;0,4174092731488769913994474336\n 0,4096341697147408700274695547;0,1587830198973579909349496119;0,1292545832485494372576795285\n 0,8323255650024565799327547210;0,9694902427379478160318626578;0,6295047811546814475747169126\n 0,4679375305798131323697930383;0,2963942381834381301075609371;0,5268936082160610157032465394\n 0,6685382761849776311890991564;0,6721207066140679753374342908;0,6519975277021627935170045020\n '
+        self.data = (self.data * 200)
+
+    def time_read_csv_default_converter_with_decimal(self):
+        read_csv(StringIO(self.data), sep=';', header=None,
+                 float_precision=None, decimal=',', engine='python')
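
For context on the option these benchmarks exercise: read_csv's decimal keyword tells the parser to treat a character other than '.' as the decimal point, and engine='python' routes through the Python parser this commit series extends. A small self-contained sketch under those assumptions (the data values are illustrative):

    from io import StringIO
    import pandas as pd

    # ';'-separated fields that use ',' as the decimal mark
    data = '0,1213;0,0525\n0,4096;0,1587\n'
    df = pd.read_csv(StringIO(data), sep=';', header=None,
                     decimal=',', engine='python')
    df  # both columns parse as float64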
99 changes: 0 additions & 99 deletions ci/cron/go_doc.sh

This file was deleted.

10 changes: 10 additions & 0 deletions ci/lint.sh
@@ -15,7 +15,17 @@ if [ "$LINT" ]; then
        if [ $? -ne "0" ]; then
            RET=1
        fi
+
    done
    echo "Linting DONE"
+
+    echo "Check for invalid testing"
+    grep -r -E --include '*.py' --exclude nosetester.py --exclude testing.py '(numpy|np)\.testing' pandas
+    if [ $? = "0" ]; then
+        RET=1
+    fi
+    echo "Check for invalid testing DONE"
+
else
    echo "NOT Linting"
fi
2 changes: 1 addition & 1 deletion ci/requirements-3.4.run
@@ -1,4 +1,4 @@
-pytz
+pytz=2015.7
numpy=1.8.1
openpyxl
xlsxwriter
3 changes: 0 additions & 3 deletions codecov.yml
@@ -7,6 +7,3 @@ coverage:
    default:
      target: '50'
  branches: null
-changes:
-  default:
-    branches: null
2 changes: 1 addition & 1 deletion doc/README.rst
@@ -160,7 +160,7 @@ and `Good as first PR
<https://github.com/pydata/pandas/issues?labels=Good+as+first+PR&sort=updated&state=open>`_
where you could start out.

-Or maybe you have an idea of you own, by using pandas, looking for something
+Or maybe you have an idea of your own, by using pandas, looking for something
in the documentation and thinking 'this can be improved', let's do something
about that!
11 changes: 11 additions & 0 deletions doc/source/10min.rst
@@ -483,6 +483,17 @@ SQL style merges. See the :ref:`Database style joining <merging.join>`
   right
   pd.merge(left, right, on='key')

+Another example that can be given is:
+
+.. ipython:: python
+
+   left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})
+   right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})
+   left
+   right
+   pd.merge(left, right, on='key')
+
+
Append
~~~~~~
13 changes: 13 additions & 0 deletions doc/source/advanced.rst
@@ -528,6 +528,13 @@ return a copy of the data rather than a view:
      jim joe
   1  z    0.64094

+Furthermore, if you try to index something that is not fully lexsorted, this can raise:
+
+.. code-block:: ipython
+
+   In [5]: dfm.loc[(0,'y'):(1, 'z')]
+   KeyError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'
+
The ``is_lexsorted()`` method on an ``Index`` shows if the index is sorted, and the ``lexsort_depth`` property returns the sort depth:

.. ipython:: python
@@ -542,6 +549,12 @@
   dfm.index.is_lexsorted()
   dfm.index.lexsort_depth

+And now selection works as expected.
+
+.. ipython:: python
+
+   dfm.loc[(0,'y'):(1, 'z')]
+
Take Methods
------------
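
The second hunk elides the lines between the failing slice and the one that works; a hypothetical reconstruction of that step (assuming, per the surrounding text, that sorting the index is what restores full lexsorting):

    import numpy as np
    import pandas as pd

    # Hypothetical setup mirroring the doc's dfm example
    dfm = pd.DataFrame({'jim': [0, 0, 1, 1],
                        'joe': ['x', 'x', 'z', 'y'],
                        'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])

    dfm = dfm.sort_index()        # lexsort_depth goes from 1 to 2
    dfm.loc[(0, 'y'):(1, 'z')]    # the slice no longer raises KeyError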
4 changes: 4 additions & 0 deletions doc/source/api.rst
@@ -354,6 +354,9 @@ Computations / Descriptive Stats
   Series.unique
   Series.nunique
   Series.is_unique
+   Series.is_monotonic
+   Series.is_monotonic_increasing
+   Series.is_monotonic_decreasing
   Series.value_counts

Reindexing / Selection / Label manipulation
@@ -1333,6 +1336,7 @@ Modifying and Computations
   Index.max
   Index.reindex
   Index.repeat
+   Index.where
   Index.take
   Index.putmask
   Index.set_names
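
Two of the listed additions are new in this release line; a short sketch of their behavior (assuming the 0.19-era semantics: the monotonic checks are non-strict, and Index.where fills non-matching positions with NaN by default):

    import pandas as pd

    s = pd.Series([1, 2, 2, 3])
    s.is_monotonic_increasing                      # True: never decreases (ties allowed)
    pd.Series([3, 2, 1]).is_monotonic_decreasing   # True

    idx = pd.Index(['a', 'b', 'c'])
    idx.where(idx != 'b')                          # the 'b' position becomes NaN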
22 changes: 22 additions & 0 deletions doc/source/basics.rst
@@ -1726,6 +1726,28 @@ then the more *general* one will be used as the result of the operation.
   # conversion of dtypes
   df3.astype('float32').dtypes

+Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`
+
+.. ipython:: python
+
+   dft = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6], 'c': [7, 8, 9]})
+   dft[['a','b']] = dft[['a','b']].astype(np.uint8)
+   dft
+   dft.dtypes
+
+.. note::
+
+   When trying to convert a subset of columns to a specified type using :meth:`~DataFrame.astype` and :meth:`~DataFrame.loc`, upcasting occurs.
+
+   :meth:`~DataFrame.loc` tries to fit in what we are assigning to the current dtypes, while ``[]`` will overwrite them taking the dtype from the right hand side. Therefore the following piece of code produces the unintended result.
+
+   .. ipython:: python
+
+      dft = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6], 'c': [7, 8, 9]})
+      dft.loc[:, ['a', 'b']].astype(np.uint8).dtypes
+      dft.loc[:, ['a', 'b']] = dft.loc[:, ['a', 'b']].astype(np.uint8)
+      dft.dtypes
+
object conversion
~~~~~~~~~~~~~~~~~
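
This is also where the dict form added by this PR naturally applies; a hedged sketch of the same subset cast written with it (the placement and wording here are illustrative, not part of this diff):

    import numpy as np
    import pandas as pd

    dft = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
    # Only the keyed columns are cast; 'c' keeps its original dtype.
    dft = dft.astype({'a': np.uint8, 'b': np.uint8})
    dft.dtypes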
2 changes: 1 addition & 1 deletion doc/source/contributing.rst
@@ -21,7 +21,7 @@ and `Difficulty Novice
<https://github.com/pydata/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22>`_
where you could start out.

-Or maybe through using *pandas* you have an idea of you own or are looking for something
+Or maybe through using *pandas* you have an idea of your own or are looking for something
in the documentation and thinking 'this can be improved'...you can do something
about it!
2 changes: 1 addition & 1 deletion doc/source/enhancingperf.rst
@@ -95,7 +95,7 @@ Plain cython
~~~~~~~~~~~~

First we're going to need to import the cython magic function to ipython (for
-cython versions >=0.21 you can use ``%load_ext Cython``):
+cython versions < 0.21 you can use ``%load_ext cythonmagic``):

.. ipython:: python
   :okwarning: