DOC: added sorting examples to 10min

jreback · jreback · commit 41793eaf9553 · 2013-03-06T21:01:48.000-05:00
BUG: fixed multi-index selection via loc, back to using some
     of ix code (but still do validation if not mi)

ENH: add xs to Series for compatibility, create _xs functions in all objects

DOC: added several sub-sections to 10min
     fixed some references in basics.rst
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -59,6 +59,7 @@ pandas 0.11.0
   - Add ``format`` option to ``pandas.to_datetime`` with faster conversion of
     strings that can be parsed with datetime.strptime
   - Add ``axes`` property to ``Series`` for compatibility 
+  - Add ``xs`` function to ``Series`` for compatibility 
 
 **API Changes**
 
@@ -135,7 +136,6 @@ pandas 0.11.0
   - Bug on in-place putmasking on an ``integer`` series that needs to be converted to ``float`` (GH2746_)
   - Bug in argsort of ``datetime64[ns]`` Series with ``NaT`` (GH2967_)
   - Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982__)
-  - ``icol`` with negative indicies was return ``nan`` (see GH2922_)
   - Bug in ``icol`` with negative indicies was incorrect producing incorrect return values (see GH2922_)
 
 .. _GH622: https://github.com/pydata/pandas/issues/622
diff --git a/doc/source/10min.rst b/doc/source/10min.rst
@@ -67,7 +67,7 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s
                         'E' : 'foo' })
    df2
 
-Having specific dtypes
+Having specific :ref:`dtypes <basics.dtypes>`
 
 .. ipython:: python
 
@@ -83,7 +83,7 @@ See the top & bottom rows of the frame
 .. ipython:: python
 
    df.head()
-   df.tail()
+   df.tail(3)
 
 Display the index,columns, and the underlying numpy data
 
@@ -99,6 +99,24 @@ Describe shows a quick statistic summary of your data
 
    df.describe()
 
+Transposing your data
+
+.. ipython:: python
+
+   df.T
+
+Sorting by an axis
+
+.. ipython:: python
+
+   df.sort_index(axis=1, ascending=False)
+
+Sorting by values
+
+.. ipython:: python
+
+   df.sort(columns='B')
+
 Selection
 ---------
 
@@ -112,6 +130,7 @@ Selecting a single column, which yields a ``Series``
 
 .. ipython:: python
 
+   # equivalently ``df.A``
    df['A']
 
 Selecting via ``[]``, which slices the rows.
@@ -167,7 +186,6 @@ Select via the position of the passed integers
 
 .. ipython:: python
 
-   # this is a cross-section of the object
    df.iloc[3]
 
 By integer slices, acting similar to numpy/python
@@ -220,7 +238,7 @@ Pandas will detect this and raise ``IndexError``, rather than return an empty st
 
 ::
 
-    >>> df.iloc[:,3:6]
+    >>> df.iloc[:,8:10]
     IndexError: out-of-bounds on slice (end)
 
 Boolean Indexing
@@ -232,7 +250,7 @@ Using a single column's values to select data.
 
    df[df.A > 0]
 
-A ``where`` operation.
+A ``where`` operation for getting.
 
 .. ipython:: python
 
@@ -270,6 +288,14 @@ Setting by assigning with a numpy array
    df.loc[:,'D'] = np.array([5] * len(df))
    df
 
+A ``where`` operation with setting.
+
+.. ipython:: python
+
+   df2 = df.copy()
+   df2[df2 > 0] = -df2
+   df2
+
 Missing Data
 ------------
 
@@ -297,6 +323,12 @@ Filling missing data
 
    df1.fillna(value=5)
 
+To get the boolean mask where values are ``nan``
+
+.. ipython:: python
+
+   pd.isnull(df1)
+
 
 Operations
 ----------
@@ -306,6 +338,8 @@ See the :ref:`Basic section on Binary Ops <basics.binop>`
 Stats
 ~~~~~
 
+Operations in general *exclude* missing data.
+
 Performing a descriptive statistic
 
 .. ipython:: python
@@ -318,11 +352,15 @@ Same operation on the other axis
 
    df.mean(1)
 
-Operations on missing data, exclude the data
+Operating with objects that have different dimensionality and need alignment.
+In addition, pandas automatically broadcasts along the specified dimension.
 
 .. ipython:: python
 
-  df1.mean()
+   s = pd.Series([1,3,5,np.nan,6,8],index=dates).shift(2)
+   s
+   df.sub(s,axis='index')
+
 
 Apply
 ~~~~~
@@ -334,6 +372,27 @@ Applying functions to the data
    df.apply(np.cumsum)
    df.apply(lambda x: x.max() - x.min())
 
+Histogramming
+~~~~~~~~~~~~~
+
+See more at :ref:`Histogramming and Discretization <basics.discretization>`
+
+.. ipython:: python
+
+   s = Series(np.random.randint(0,7,size=10))
+   s
+   s.value_counts()
+
+String Methods
+~~~~~~~~~~~~~~
+
+See more at :ref:`Vectorized String Methods <basics.string_methods>`
+
+.. ipython:: python
+
+   s = Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
+   s.str.lower()
+
 Merge
 -----
 
@@ -425,6 +484,9 @@ Reshaping
 See the section on :ref:`Hierarchical Indexing <indexing.hierarchical>` and
 see the section on :ref:`Reshaping <reshaping.stacking>`).
 
+Stack
+~~~~~
+
 .. ipython:: python
 
    tuples = zip(*[['bar', 'bar', 'baz', 'baz',
@@ -453,6 +515,26 @@ unstacks the **last level**:
    stacked.unstack(1)
    stacked.unstack(0)
 
+Pivot Tables
+~~~~~~~~~~~~
+See the section on :ref:`Pivot Tables <reshaping.pivot>`.
+
+.. ipython:: python
+
+   df = DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
+                   'B' : ['A', 'B', 'C'] * 4,
+                   'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
+                   'D' : np.random.randn(12),
+                   'E' : np.random.randn(12)})
+   df
+
+We can produce pivot tables from this data very easily:
+
+.. ipython:: python
+
+   pivot_table(df, values='D', rows=['A', 'B'], cols=['C'])
+
+
 Time Series
 -----------
 
@@ -581,3 +663,25 @@ Reading from a HDF5 Store
    store.close()
    os.remove('foo.h5')
 
+Excel
+~~~~~
+
+Reading and writing to :ref:`MS Excel <io.excel>`
+
+Writing to an excel file
+
+.. ipython:: python
+
+   df.to_excel('foo.xlsx', sheet_name='sheet1')
+
+Reading from an excel file
+
+.. ipython:: python
+
+   xls = ExcelFile('foo.xlsx')
+   xls.parse('sheet1', index_col=None, na_values=['NA'])
+
+.. ipython:: python
+   :suppress:
+
+   os.remove('foo.xlsx')
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
@@ -9,9 +9,9 @@
    randn = np.random.randn
    np.set_printoptions(precision=4, suppress=True)
 
-*****************************
-Essential Basic Functionality
-*****************************
+==============================
+ Essential Basic Functionality
+==============================
 
 Here we discuss a lot of the essential functionality common to the pandas data
 structures. Here's how to create some of the objects used in the examples from
@@ -374,6 +374,8 @@ value, ``idxmin`` and ``idxmax`` return the first matching index:
    df3
    df3['A'].idxmin()
 
+.. _basics.discretization:
+
 Value counts (histogramming)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -976,11 +978,11 @@ To be clear, no pandas methods have the side effect of modifying your data;
 almost all methods return new objects, leaving the original object
 untouched. If data is modified, it is because you did so explicitly.
 
+.. _basics.dtypes:
+
 dtypes
 ------
 
-.. _basics.dtypes:
-
 The main types stored in pandas objects are ``float``, ``int``, ``bool``, ``datetime64[ns]``, ``timedelta[ns]``,
 and ``object``. In addition these dtypes have item sizes, e.g. ``int64`` and ``int32``. A convenient ``dtypes`` 
 attribute for DataFrames returns a Series with the data type of each column.
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -906,6 +906,8 @@ And then import the data directly to a DataFrame by calling:
    clipdf
 
 
+.. _io.excel:
+
 Excel files
 -----------
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2343,6 +2343,8 @@ def xs(self, key, axis=0, level=None, copy=True):
             result.index = new_index
             return result
 
+    _xs = xs
+
     def lookup(self, row_labels, col_labels):
         """
         Label-based "fancy indexing" function for DataFrame. Given equal-length
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -55,9 +55,9 @@ def _get_label(self, label, axis=0):
             raise IndexingError('no slices here')
 
         try:
-            return self.obj.xs(label, axis=axis, copy=False)
+            return self.obj._xs(label, axis=axis, copy=False)
         except Exception:
-            return self.obj.xs(label, axis=axis, copy=True)
+            return self.obj._xs(label, axis=axis, copy=True)
 
     def _get_loc(self, key, axis=0):
         return self.obj._ixs(key, axis=axis)
@@ -86,6 +86,9 @@ def __setitem__(self, key, value):
 
         self._setitem_with_indexer(indexer, value)
 
+    def _has_valid_tuple(self, key):
+        pass
+
     def _convert_tuple(self, key):
         keyidx = []
         for i, k in enumerate(key):
@@ -224,6 +227,9 @@ def _getitem_tuple(self, tup):
         if self._multi_take_opportunity(tup):
             return self._multi_take(tup)
 
+        # no multi-index, so validate all of the indexers
+        self._has_valid_tuple(tup)
+
         # no shortcut needed
         retval = self.obj
         for i, key in enumerate(tup):
@@ -616,15 +622,16 @@ class _LocationIndexer(_NDFrameIndexer):
     def _has_valid_type(self, k, axis):
         raise NotImplementedError()
 
+    def _has_valid_tuple(self, key):
+        """ check the key for valid keys across my indexer """
+        for i, k in enumerate(key):
+            if i >= self.obj.ndim:
+                raise ValueError('Too many indexers')
+            if not self._has_valid_type(k,i):
+                raise ValueError("Location based indexing can only have [%s] types" % self._valid_types)
+
     def __getitem__(self, key):
         if type(key) is tuple:
-
-            for i, k in enumerate(key):
-                if i >= self.obj.ndim:
-                    raise ValueError('Too many indexers')
-                if not self._has_valid_type(k,i):
-                    raise ValueError("Location based indexing can only have [%s] types" % self._valid_types)
-
             return self._getitem_tuple(key)
         else:
             return self._getitem_axis(key, axis=0)
@@ -707,11 +714,7 @@ def _getitem_axis(self, key, axis=0):
 
             return self._getitem_iterable(key, axis=axis)
         else:
-            indexer = labels.get_loc(key)
-            return self._get_loc(indexer, axis=axis)
-
-    def _get_loc(self, key, axis=0):
-        return self.obj._ixs(key, axis=axis)
+            return self._get_label(key, axis=axis)
 
 class _iLocIndexer(_LocationIndexer):
     """ purely integer based location based indexing """
@@ -723,6 +726,7 @@ def _has_valid_type(self, key, axis):
 
     def _getitem_tuple(self, tup):
 
+        self._has_valid_tuple(tup)
         retval = self.obj
         for i, key in enumerate(tup):
             if _is_null_slice(key):
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
@@ -1065,6 +1065,8 @@ def xs(self, key, axis=1, copy=True):
         new_data = self._data.xs(key, axis=axis_number, copy=copy)
         return self._constructor_sliced(new_data)
 
+    _xs = xs
+
     def _ixs(self, i, axis=0):
         # for compatibility with .ix indexing
         # Won't work with hierarchical indexing yet
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -559,6 +559,9 @@ def ix(self):
 
         return self._ix
 
+    def _xs(self, key, axis=0, level=None, copy=True):
+        return self.__getitem__(key)
+
     def _ixs(self, i, axis=0):
         """
         Return the i-th value or values in the Series by location
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py