changhiskhan
diff --git a/‎RELEASE.rst
+23-1 b/‎RELEASE.rst
+23-1
diff --git a/‎doc/source/basics.rst
+73-5 b/‎doc/source/basics.rst
+73-5
diff --git a/‎doc/source/dsintro.rst
+7 b/‎doc/source/dsintro.rst
+7
diff --git a/‎doc/source/indexing.rst
+26 b/‎doc/source/indexing.rst
+26
diff --git a/‎doc/source/v0.8.1.txt
+30-11 b/‎doc/source/v0.8.1.txt
+30-11
diff --git a/‎doc/source/visualization.rst
+9-3 b/‎doc/source/visualization.rst
+9-3
diff --git a/‎doc/source/whatsnew.rst
+2 b/‎doc/source/whatsnew.rst
+2
diff --git a/‎pandas/core/format.py
+16-3 b/‎pandas/core/format.py
+16-3
@@ -22,15 +22,25 @@ Where to get it
 * Binary installers on PyPI: http://pypi.python.org/pypi/pandas
 * Documentation: http://pandas.pydata.org
 
-pandas 0.8.1
+pandas 0.8.2
 ============
 
 **Release date:** NOT YET RELEASED
 
+**Improvements to existing features**
+
+  - Add ``flags`` option for ``re.compile`` in some Series.str methods (#1659)
+
+pandas 0.8.1
+============
+
+**Release date:** July 22, 2012
+
 **New features**
 
   - Add vectorized, NA-friendly string methods to Series (#1621, #620)
   - Can pass dict of per-column line styles to DataFrame.plot (#1559)
+  - Selective plotting to secondary y-axis on same subplot (PR #1640)
   - Add new ``bootstrap_plot`` plot function
   - Add new ``parallel_coordinates`` plot function (#1488)
   - Add ``radviz`` plot function (#1566)
@@ -45,6 +55,8 @@ pandas 0.8.1
   - Add Cython group median method for >15x speedup (#1358)
   - Drastically improve ``to_datetime`` performance on ISO8601 datetime strings
     (with no time zones) (#1571)
+  - Improve single-key groupby performance on large data sets, accelerate use of
+    groupby with a Categorical variable
   - Add ability to append hierarchical index levels with ``set_index`` and to
     drop single levels with ``reset_index`` (#1569, #1577)
   - Always apply passed functions in ``resample``, even if upsampling (#1596)
@@ -56,6 +68,8 @@ pandas 0.8.1
   - Accelerate 3-axis multi data selection from homogeneous Panel (#979)
   - Add ``adjust`` option to ewma to disable adjustment factor (#1584)
   - Add new matplotlib converters for high frequency time series plotting (#1599)
+  - Handling of tz-aware datetime.datetime objects in to_datetime; raise
+    Exception unless utc=True given (#1581)
 
 **Bug fixes**
 
@@ -96,6 +110,14 @@ pandas 0.8.1
   - Fix use of string alias timestamps with tz-aware time series (#1647)
   - Fix Series.max/min and Series.describe on len-0 series (#1650)
   - Handle None values in dict passed to concat (#1649)
+  - Fix Series.interpolate with method='values' and DatetimeIndex (#1646)
+  - Fix IndexError in left merges on a 0-length DataFrame (#1628)
+  - Fix DataFrame column width display with UTF-8 encoded characters (#1620)
+  - Handle case in pandas.io.data.get_data_yahoo where Yahoo! returns duplicate
+    dates for most recent business day
+  - Avoid downsampling when plotting mixed frequencies on the same subplot (#1619)
+  - Fix read_csv bug when reading a single line (#1553)
+  - Fix bug in C code causing monthly periods prior to December 1969 to be off (#1570)
 
 pandas 0.8.0
 ============
 
@@ -141,7 +141,7 @@ an axis and broadcasting over the same axis:
    major_mean
    wp.sub(major_mean, axis='major')
 
-And similarly for axis="items" and axis="minor".
+And similarly for ``axis="items"`` and ``axis="minor"``.
 
 .. note::
 
@@ -369,14 +369,14 @@ index labels with the minimum and maximum corresponding values:
    df1.idxmin(axis=0)
    df1.idxmax(axis=1)
 
-When there are multiple rows (or columns) matching the minimum or maximum 
+When there are multiple rows (or columns) matching the minimum or maximum
 value, ``idxmin`` and ``idxmax`` return the first matching index:
 
 .. ipython:: python
 
-   df = DataFrame([2, 1, 1, 3, np.nan], columns=['A'], index=list('edcba'))
-   df
-   df['A'].idxmin()
+   df3 = DataFrame([2, 1, 1, 3, np.nan], columns=['A'], index=list('edcba'))
+   df3
+   df3['A'].idxmin()
 
 Value counts (histogramming)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -835,6 +835,74 @@ For instance,
 
    for r in df2.itertuples(): print r
 
+.. _basics.string_methods:
+
+Vectorized string methods
+-------------------------
+
+Series is equipped (as of pandas 0.8.1) with a set of string processing methods
+that make it easy to operate on each element of the array. Perhaps most
+importantly, these methods exclude missing/NA values automatically. These are
+accessed via the Series's ``str`` attribute and generally have names matching
+the equivalent (scalar) built-in string methods:
+
+.. ipython:: python
+
+   s = Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
+   s.str.lower()
+   s.str.upper()
+   s.str.len()
+
+Methods like ``split`` return a Series of lists:
+
+.. ipython:: python
+
+   s2 = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])
+   s2.str.split('_')
+
+Elements in the split lists can be accessed using ``get`` or ``[]`` notation:
+
+.. ipython:: python
+
+   s2.str.split('_').str.get(1)
+   s2.str.split('_').str[1]
+
+Methods like ``replace`` and ``findall`` take regular expressions, too:
+
+.. ipython:: python
+
+   s3 = Series(['A', 'B', 'C', 'Aaba', 'Baca',
+               '', np.nan, 'CABA', 'dog', 'cat'])
+   s3
+   s3.str.replace('^.a|dog', 'XX-XX ', case=False)
+
+.. csv-table::
+    :header: "Method", "Description"
+    :widths: 20, 80
+
+    ``cat``,Concatenate strings
+    ``split``,Split strings on delimiter
+    ``get``,Index into each element (retrieve i-th element)
+    ``join``,Join strings in each element of the Series with passed separator
+    ``contains``,Return boolean array if each string contains pattern/regex
+    ``replace``,Replace occurrences of pattern/regex with some other string
+    ``repeat``,Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``)
+    ``pad``,"Add whitespace to left, right, or both sides of strings"
+    ``center``,Equivalent to ``pad(side='both')``
+    ``slice``,Slice each string in the Series
+    ``slice_replace``,Replace slice in each string with passed value
+    ``count``,Count occurrences of pattern
+    ``startswith``,Equivalent to ``str.startswith(pat)`` for each element
+    ``endswith``,Equivalent to ``str.endswith(pat)`` for each element
+    ``findall``,Compute list of all occurrences of pattern/regex for each string
+    ``match``,"Call ``re.match`` on each element, returning matched groups as list"
+    ``len``,Compute string lengths
+    ``strip``,Equivalent to ``str.strip``
+    ``rstrip``,Equivalent to ``str.rstrip``
+    ``lstrip``,Equivalent to ``str.lstrip``
+    ``lower``,Equivalent to ``str.lower``
+    ``upper``,Equivalent to ``str.upper``
+
 .. _basics.sorting:
 
 Sorting by index and value
 
@@ -32,6 +32,13 @@ between labels and data will not be broken unless done so explicitly by you.
 We'll give a brief intro to the data structures, then consider all of the broad
 categories of functionality and methods in separate sections.
 
+When using pandas, we recommend the following import convention:
+
+.. code-block:: python
+
+   import pandas as pd
+
+
 .. _basics.series:
 
 Series
 
@@ -9,6 +9,7 @@
    import random
    np.random.seed(123456)
    from pandas import *
+   import pandas as pd
    randn = np.random.randn
    randint = np.random.randint
    np.set_printoptions(precision=4, suppress=True)
@@ -665,6 +666,14 @@ can find yourself working with hierarchically-indexed data without creating a
 ``MultiIndex`` explicitly yourself. However, when loading data from a file, you
 may wish to generate your own ``MultiIndex`` when preparing the data set.
 
+Note that how the index is displayed can be controlled using the
+``multi_sparse`` option in ``pandas.set_printoptions``:
+
+.. ipython:: python
+
+   pd.set_printoptions(multi_sparse=False)
+   df
+   pd.set_printoptions(multi_sparse=True)
 
 Reconstructing the level labels
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -935,6 +944,15 @@ indexed DataFrame:
    indexed2 = data.set_index(['a', 'b'])
    indexed2
 
+The ``append`` keyword option allows you to keep the existing index and append the given
+columns to a MultiIndex:
+
+.. ipython:: python
+
+   frame = data.set_index('c', drop=False)
+   frame = frame.set_index(['a', 'b'], append=True)
+   frame
+
 Other options in ``set_index`` allow you not drop the index columns or to add
 the index in-place (without creating a new object):
 
@@ -959,6 +977,14 @@ integer index. This is the inverse operation to ``set_index``
 The output is more similar to a SQL table or a record array. The names for the
 columns derived from the index are the ones stored in the ``names`` attribute.
 
+You can use the ``level`` keyword to remove only a portion of the index:
+
+.. ipython:: python
+
+   frame
+   frame.reset_index(level=1)
+
+
 ``reset_index`` takes an optional parameter ``drop`` which if true simply
 discards the index, instead of putting index values in the DataFrame's columns.
 
 
@@ -1,27 +1,46 @@
 .. _whatsnew_0801:
 
-v.0.8.1 (July 23, 2012)
----------------------------
+v0.8.1 (July 22, 2012)
+----------------------
 
-This release includes a few new features and addresses over a dozen bugs in
-0.8.0, most notably NA friendly string processing functionality and a series of
-new plot types and options.
+This release includes a few new features, performance enhancements, and over 30
+bug fixes from 0.8.0.  Notable new features include NA-friendly string
+processing functionality and a series of new plot types and options.
 
 New features
 ~~~~~~~~~~~~
 
-  - Add string processing methods accesible via Series.str (GH620_)
+  - Add :ref:`vectorized string processing methods <basics.string_methods>`
+    accessible via Series.str (GH620_)
   - Add option to disable adjustment in EWMA (GH1584_)
-  - Radviz plot (GH1566_)
+  - :ref:`Radviz plot <visualization.radviz>` (GH1566_)
+  - :ref:`Parallel coordinates plot <visualization.parallel_coordinates>`
+  - :ref:`Bootstrap plot <visualization.bootstrap>`
   - Per column styles and secondary y-axis plotting (GH1559_)
   - New datetime converters millisecond plotting  (GH1599_)
+  - Add option to disable "sparse" display of hierarchical indexes (GH1538_)
+  - Series/DataFrame's ``set_index`` method can :ref:`append levels
+    <indexing.set_index>` to an existing Index/MultiIndex (GH1569_, GH1577_)
 
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-  - Improved implementation of rolling min and max
-  - Set logic performance for primitives
+  - Improved implementation of rolling min and max (thanks to `Bottleneck
+    <http://berkeleyanalytics.com/bottleneck/>`__ !)
+  - Add accelerated ``'median'`` GroupBy option (GH1358_)
+  - Significantly improve the performance of parsing ISO8601-format date
+    strings with ``DatetimeIndex`` or ``to_datetime`` (GH1571_)
+  - Improve the performance of GroupBy on single-key aggregations and use with
+    Categorical types
   - Significant datetime parsing performance improvments
 
-.. _GH561: https://github.com/pydata/pandas/issues/561
-.. _GH50: https://github.com/pydata/pandas/issues/50
+.. _GH620: https://github.com/pydata/pandas/issues/620
+.. _GH1358: https://github.com/pydata/pandas/issues/1358
+.. _GH1538: https://github.com/pydata/pandas/issues/1538
+.. _GH1559: https://github.com/pydata/pandas/issues/1559
+.. _GH1584: https://github.com/pydata/pandas/issues/1584
+.. _GH1566: https://github.com/pydata/pandas/issues/1566
+.. _GH1569: https://github.com/pydata/pandas/issues/1569
+.. _GH1571: https://github.com/pydata/pandas/issues/1571
+.. _GH1577: https://github.com/pydata/pandas/issues/1577
+.. _GH1599: https://github.com/pydata/pandas/issues/1599
@@ -98,11 +98,11 @@ You can plot one column versus another using the `x` and `y` keywords in
 
    plt.figure()
 
-   df = DataFrame(np.random.randn(1000, 2), columns=['B', 'C']).cumsum()
-   df['A'] = Series(range(len(df)))
+   df3 = DataFrame(np.random.randn(1000, 2), columns=['B', 'C']).cumsum()
+   df3['A'] = Series(range(len(df3)))
 
    @savefig df_plot_xy.png width=4.5in
-   df.plot(x='A', y='B')
+   df3.plot(x='A', y='B')
 
 
 Plotting on a Secondary Y-axis
@@ -339,6 +339,8 @@ of the same class will usually be closer together and form larger structures.
    @savefig andrews_curves.png width=6in
    andrews_curves(data, 'Name')
 
+.. _visualization.parallel_coordinates:
+
 Parallel Coordinates
 ~~~~~~~~~~~~~~~~~~~~
 
@@ -402,6 +404,8 @@ confidence band.
    @savefig autocorrelation_plot.png width=6in
    autocorrelation_plot(data)
 
+.. _visualization.bootstrap:
+
 Bootstrap Plot
 ~~~~~~~~~~~~~~
 
@@ -420,6 +424,8 @@ are what constitutes the bootstrap plot.
    @savefig bootstrap_plot.png width=8in
    bootstrap_plot(data, size=50, samples=500, color='grey')
 
+.. _visualization.radviz:
+
 RadViz
 ~~~~~~
 
 
@@ -16,6 +16,8 @@ What's New
 
 These are new features and improvements of note in each release.
 
+.. include:: v0.8.1.txt
+
 .. include:: v0.8.0.txt
 
 .. include:: v0.7.3.txt
 
@@ -133,10 +133,18 @@ def to_string(self):
 
 if py3compat.PY3:  # pragma: no cover
     _encode_diff = lambda x: 0
+
+    _strlen = len
 else:
     def _encode_diff(x):
         return len(x) - len(x.decode('utf-8'))
 
+    def _strlen(x):
+        try:
+            return len(x.decode('utf-8'))
+        except UnicodeError:
+            return len(x)
+
 class DataFrameFormatter(object):
     """
     Render a DataFrame
@@ -205,7 +213,7 @@ def to_string(self, force_unicode=False):
                 if self.header:
                     fmt_values = self._format_col(i)
                     cheader = str_columns[i]
-                    max_len = max(max(len(x) for x in fmt_values),
+                    max_len = max(max(_strlen(x) for x in fmt_values),
                                   max(len(x) for x in cheader))
                     if self.justify == 'left':
                         cheader = [x.ljust(max_len) for x in cheader]
@@ -624,7 +632,7 @@ def _make_fixed_width(strings, justify='right'):
     if len(strings) == 0:
         return strings
 
-    max_len = max(len(x) for x in strings)
+    max_len = max(_strlen(x) for x in strings)
     conf_max = print_config.max_colwidth
     if conf_max is not None and max_len > conf_max:
         max_len = conf_max
@@ -635,7 +643,12 @@ def _make_fixed_width(strings, justify='right'):
         justfunc = lambda self, x: self.rjust(x)
 
     def just(x):
-        return justfunc(x[:max_len], max_len)
+        try:
+            eff_len = max_len + _encode_diff(x)
+        except UnicodeError:
+            eff_len = max_len
+
+        return justfunc(x[:eff_len], eff_len)
 
     return [just(x) for x in strings]