Merge tag 'v0.9.1' into debian

yarikoptic · yarikoptic · commit 6a14733f3f78 · 2012-11-14T22:42:09.000-05:00
Version 0.9.1 * tag 'v0.9.1': RLS: Version 0.9.1 final BUG: icol() should propagate fill_value for sparse data frames pandas-dev#2249 TST: icol() should propagate fill_value for sparse data frames BUG: override SparseDataFrame.icol to use __getitem__ instead of accessing _data internals. close pandas-dev#2251 BUG: make Series.tz_localize work with length-0 non-DatetimeIndex. close pandas-dev#2248 BUG: parallel_coordinates bugfix with matplotlib 1.2.0. close pandas-dev#2237 BUG: issue constructing DataFrame from empty Series with name. close pandas-dev#2234 ENH: disable repr dependence on terminal width when running non-interactively. pandas-dev#1610 BUG: ExcelWriter raises exception on PeriodIndex pandas-dev#2240 BUG: SparseDataFrame.icol return SparseSeries. SparseSeries.from_array return SparseSeries. close pandas-dev#2227, pandas-dev#2229 BUG: fix tz-aware resampling issue. close pandas-dev#2245
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -25,7 +25,7 @@ Where to get it
 pandas 0.9.1
 ============
 
-**Release date:** NOT YET RELEASED
+**Release date:** 2012-11-14
 
 **New features**
 
@@ -103,6 +103,14 @@ pandas 0.9.1
   - Fix conversion of mixed-type DataFrame to ndarray with dup columns (#2236)
   - Fix duplicate columns issue (#2218, #2219)
   - Fix SparseSeries.__pow__ issue with NA input (#2220)
+  - Fix icol with integer sequence failure (#2228)
+  - Fixed resampling tz-aware time series issue (#2245)
+  - SparseDataFrame.icol was not returning SparseSeries (#2227, #2229)
+  - Enable ExcelWriter to handle PeriodIndex (#2240)
+  - Fix issue constructing DataFrame from empty Series with name (#2234)
+  - Use console-width detection in interactive sessions only (#1610)
+  - Fix parallel_coordinates legend bug with mpl 1.2.0 (#2237)
+  - Make tz_localize work in corner case of empty Series (#2248)
 
 pandas 0.9.0
 ============
diff --git a/doc/source/v0.9.1.txt b/doc/source/v0.9.1.txt
@@ -1,7 +1,7 @@
 .. _whatsnew_0901:
 
-v0.9.1 (November 8, 2012)
--------------------------
+v0.9.1 (November 14, 2012)
+--------------------------
 
 This is a bugfix release from 0.9.0 and includes several new features and
 enhancements along with a large number of bug fixes. The new features include
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -1073,6 +1073,14 @@ def _concat_compat(to_concat, axis=0):
     else:
         return np.concatenate(to_concat, axis=axis)
 
+def in_interactive_session():
+    """Check whether we are running in an interactive shell.
+
+    Returns True when ``__main__`` has no ``__file__`` (python/ipython shell).
+    """
+    import __main__ as main
+    return not hasattr(main, '__file__')
+
 # Unicode consolidation
 # ---------------------
 #
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -321,6 +321,7 @@ class DataFrame(NDFrame):
     _auto_consolidate = True
     _verbose_info = True
     _het_axis = 1
+    _col_klass = Series
 
     _AXIS_NUMBERS = {
         'index': 0,
@@ -581,13 +582,15 @@ def _need_info_repr_(self):
         else:
             # save us
             if (len(self.index) > max_rows or
-                len(self.columns) > terminal_width // 2):
+                (com.in_interactive_session() and
+                len(self.columns) > terminal_width // 2)):
                 return True
             else:
                 buf = StringIO()
                 self.to_string(buf=buf)
                 value = buf.getvalue()
-                if max([len(l) for l in value.split('\n')]) > terminal_width:
+                if (max([len(l) for l in value.split('\n')]) > terminal_width and
+                    com.in_interactive_session()):
                     return True
                 else:
                     return False
@@ -1179,8 +1182,12 @@ def _helper_csvexcel(self, writer, na_rep=None, cols=None,
                 encoded_cols = list(cols)
                 writer.writerow(encoded_cols)
 
-        nlevels = getattr(self.index, 'nlevels', 1)
-        for j, idx in enumerate(self.index):
+        data_index = self.index
+        if isinstance(self.index, PeriodIndex):
+            data_index = self.index.to_timestamp()
+
+        nlevels = getattr(data_index, 'nlevels', 1)
+        for j, idx in enumerate(data_index):
             row_fields = []
             if index:
                 if nlevels == 1:
@@ -1733,7 +1740,15 @@ def icol(self, i):
                     return self.ix[:, i]
 
             values = self._data.iget(i)
-            return Series.from_array(values, index=self.index, name=label)
+            if hasattr(self,'default_fill_value'):
+                s = self._col_klass.from_array(values, index=self.index,
+                                               name=label,
+                                               fill_value= self.default_fill_value)
+            else:
+                s = self._col_klass.from_array(values, index=self.index,
+                                               name=label)
+
+            return s
 
     def _ixs(self, i, axis=0):
         if axis == 0:
@@ -5083,6 +5098,9 @@ def extract_index(data):
 
 def _prep_ndarray(values, copy=True):
     if not isinstance(values, np.ndarray):
+        if len(values) == 0:
+            return np.empty((0, 0), dtype=object)
+
         arr = np.asarray(values)
         # NumPy strings are a pain, convert to object
         if issubclass(arr.dtype.type, basestring):
@@ -5095,11 +5113,7 @@ def _prep_ndarray(values, copy=True):
             values = values.copy()
 
     if values.ndim == 1:
-        N = values.shape[0]
-        if N == 0:
-            values = values.reshape((values.shape[0], 0))
-        else:
-            values = values.reshape((values.shape[0], 1))
+        values = values.reshape((values.shape[0], 1))
     elif values.ndim != 2:
         raise Exception('Must pass 2-d input')
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2756,7 +2756,15 @@ def tz_localize(self, tz, copy=True):
         -------
         localized : TimeSeries
         """
-        new_index = self.index.tz_localize(tz)
+        from pandas.tseries.index import DatetimeIndex
+
+        if not isinstance(self.index, DatetimeIndex):
+            if len(self.index) > 0:
+                raise Exception('Cannot tz-localize non-time series')
+
+            new_index = DatetimeIndex([], tz=tz)
+        else:
+            new_index = self.index.tz_localize(tz)
 
         new_values = self.values
         if copy:
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
@@ -64,6 +64,7 @@ class SparseDataFrame(DataFrame):
     _columns = None
     _series = None
     _is_mixed_type = False
+    _col_klass = SparseSeries
     ndim = 2
 
     def __init__(self, data=None, index=None, columns=None,
@@ -342,6 +343,37 @@ def __getitem__(self, key):
             else:  # pragma: no cover
                 raise
 
+    def icol(self, i):
+        """
+        Retrieve the i-th column or columns of the DataFrame by location
+
+        Parameters
+        ----------
+        i : int, slice, or sequence of integers
+
+        Notes
+        -----
+        If slice passed, the resulting data will be a view
+
+        Returns
+        -------
+        column : Series (int) or DataFrame (slice, sequence)
+        """
+        label = self.columns[i]  # needed by the slice branch as well
+        if isinstance(i, slice):
+            # need to return view
+            lab_slice = slice(label[0], label[-1])
+            return self.ix[:, lab_slice]
+
+        if isinstance(label, Index):
+            if self.columns.inferred_type == 'integer':
+                # XXX re: #2228
+                return self.reindex(columns=label)
+            return self.ix[:, i]
+
+        # single position: go through __getitem__ rather than _data internals
+        return self[label]
+
     @Appender(DataFrame.get_value.__doc__, indents=0)
     def get_value(self, index, col):
         s = self._series[col]
diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py
@@ -146,6 +146,13 @@ def __new__(cls, data, index=None, sparse_index=None, kind='block',
         output.name = name
         return output
 
+    @classmethod
+    def from_array(cls, arr, index=None, name=None, copy=False,fill_value=None):
+        """
+        Simplified alternate constructor: forwards arguments to SparseSeries
+        """
+        return SparseSeries(arr, index=index, name=name, copy=copy,fill_value=fill_value)
+
     def __init__(self, data, index=None, sparse_index=None, kind='block',
                  fill_value=None, name=None, copy=False):
         """Data structure for labeled, sparse floating point data
diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py
@@ -901,6 +901,18 @@ def test_getitem(self):
 
         self.assertRaises(Exception, sdf.__getitem__, ['a', 'd'])
 
+    def test_icol(self):
+        # #2227: icol(0) must be a SparseSeries equal to self.frame['A']
+        result = self.frame.icol(0)
+        self.assertTrue(isinstance(result, SparseSeries))
+        assert_sp_series_equal(result, self.frame['A'])
+
+        # #2251: sp_index type from icol must match the one from __getitem__
+        data = {'A' : [0,1 ]}
+        iframe = SparseDataFrame(data, default_kind='integer')
+        self.assertEquals(type(iframe['A'].sp_index),
+                          type(iframe.icol(0).sp_index))
+
     def test_set_value(self):
         res = self.frame.set_value('foobar', 'B', 1.5)
         self.assert_(res is not self.frame)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -1327,6 +1327,11 @@ def test_irow_icol_duplicates(self):
         xp = df.ix[:, [0]]
         assert_frame_equal(rs, xp)
 
+    def test_icol_sparse_propegate_fill_value(self):
+        from pandas.sparse.api import SparseDataFrame
+        df=SparseDataFrame({'A' : [999,1]},default_fill_value=999)  # 999 is also the fill value
+        self.assertTrue( len(df['A'].sp_values) == len(df.icol(0).sp_values))
+
     def test_iget_value(self):
         for i, row in enumerate(self.frame.index):
             for j, col in enumerate(self.frame.columns):
@@ -2269,11 +2274,16 @@ def test_constructor_orient(self):
         assert_frame_equal(recons, expected)
 
     def test_constructor_Series_named(self):
-        a = Series([1,2,3], index=['a','b','c'], name='x')
+        a = Series([1, 2, 3], index=['a', 'b', 'c'], name='x')
         df = DataFrame(a)
         self.assert_(df.columns[0] == 'x')
         self.assert_(df.index.equals(a.index))
 
+        # #2234: an empty named Series must still yield a column named 'x'
+        a = Series([], name='x')
+        df = DataFrame(a)
+        self.assert_(df.columns[0] == 'x')
+
     def test_constructor_Series_differently_indexed(self):
         # name
         s1 = Series([1, 2, 3], index=['a','b','c'], name='x')
@@ -3842,6 +3852,24 @@ def test_to_excel_from_excel(self):
         assert_frame_equal(frame, recons)
         os.remove(path)
 
+    def test_to_excel_periodindex(self):
+        try:
+            import xlwt
+            import xlrd
+            import openpyxl
+        except ImportError:
+            raise nose.SkipTest  # skip when any of the Excel modules is missing
+
+        for ext in ['xls', 'xlsx']:
+            path = '__tmp__.' + ext
+            frame = self.tsframe
+            xp = frame.resample('M', kind='period')
+            xp.to_excel(path, 'sht1')
+
+            reader = ExcelFile(path)  # read the written file back
+            rs = reader.parse('sht1', index_col=0, parse_dates=True)
+            assert_frame_equal(xp, rs.to_period('M'))
+            os.remove(path)
 
     def test_to_excel_multiindex(self):
         try:
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
@@ -452,12 +452,14 @@ def random_color(column):
     for i in range(n):
         row = df.irow(i).values
         y = row
-        label = None
         kls = class_col.iget_value(i)
         if com.pprint_thing(kls) not in used_legends:
             label = com.pprint_thing(kls)
             used_legends.add(label)
-        ax.plot(x, y, color=random_color(kls), label=label, **kwds)
+            ax.plot(x, y, color=random_color(kls),
+                    label=label, **kwds)
+        else:
+            ax.plot(x, y, color=random_color(kls), **kwds)
 
     for i in range(ncols):
         ax.axvline(i, linewidth=1, color='black')
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
@@ -203,6 +203,10 @@ def __new__(cls, data=None,
                 if isinstance(data, DatetimeIndex):
                     if name is not None:
                         data.name = name
+
+                    if tz is not None:
+                        return data.tz_localize(tz)
+
                     return data
 
         if issubclass(data.dtype.type, basestring):
diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
@@ -110,7 +110,10 @@ def _get_time_bins(self, axis):
 
         first, last = _get_range_edges(axis, self.freq, closed=self.closed,
                                        base=self.base)
-        binner = labels = DatetimeIndex(freq=self.freq, start=first, end=last)
+        tz = axis.tz
+        binner = labels = DatetimeIndex(freq=self.freq,
+                                        start=first.replace(tzinfo=None),
+                                        end=last.replace(tzinfo=None), tz=tz)
 
         # a little hack
         trimmed = False
diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
@@ -823,6 +823,24 @@ def test_resample_tz_localized(self):
         # it works
         result = ts_local.resample('D')
 
+        # #2245
+        idx = date_range('2001-09-20 15:59','2001-09-20 16:00', freq='T',
+                         tz='Australia/Sydney')
+        s = Series([1,2], index=idx)
+
+        result = s.resample('D')
+        ex_index = date_range('2001-09-21', periods=1, freq='D',
+                              tz='Australia/Sydney')
+        expected = Series([1.5], index=ex_index)
+
+        assert_series_equal(result, expected)
+
+        # for good measure
+        result = s.resample('D', kind='period')
+        ex_index = period_range('2001-09-20', periods=1, freq='D')
+        expected = Series([1.5], index=ex_index)
+        assert_series_equal(result, expected)
+
     def test_closed_left_corner(self):
         # #1465
         s = Series(np.random.randn(21),
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
@@ -137,6 +137,17 @@ def test_tz_localize_dti(self):
                             freq='L')
         self.assertRaises(pytz.NonExistentTimeError, dti.tz_localize, 'US/Eastern')
 
+    def test_tz_localize_empty_series(self):
+        # #2248: tz_localize on an empty Series should set the tz on its index
+
+        ts = Series()
+
+        ts2 = ts.tz_localize('utc')
+        self.assertTrue(ts2.index.tz == pytz.utc)
+
+        ts2 = ts.tz_localize('US/Eastern')
+        self.assertTrue(ts2.index.tz == pytz.timezone('US/Eastern'))
+
     def test_astimezone(self):
         utc = Timestamp('3/11/2012 22:00', tz='UTC')
         expected = utc.tz_convert('US/Eastern')
diff --git a/setup.py b/setup.py
@@ -207,7 +207,7 @@
 MICRO = 1
 ISRELEASED = True
 VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
-QUALIFIER = 'rc1'
+QUALIFIER = ''
 
 FULLVERSION = VERSION
 if not ISRELEASED: