diff --git a/doc/source/release.rst b/doc/source/release.rst index 151402a8b5370..ba27079b93b96 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -59,7 +59,7 @@ API Changes - ``Series.sort`` will raise a ``ValueError`` (rather than a ``TypeError``) on sorting an object that is a view of another (:issue:`5856`, :issue:`5853`) - Raise/Warn ``SettingWithCopyError`` (according to the option ``chained_assignment`` in more cases, - when detecting chained assignment, related (:issue:`5938`) + when detecting chained assignment, related (:issue:`5938`, :issue:`6025`) - DataFrame.head(0) returns self instead of empty frame (:issue:`5846`) - ``autocorrelation_plot`` now accepts ``**kwargs``. (:issue:`5623`) - ``convert_objects`` now accepts a ``convert_timedeltas='coerce'`` argument to allow forced dtype conversion of @@ -89,6 +89,7 @@ Improvements to existing features - improved dtype inference for ``timedelta`` like passed to constructors (:issue:`5458`,:issue:`5689`) - escape special characters when writing to latex (:issue: `5374`) - perf improvements in ``DataFrame.apply`` (:issue:`6013`) + - ``Dataframe``s ``to_excel`` will now also accept file-like objects as output (:issue: `5992`) .. _release.bug_fixes-0.13.1: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index adb7b2d2b2691..2d4fd2d792732 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1582,7 +1582,7 @@ def _ixs(self, i, axis=0, copy=False): new_values, copy = self._data.fast_2d_xs(i, copy=copy) result = Series(new_values, index=self.columns, name=self.index[i], dtype=new_values.dtype) - result.is_copy=copy + result._set_is_copy(self, copy=copy) return result # icol @@ -1707,7 +1707,7 @@ def _getitem_multilevel(self, key): if isinstance(result, Series): result = Series(result, index=self.index, name=key) - result.is_copy=True + result._set_is_copy(self) return result else: return self._get_item_cache(key) @@ -1878,6 +1878,7 @@ def __setitem__(self, key, value): self._set_item(key, value) def _setitem_slice(self, key, value): + self._check_setitem_copy() self.ix._setitem_with_indexer(key, value) def _setitem_array(self, key, value): @@ -1912,6 +1913,7 @@ def _setitem_frame(self, key, value): raise TypeError( 'Cannot do boolean setting on mixed-type frame') + self._check_setitem_copy() self.where(-key, value, inplace=True) def _ensure_valid_index(self, value): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eb24301e5e603..63454d32a5fa1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2,6 +2,7 @@ import warnings import operator import weakref +import gc import numpy as np import pandas.lib as lib @@ -97,7 +98,7 @@ def __init__(self, data, axes=None, copy=False, dtype=None, for i, ax in enumerate(axes): data = data.reindex_axis(ax, axis=i) - object.__setattr__(self, 'is_copy', False) + object.__setattr__(self, 'is_copy', None) object.__setattr__(self, '_data', data) object.__setattr__(self, '_item_cache', {}) @@ -994,6 +995,9 @@ def _get_item_cache(self, item): res = self._box_item_values(item, values) cache[item] = res res._set_as_cached(item, self) + + # for a chain + res.is_copy = self.is_copy return res def _set_as_cached(self, item, cacher): @@ -1031,16 +1035,14 @@ def _maybe_update_cacher(self, clear=False): # a copy if ref is None: del self._cacher - self.is_copy = True - self._check_setitem_copy(stacklevel=5, t='referant') else: try: ref._maybe_cache_changed(cacher[0], self) except: pass - if ref.is_copy: - self.is_copy = True - self._check_setitem_copy(stacklevel=5, t='referant') + + # check if we are a copy + self._check_setitem_copy(stacklevel=5, t='referant') if clear: self._clear_item_cache() @@ -1055,13 +1057,35 @@ def _set_item(self, key, value): self._data.set(key, value) self._clear_item_cache() + def _set_is_copy(self, ref=None, copy=True): + if not copy: + self.is_copy = None + else: + if ref is not None: + self.is_copy = weakref.ref(ref) + else: + self.is_copy = None + def _check_setitem_copy(self, stacklevel=4, t='setting'): """ validate if we are doing a settitem on a chained copy. If you call this function, be sure to set the stacklevel such that the user will see the error *at the level of setting*""" if self.is_copy: + value = config.get_option('mode.chained_assignment') + if value is None: + return + + # see if the copy is not actually refererd; if so, then disolve + # the copy weakref + try: + gc.collect(2) + if not gc.get_referents(self.is_copy()): + self.is_copy = None + return + except: + pass if t == 'referant': t = ("A value is trying to be set on a copy of a slice from a " @@ -1143,7 +1167,7 @@ def take(self, indices, axis=0, convert=True, is_copy=True): # maybe set copy if we didn't actually change the index if is_copy and not result._get_axis(axis).equals(self._get_axis(axis)): - result.is_copy=is_copy + result._set_is_copy(self) return result @@ -1276,12 +1300,12 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True): new_values, copy = self._data.fast_2d_xs(loc, copy=copy) result = Series(new_values, index=self.columns, name=self.index[loc]) - result.is_copy=True else: result = self[loc] result.index = new_index + result._set_is_copy(self) return result _xs = xs diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 474b5ae3ac6d6..9a67f5fe3854e 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -211,7 +211,7 @@ def _setitem_with_indexer(self, indexer, value): labels = _safe_append_to_index(index, key) self.obj._data = self.obj.reindex_axis(labels, i)._data self.obj._maybe_update_cacher(clear=True) - self.obj.is_copy=False + self.obj.is_copy=None if isinstance(labels, MultiIndex): self.obj.sortlevel(inplace=True) @@ -418,6 +418,7 @@ def can_do_equal_len(): if isinstance(value, ABCPanel): value = self._align_panel(indexer, value) + # actually do the set self.obj._data = self.obj._data.setitem(indexer, value) self.obj._maybe_update_cacher(clear=True) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index ad7c37fba4c2f..63d5f57afe2c6 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -431,8 +431,9 @@ def save(self): def __init__(self, path, engine=None, **engine_kwargs): # validate that this engine can handle the extnesion - ext = os.path.splitext(path)[-1] - self.check_extension(ext) + if not hasattr(path, 'read'): + ext = os.path.splitext(path)[-1] + self.check_extension(ext) self.path = path self.sheets = {} diff --git a/pandas/io/json.py b/pandas/io/json.py index 698f7777a1100..4ed325df9a747 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -60,7 +60,7 @@ def __init__(self, obj, orient, date_format, double_precision, self.date_unit = date_unit self.default_handler = default_handler - self.is_copy = False + self.is_copy = None self._format_axes() def _format_axes(self): diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index edcb80ae74f6f..0062f0c514fe5 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -17,6 +17,7 @@ ) from pandas.util.testing import ensure_clean from pandas.core.config import set_option, get_option +from pandas.compat import BytesIO import pandas.util.testing as tm import pandas as pd @@ -410,6 +411,22 @@ def test_excelwriter_contextmanager(self): tm.assert_frame_equal(found_df, self.frame) tm.assert_frame_equal(found_df2, self.frame2) + def test_stringio_writer(self): + _skip_if_no_xlsxwriter() + _skip_if_no_xlrd() + + path = BytesIO() + with ExcelWriter(path, engine='xlsxwriter', **{'options': {'in-memory': True}}) as ew: + self.frame.to_excel(ew, 'test1', engine='xlsxwriter') + ew.save() + path.seek(0) + ef = ExcelFile(path) + found_df = ef.parse('test1') + tm.assert_frame_equal(self.frame, found_df) + path.close() + + + def test_roundtrip(self): _skip_if_no_xlrd() diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index d0c783725f8bb..86627271c8e6a 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -181,7 +181,7 @@ def test_repr_should_return_str(self): u("\u03c6")] cols = [u("\u03c8")] df = DataFrame(data, columns=cols, index=index1) - self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 + self.assertTrue(type(df.__repr__()) == str) # both py2 / 3 def test_repr_no_backslash(self): with option_context('mode.sim_interactive', True): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 7daf95ac15a95..3270d80dc9dad 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1380,10 +1380,10 @@ def test_set_value_keeps_names(self): columns=['one', 'two', 'three', 'four'], index=idx) df = df.sortlevel() - self.assert_(df.is_copy is False) + self.assert_(df.is_copy is None) self.assertEqual(df.index.names, ('Name', 'Number')) df = df.set_value(('grethe', '4'), 'one', 99.34) - self.assert_(df.is_copy is False) + self.assert_(df.is_copy is None) self.assertEqual(df.index.names, ('Name', 'Number')) def test_names(self): diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index f80b0d36445cf..8c4572cf63dd1 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -2022,7 +2022,7 @@ def test_detect_chained_assignment(self): # work with the chain expected = DataFrame([[-5,1],[-6,3]],columns=list('AB')) df = DataFrame(np.arange(4).reshape(2,2),columns=list('AB'),dtype='int64') - self.assert_(not df.is_copy) + self.assert_(df.is_copy is None) df['A'][0] = -5 df['A'][1] = -6 @@ -2030,11 +2030,11 @@ def test_detect_chained_assignment(self): expected = DataFrame([[-5,2],[np.nan,3.]],columns=list('AB')) df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) - self.assert_(not df.is_copy) + self.assert_(df.is_copy is None) df['A'][0] = -5 df['A'][1] = np.nan assert_frame_equal(df, expected) - self.assert_(not df['A'].is_copy) + self.assert_(df['A'].is_copy is None) # using a copy (the chain), fails df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) @@ -2046,7 +2046,7 @@ def f(): df = DataFrame({'a' : ['one', 'one', 'two', 'three', 'two', 'one', 'six'], 'c' : Series(range(7),dtype='int64') }) - self.assert_(not df.is_copy) + self.assert_(df.is_copy is None) expected = DataFrame({'a' : ['one', 'one', 'two', 'three', 'two', 'one', 'six'], 'c' : [42,42,2,3,4,42,6]}) @@ -2075,7 +2075,7 @@ def f(): # make sure that is_copy is picked up reconstruction # GH5475 df = DataFrame({"A": [1,2]}) - self.assert_(df.is_copy is False) + self.assert_(df.is_copy is None) with tm.ensure_clean('__tmp__pickle') as path: df.to_pickle(path) df2 = pd.read_pickle(path) @@ -2100,25 +2100,34 @@ def random_text(nobs=100): # always a copy x = df.iloc[[0,1,2]] - self.assert_(x.is_copy is True) + self.assert_(x.is_copy is not None) x = df.iloc[[0,1,2,4]] - self.assert_(x.is_copy is True) + self.assert_(x.is_copy is not None) # explicity copy indexer = df.letters.apply(lambda x : len(x) > 10) df = df.ix[indexer].copy() - self.assert_(df.is_copy is False) + self.assert_(df.is_copy is None) df['letters'] = df['letters'].apply(str.lower) # implicity take df = random_text(100000) indexer = df.letters.apply(lambda x : len(x) > 10) df = df.ix[indexer] - self.assert_(df.is_copy is True) + self.assert_(df.is_copy is not None) + df['letters'] = df['letters'].apply(str.lower) + + # implicity take 2 + df = random_text(100000) + indexer = df.letters.apply(lambda x : len(x) > 10) + df = df.ix[indexer] + self.assert_(df.is_copy is not None) df.loc[:,'letters'] = df['letters'].apply(str.lower) - # this will raise - #df['letters'] = df['letters'].apply(str.lower) + # should be ok even though its a copy! + self.assert_(df.is_copy is None) + df['letters'] = df['letters'].apply(str.lower) + self.assert_(df.is_copy is None) df = random_text(100000) indexer = df.letters.apply(lambda x : len(x) > 10) @@ -2126,7 +2135,7 @@ def random_text(nobs=100): # an identical take, so no copy df = DataFrame({'a' : [1]}).dropna() - self.assert_(df.is_copy is False) + self.assert_(df.is_copy is None) df['a'] += 1 # inplace ops @@ -2165,7 +2174,15 @@ def f(): df[['c']][mask] = df[['b']][mask] self.assertRaises(com.SettingWithCopyError, f) - pd.set_option('chained_assignment','warn') + # false positives GH6025 + df = DataFrame ({'column1':['a', 'a', 'a'], 'column2': [4,8,9] }) + str(df) + df['column1'] = df['column1'] + 'b' + str(df) + df = df [df['column2']!=8] + str(df) + df['column1'] = df['column1'] + 'c' + str(df) def test_float64index_slicing_bug(self): # GH 5557, related to slicing a float index diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 0ede6bd2bd46d..1571cf3bf6a7d 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -174,7 +174,7 @@ def _check_output(res, col, rows=['A', 'B'], cols=['C']): exp = self.data.groupby(rows)[col].mean() tm.assert_series_equal(cmarg, exp) - res.sortlevel(inplace=True) + res = res.sortlevel() rmarg = res.xs(('All', ''))[:-1] exp = self.data.groupby(cols)[col].mean() tm.assert_series_equal(rmarg, exp)