Skip to content

Commit 1ba5625

Browse files
committed
ENH: added ignore_index option to DataFrame.append, and speed optimizations with _ensure_index function
1 parent eb1b59f commit 1ba5625

File tree

10 files changed

+105
-42
lines changed

RELEASE.rst

+4
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,10 @@ This is a bug fix release
1717

1818
-
1919

20+
**Improvements to existing features**
21+
22+
- Some speed enhancements with internal Index type-checking function
23+
2024
pandas 0.4
2125
==========
2226

pandas/core/common.py

-7
Original file line number | Diff line number | Diff line change
@@ -138,13 +138,6 @@ def _mut_exclusive(arg1, arg2):
138138
else:
139139
return arg2
140140

141-
def _ensure_index(index_like):
142-
from pandas.core.index import Index
143-
if not isinstance(index_like, Index):
144-
index_like = Index(index_like)
145-
146-
return index_like
147-
148141
def _any_none(*args):
149142
for arg in args:
150143
if arg is None:

pandas/core/frame.py

+52-21
Original file line number | Diff line number | Diff line change
@@ -19,15 +19,15 @@
1919
from numpy import nan
2020
import numpy as np
2121

22-
from pandas.core.common import (isnull, notnull, PandasError, _ensure_index,
22+
from pandas.core.common import (isnull, notnull, PandasError,
2323
_try_sort, _pfixed, _default_index,
2424
_infer_dtype, _stringify)
2525
from pandas.core.daterange import DateRange
2626
from pandas.core.generic import AxisProperty, NDFrame
27-
from pandas.core.index import Index, MultiIndex, NULL_INDEX
27+
from pandas.core.index import Index, MultiIndex, NULL_INDEX, _ensure_index
2828
from pandas.core.indexing import _NDFrameIndexer, _maybe_droplevels
2929
from pandas.core.internals import BlockManager, make_block, form_blocks
30-
from pandas.core.series import Series, _is_bool_indexer
30+
from pandas.core.series import Series, _is_bool_indexer, _maybe_upcast
3131
from pandas.util.decorators import deprecate
3232
import pandas.core.common as common
3333
import pandas.core.datetools as datetools
@@ -2008,11 +2008,18 @@ def f(x):
20082008
#----------------------------------------------------------------------
20092009
# Merging / joining methods
20102010

2011-
def append(self, other):
2011+
def append(self, other, ignore_index=False):
20122012
"""
20132013
Append columns of other to end of this frame's columns and index.
20142014
Columns not in this frame are added as new columns.
20152015
2016+
Parameters
2017+
----------
2018+
other : DataFrame
2019+
ignore_index : boolean, default False
2020+
If True do not use the index labels. Useful for gluing together
2021+
record arrays
2022+
20162023
Returns
20172024
-------
20182025
appended : DataFrame
@@ -2022,28 +2029,53 @@ def append(self, other):
20222029
if not self:
20232030
return other.copy()
20242031

2025-
new_index = np.concatenate((self.index, other.index))
2026-
new_data = {}
2032+
if ignore_index:
2033+
new_index = None
2034+
else:
2035+
new_index = np.concatenate((self.index, other.index))
2036+
2037+
if self.columns.equals(other.columns):
2038+
return self._append_same_columns(other, new_index)
2039+
else:
2040+
return self._append_different_columns(other, new_index)
20272041

2028-
new_columns = self.columns
2042+
def _append_different_columns(self, other, new_index):
2043+
new_columns = self.columns + other.columns
2044+
new_data = self._append_column_by_column(other)
2045+
return self._constructor(data=new_data, index=new_index,
2046+
columns=new_columns)
2047+
2048+
def _append_same_columns(self, other, new_index):
2049+
if self._is_mixed_type:
2050+
new_data = self._append_column_by_column(other)
2051+
else:
2052+
new_data= np.concatenate((self.values, other.values), axis=0)
2053+
return self._constructor(new_data, index=new_index,
2054+
columns=self.columns)
20292055

2030-
if not new_columns.equals(other.columns):
2031-
new_columns = self.columns + other.columns
2056+
def _append_column_by_column(self, other):
2057+
def _concat_missing(values, n):
2058+
values = _maybe_upcast(values)
2059+
missing_values = np.empty(n, dtype=values.dtype)
2060+
missing_values.fill(np.nan)
2061+
return values, missing_values
20322062

2033-
for column, series in self.iteritems():
2034-
values = series.values
2035-
if column in other:
2036-
other_values = other[column].values
2037-
new_data[column] = np.concatenate((values, other_values))
2063+
new_data = {}
2064+
for col in self:
2065+
values = self._data.get(col)
2066+
if col in other:
2067+
other_values = other._data.get(col)
20382068
else:
2039-
new_data[column] = series
2069+
values, other_values = _concat_missing(values, len(other))
2070+
new_data[col] = np.concatenate((values, other_values))
20402071

2041-
for column, series in other.iteritems():
2042-
if column not in self:
2043-
new_data[column] = series
2072+
for col in other:
2073+
values = other._data.get(col)
2074+
if col not in self:
2075+
values, missing_values = _concat_missing(values, len(self))
2076+
new_data[col] = np.concatenate((missing_values, values))
20442077

2045-
return self._constructor(data=new_data, index=new_index,
2046-
columns=new_columns)
2078+
return new_data
20472079

20482080
def join(self, other, on=None, how=None, lsuffix='', rsuffix=''):
20492081
"""
@@ -3137,7 +3169,6 @@ def _homogenize(data, index, columns, dtype=None):
31373169

31383170
return homogenized
31393171

3140-
31413172
def _put_str(s, space):
31423173
return ('%s' % s)[:space].ljust(space)
31433174

pandas/core/generic.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
11
import numpy as np
22
import cPickle
33

4-
from pandas.core.common import _ensure_index
5-
from pandas.core.index import Index, MultiIndex
4+
from pandas.core.index import Index, MultiIndex, _ensure_index
65
import pandas.core.datetools as datetools
76

87
#-------------------------------------------------------------------------------

pandas/core/index.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,7 @@
66
import numpy as np
77

88
from pandas.core.common import (_format, adjoin as _adjoin, _stringify,
9-
_ensure_index, _is_bool_indexer,
10-
_asarray_tuplesafe)
9+
_is_bool_indexer, _asarray_tuplesafe)
1110
from pandas.util.decorators import deprecate, cache_readonly
1211
import pandas.core.common as common
1312
import pandas._tseries as _tseries
@@ -1226,3 +1225,7 @@ def _sparsify(label_list):
12261225

12271226
return zip(*result)
12281227

1228+
def _ensure_index(index_like):
1229+
if isinstance(index_like, Index):
1230+
return index_like
1231+
return Index(index_like)

pandas/core/internals.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,7 @@
33
from numpy import nan
44
import numpy as np
55

6-
from pandas.core.index import Index
7-
from pandas.core.common import _ensure_index
6+
from pandas.core.index import Index, _ensure_index
87
import pandas.core.common as common
98
import pandas._tseries as _tseries
109

pandas/core/panel.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,9 @@
1010

1111
import numpy as np
1212

13-
from pandas.core.common import (PandasError, _mut_exclusive, _ensure_index,
13+
from pandas.core.common import (PandasError, _mut_exclusive,
1414
_try_sort, _default_index, _infer_dtype)
15-
from pandas.core.index import Factor, Index, MultiIndex
15+
from pandas.core.index import Factor, Index, MultiIndex, _ensure_index
1616
from pandas.core.indexing import _NDFrameIndexer
1717
from pandas.core.internals import BlockManager, make_block, form_blocks
1818
from pandas.core.frame import DataFrame, _union_indexes

pandas/core/series.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,11 @@
1313
from numpy import nan, ndarray
1414
import numpy as np
1515

16-
from pandas.core.common import (isnull, notnull, _ensure_index,
17-
_is_bool_indexer, _default_index)
16+
from pandas.core.common import (isnull, notnull, _is_bool_indexer,
17+
_default_index)
1818
from pandas.core.daterange import DateRange
1919
from pandas.core.generic import PandasObject
20-
from pandas.core.index import Index, MultiIndex
20+
from pandas.core.index import Index, MultiIndex, _ensure_index
2121
from pandas.core.indexing import _SeriesIndexer, _maybe_droplevels
2222
from pandas.util.decorators import deprecate
2323
import pandas.core.datetools as datetools

pandas/core/sparse.py

+21-3
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,8 @@
1111
import operator
1212

1313
from pandas.core.common import (isnull, _pickle_array, _unpickle_array,
14-
_mut_exclusive, _ensure_index, _try_sort)
15-
from pandas.core.index import Index, MultiIndex, NULL_INDEX
14+
_mut_exclusive, _try_sort)
15+
from pandas.core.index import Index, MultiIndex, NULL_INDEX, _ensure_index
1616
from pandas.core.series import Series, TimeSeries
1717
from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray,
1818
_default_index)
@@ -624,7 +624,7 @@ class SparseDataFrame(DataFrame):
624624
_verbose_info = False
625625
_columns = None
626626
_series = None
627-
627+
_is_mixed_type = False
628628
ndim = 2
629629

630630
def __init__(self, data=None, index=None, columns=None,
@@ -1056,6 +1056,24 @@ def _rename_columns_inplace(self, mapper):
10561056
self.columns = new_columns
10571057
self._series = new_series
10581058

1059+
def _append_column_by_column(self, other):
1060+
new_data = {}
1061+
for col in self:
1062+
values = self[col].values
1063+
if col in other:
1064+
other_values = other[col].values
1065+
else:
1066+
values = _maybe_upcast(values)
1067+
other_values = np.empty(len(other), dtype=values.dtype)
1068+
other_values.fill(np.nan)
1069+
new_data[col] = np.concatenate((values, other_values))
1070+
1071+
for column, series in other.iteritems():
1072+
if column not in self:
1073+
new_data[column] = series.values
1074+
1075+
return new_data
1076+
10591077
def add_prefix(self, prefix):
10601078
f = (('%s' % prefix) + '%s').__mod__
10611079
return self.rename(columns=f)

pandas/tests/test_frame.py

+16
Original file line number | Diff line number | Diff line change
@@ -1517,6 +1517,22 @@ def test_append(self):
15171517
assert_frame_equal(self.frame, appended)
15181518
self.assert_(appended is not self.frame)
15191519

1520+
def test_append_records(self):
1521+
arr1 = np.zeros((2,),dtype=('i4,f4,a10'))
1522+
arr1[:] = [(1,2.,'Hello'),(2,3.,"World")]
1523+
1524+
arr2 = np.zeros((3,),dtype=('i4,f4,a10'))
1525+
arr2[:] = [(3, 4.,'foo'),
1526+
(5, 6.,"bar"),
1527+
(7., 8., 'baz')]
1528+
1529+
df1 = DataFrame(arr1)
1530+
df2 = DataFrame(arr2)
1531+
1532+
result = df1.append(df2, ignore_index=True)
1533+
expected = DataFrame(np.concatenate((arr1, arr2)))
1534+
assert_frame_equal(result, expected)
1535+
15201536
def test_asfreq(self):
15211537
offset_monthly = self.tsframe.asfreq(datetools.bmonthEnd)
15221538
rule_monthly = self.tsframe.asfreq('EOM')

0 commit comments

Comments (0)