From 1106007d0d7a7d96f873f6bae5d5efd377d33196 Mon Sep 17 00:00:00 2001 From: Andrew Loucky Date: Sat, 15 Jul 2017 15:25:07 -0400 Subject: [PATCH 1/2] DOC: add warning to append about inefficiency --- doc/make.py | 11 +++++++---- pandas/core/frame.py | 5 +++++ pandas/core/series.py | 11 +++++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/doc/make.py b/doc/make.py index acef563f301e4..b5faa6d9fe6a9 100755 --- a/doc/make.py +++ b/doc/make.py @@ -150,10 +150,13 @@ def _remove_notebooks(): print("Warning: Pandoc is not installed. Skipping notebooks.") _remove_notebooks() - yield - for nb, content in contents.items(): - with open(nb, 'wt') as f: - f.write(content) + try: + yield + except BaseException: + for nb, content in contents.items(): + with open(nb, 'wt') as f: + f.write(content) + raise def html(): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6559fc4c24ce2..4b80c53b62a20 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4618,6 +4618,11 @@ def append(self, other, ignore_index=False, verify_integrity=False): the DataFrame's index, the order of the columns in the resulting DataFrame will be unchanged. + Iteratively appending rows to a Dataframe can be more computationally + intense than a single concatenate. A better solution is to append those + rows to a list then concatenate the list with the original Dataframe + all at once. + See also -------- pandas.concat : General function to concatenate DataFrame, Series diff --git a/pandas/core/series.py b/pandas/core/series.py index e1f668dd3afda..ef5aaafe0207f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1522,6 +1522,17 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): verify_integrity : boolean, default False If True, raise Exception on creating index with duplicates + Notes + ----- + Iteratively appending to a series can be more computationally intense + than a single concatenate. 
A better solution is to append values to a + list then concatenate the list with the original series all at once. + + See also + -------- + pandas.concat : General function to concatenate DataFrame, Series + or Panel objects + Returns ------- appended : Series From ea4cc3080d312d35bf8fe5de0d598a8ad53951b2 Mon Sep 17 00:00:00 2001 From: Andrew Loucky Date: Sun, 16 Jul 2017 10:44:10 -0400 Subject: [PATCH 2/2] DOC: fix wording in frame and series --- pandas/core/frame.py | 34 ++++++++++++++++++++++++++++++---- pandas/core/series.py | 5 +++-- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4b80c53b62a20..4a937414960b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4618,10 +4618,10 @@ def append(self, other, ignore_index=False, verify_integrity=False): the DataFrame's index, the order of the columns in the resulting DataFrame will be unchanged. - Iteratively appending rows to a Dataframe can be more computationally - intense than a single concatenate. A better solution is to append those - rows to a list then concatenate the list with the original Dataframe - all at once. + Iteratively appending rows to a DataFrame can be more computationally + intensive than a single concatenate. A better solution is to append + those rows to a list and then concatenate the list with the original + DataFrame all at once. See also -------- @@ -4653,6 +4653,32 @@ def append(self, other, ignore_index=False, verify_integrity=False): 2 5 6 3 7 8 + The following, while not a recommended method for generating a + DataFrame, illustrates how to efficiently generate a DataFrame from + multiple data sources. + + Less efficient: + >>> df = pd.DataFrame(columns=['A']) + >>> for i in range(5): + ... df = df.append({'A': i}, ignore_index=True) + >>> df + A + 0 0 + 1 1 + 2 2 + 3 3 + 4 4 + + More efficient: + >>> pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)], + ...
ignore_index=True) + A + 0 0 + 1 1 + 2 2 + 3 3 + 4 4 + """ if isinstance(other, (Series, dict)): if isinstance(other, dict): diff --git a/pandas/core/series.py b/pandas/core/series.py index ef5aaafe0207f..8c5c85e341575 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1524,9 +1524,10 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): Notes ----- - Iteratively appending to a series can be more computationally intense + Iteratively appending to a Series can be more computationally intensive than a single concatenate. A better solution is to append values to a - list then concatenate the list with the original series all at once. + list and then concatenate the list with the original Series all at + once. See also --------