add DataFrame.insert_columns

MarcoGorelli · MarcoGorelli · commit 36bba1f1c21a · 2023-08-22T09:51:47.000+01:00
diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py
@@ -201,6 +201,52 @@ def insert_column(self, loc: int, column: Column[Any]) -> DataFrame:
         """
         ...
 
+    def insert_columns(self, locs_and_columns: Sequence[tuple[int, Column[Any]]]) -> DataFrame:
+        """
+        Insert multiple columns into the DataFrame at the specified locations.
+
+        Like `insert_column`, but inserts several mutually independent columns
+        in a single call, which some implementations may be able to
+        parallelise. For example, instead of:
+
+        .. code-block::
+
+            new_column = df.get_column_by_name('a') + 1
+            df = df.insert_column(0, new_column.rename('a_plus_1'))
+            new_column = df.get_column_by_name('b') + 1
+            df = df.insert_column(1, new_column.rename('b_plus_1'))
+
+        it would be better to write:
+
+        .. code-block::
+
+            new_column_0 = df.get_column_by_name('a') + 1
+            new_column_1 = df.get_column_by_name('b') + 1
+            df = df.insert_columns(
+                [
+                    (0, new_column_0.rename('a_plus_1')),
+                    (1, new_column_1.rename('b_plus_1')),
+                ]
+            )
+
+        so that insertion can happen in parallel for some implementations.
+
+        Parameters
+        ----------
+        locs_and_columns : Sequence[tuple[int, Column[Any]]]
+            Sequence of tuples of the kind (location, column).
+            The columns must be independent of each other.
+            Locations and column names must be unique.
+            Column names must not already be present in the
+            dataframe - use `Column.rename` (as in the example
+            above) to rename them beforehand if necessary.
+
+        Returns
+        -------
+        DataFrame
+        """
+        ...
+
     def drop_column(self, label: str) -> DataFrame:
         """
         Drop the specified column.