Skip to content

Commit f1c16c0

Browse files
Don't copy dataframes or use inplace=True (#305)
Pandas is moving away from the `inplace=True` argument in its next version, and recommends against using it, as most methods make copies of the underlying data anyway and using `inplace` is more likely to lead to bugs:

- pandas-dev/pandas#16529
- pandas-dev/pandas#51466

We also have a lot of code that looks like:

```python
df = table.dataframe.copy()
df.foobar(..., inplace=True)
...
table = replace(table, dataframe=df)
```

which is better written as the following, as most pandas methods `foobar` return a new dataframe with the results of the operation:

```python
df = table.dataframe.foobar(...)
...
table = replace(table, dataframe=df)
```

It is not necessary to copy dataframes except in very special cases, for example, when you are adding a column to the dataframe:

```python
df = model.topology.copy()  # This copy is needed, otherwise the next line adds a column to `model.topology`
df['new_col'] = df['old_col'] * 2
```
1 parent bbb9afc commit f1c16c0

File tree

5 files changed

+107
-125
lines changed

5 files changed

+107
-125
lines changed

xl2times/__main__.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,7 @@ def keep_last_by_file_order(df):
313313
df = df.sort_values(by="file_order", kind="stable")
314314
df = df.drop(columns=["source_filename", "file_order"])
315315
df = df.drop_duplicates(keep="last")
316-
df.reset_index(drop=True, inplace=True)
317-
return df
316+
return df.reset_index(drop=True)
318317

319318
result = {}
320319
used_tables = set()
@@ -326,7 +325,7 @@ def keep_last_by_file_order(df):
326325
)
327326
else:
328327
used_tables.add(mapping.xl_name)
329-
df = input[mapping.xl_name].copy()
328+
df = input[mapping.xl_name]
330329
# Filter rows according to filter_rows mapping:
331330
for filter_col, filter_val in mapping.filter_rows.items():
332331
if filter_col not in df.columns:
@@ -352,7 +351,7 @@ def keep_last_by_file_order(df):
352351
# Keep only the required columns
353352
cols_to_keep = set(mapping.times_cols).union({"source_filename"})
354353
cols_to_drop = [x for x in df.columns if x not in cols_to_keep]
355-
df.drop(columns=cols_to_drop, inplace=True)
354+
df = df.drop(columns=cols_to_drop)
356355
# Drop duplicates, keeping last seen rows as per file order
357356
df = keep_last_by_file_order(df)
358357
# Drop rows with missing values

xl2times/datatypes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,8 @@ def veda_cgs(self) -> dict[tuple[str, str, str], str]:
295295
cols = ["region", "process", "commodity", "csets"]
296296
# Exclude auxillary flows
297297
index = self.topology["io"].isin({"IN", "OUT"})
298-
veda_cgs = self.topology[cols + ["io"]][index].copy()
299-
veda_cgs.drop_duplicates(subset=cols, keep="last", inplace=True)
298+
veda_cgs = self.topology[cols + ["io"]][index]
299+
veda_cgs = veda_cgs.drop_duplicates(subset=cols, keep="last")
300300
veda_cgs["veda_cg"] = veda_cgs["csets"] + veda_cgs["io"].str[:1]
301301
veda_cgs = veda_cgs.set_index(["region", "process", "commodity"])[
302302
"veda_cg"

xl2times/excel.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def extract_table(
170170
# Make all columns names strings as some are integers e.g. years
171171
table_df.columns = [str(x) for x in df.iloc[header_row, start_col:end_col]]
172172

173-
table_df.reset_index(drop=True, inplace=True)
173+
table_df = table_df.reset_index(drop=True)
174174

175175
# Don't use applymap because it can convert ints to floats
176176
# https://pandas.pydata.org/pandas-docs/stable/user_guide/gotchas.html#gotchas-intna

0 commit comments

Comments
 (0)