Skip to content

ENH: Use flags.allows_duplicate_labels to define default insert behavior #45109

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4375,7 +4375,7 @@ def insert(
loc: int,
column: Hashable,
value: Scalar | AnyArrayLike,
allow_duplicates: bool = False,
allow_duplicates: bool | None = None,
) -> None:
"""
Insert column into DataFrame at specified location.
Expand Down Expand Up @@ -4422,7 +4422,9 @@ def insert(
0 NaN 100 1 99 3
1 5.0 100 2 99 4
"""
if allow_duplicates and not self.flags.allows_duplicate_labels:
if allow_duplicates is None:
allow_duplicates = self.flags.allows_duplicate_labels
elif allow_duplicates and not self.flags.allows_duplicate_labels:
raise ValueError(
"Cannot specify 'allow_duplicates=True' when "
"'self.flags.allows_duplicate_labels' is False."
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/indexing/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@


class TestDataFrameInsert:
@pytest.mark.xfail(reason="Allow duplicate columns")
def test_insert(self):
df = DataFrame(
np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"]
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/frame/methods/test_reset_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,12 +328,15 @@ def test_reset_index_multiindex_nan(self):
"2012-12-31",
],
)
def test_reset_index_with_datetimeindex_cols(self, name):
def test_reset_index_with_datetimeindex_cols(self, name, request):
# GH#5818
warn = None
if isinstance(name, Timestamp) and name.tz is not None:
# _deprecate_mismatched_indexing
warn = FutureWarning
request.node.add_marker(
pytest.mark.xfail(reason="Duplicate labels allowed")
)

df = DataFrame(
[[1, 2], [3, 4]],
Expand Down Expand Up @@ -370,13 +373,14 @@ def test_reset_index_range(self):
)
tm.assert_frame_equal(result, expected)

def test_reset_index_multiindex_columns(self):
def test_reset_index_multiindex_columns(self, request):
levels = [["A", ""], ["B", "b"]]
df = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels))
result = df[["B"]].rename_axis("A").reset_index()
tm.assert_frame_equal(result, df)

# GH#16120: already existing column
request.node.add_marker(pytest.mark.xfail(reason="Duplicate labels allowed"))
msg = r"cannot insert \('A', ''\), already exists"
with pytest.raises(ValueError, match=msg):
df.rename_axis("A").reset_index()
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/test_nonunique_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def test_setattr_columns_vs_construct_with_columns_datetimeindx(self):
expected = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=idx)
check(df, expected)

@pytest.mark.xfail(reason="Allow duplicate columns")
def test_insert_with_duplicate_columns(self):
# insert
df = DataFrame(
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/groupby/test_frame_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def test_mixed_groupings(normalize, expected_label, expected_values):
],
)
@pytest.mark.parametrize("as_index", [False, True])
def test_column_name_clashes(test, expected_names, as_index):
def test_column_name_clashes(test, expected_names, as_index, request):
df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6], "d": [7, 8], "e": [9, 10]})
if test == "repeat":
df.columns = list("abbde")
Expand All @@ -431,6 +431,7 @@ def test_column_name_clashes(test, expected_names, as_index):
)
tm.assert_series_equal(result, expected)
else:
request.node.add_marker(pytest.mark.xfail(reason="Duplicate labels allowed"))
with pytest.raises(ValueError, match="cannot insert"):
df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts()

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/pytables/test_put.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ def test_column_multiindex(setup_path):
)


@pytest.mark.xfail(reason="Duplicate labels allowed")
def test_store_multiindex(setup_path):

# validate multi-index names
Expand Down