Skip to content

Commit a3f14bf

Browse files
BUG: Fixes pd.merge issue with columns of dtype numpy.uintc on windows (pandas-dev#60145)
* bug fix for numpy.uintc in merge operations on windows

  Added pytest test case to verify correct behavior with numpy.uintc dtype
* Formatting changes after running pre-commit
* Added tests for numpy.intc
* added whatsnew note
* pre-commit automatic changes and also made changes to test_merge.py file to make pandas namespace consistent
* removed comment
* added the deleted whatsnew note back
* better whatsnew note

Co-authored-by: Matthew Roeschke <[email protected]>

---------

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent d48235f commit a3f14bf

File tree

3 files changed

+47
-1
lines changed

3 files changed

+47
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,7 @@ Reshaping
739739
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
740740
- Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
741741
- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
742+
- Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` objects on columns of ``intc`` or ``uintc`` dtype on Windows (:issue:`60091`, :issue:`58713`)
742743
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
743744
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
744745

pandas/core/reshape/merge.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,17 @@
123123

124124
# See https://github.com/pandas-dev/pandas/issues/52451
125125
if np.intc is not np.int32:
126-
_factorizers[np.intc] = libhashtable.Int64Factorizer
126+
if np.dtype(np.intc).itemsize == 4:
127+
_factorizers[np.intc] = libhashtable.Int32Factorizer
128+
else:
129+
_factorizers[np.intc] = libhashtable.Int64Factorizer
130+
131+
if np.uintc is not np.uint32:
132+
if np.dtype(np.uintc).itemsize == 4:
133+
_factorizers[np.uintc] = libhashtable.UInt32Factorizer
134+
else:
135+
_factorizers[np.uintc] = libhashtable.UInt64Factorizer
136+
127137

128138
_known = (np.ndarray, ExtensionArray, Index, ABCSeries)
129139

pandas/tests/reshape/merge/test_merge.py

+35
Original file line numberDiff line numberDiff line change
@@ -1843,6 +1843,41 @@ def test_merge_empty(self, left_empty, how, exp):
18431843

18441844
tm.assert_frame_equal(result, expected)
18451845

1846+
def test_merge_with_uintc_columns(self):
1847+
df1 = DataFrame({"a": ["foo", "bar"], "b": np.array([1, 2], dtype=np.uintc)})
1848+
df2 = DataFrame({"a": ["foo", "baz"], "b": np.array([3, 4], dtype=np.uintc)})
1849+
result = df1.merge(df2, how="outer")
1850+
expected = DataFrame(
1851+
{
1852+
"a": ["bar", "baz", "foo", "foo"],
1853+
"b": np.array([2, 4, 1, 3], dtype=np.uintc),
1854+
}
1855+
)
1856+
tm.assert_frame_equal(result.reset_index(drop=True), expected)
1857+
1858+
def test_merge_with_intc_columns(self):
1859+
df1 = DataFrame({"a": ["foo", "bar"], "b": np.array([1, 2], dtype=np.intc)})
1860+
df2 = DataFrame({"a": ["foo", "baz"], "b": np.array([3, 4], dtype=np.intc)})
1861+
result = df1.merge(df2, how="outer")
1862+
expected = DataFrame(
1863+
{
1864+
"a": ["bar", "baz", "foo", "foo"],
1865+
"b": np.array([2, 4, 1, 3], dtype=np.intc),
1866+
}
1867+
)
1868+
tm.assert_frame_equal(result.reset_index(drop=True), expected)
1869+
1870+
def test_merge_intc_non_monotonic(self):
1871+
df = DataFrame({"join_key": Series([0, 2, 1], dtype=np.intc)})
1872+
df_details = DataFrame(
1873+
{"join_key": Series([0, 1, 2], dtype=np.intc), "value": ["a", "b", "c"]}
1874+
)
1875+
merged = df.merge(df_details, on="join_key", how="left")
1876+
expected = DataFrame(
1877+
{"join_key": np.array([0, 2, 1], dtype=np.intc), "value": ["a", "c", "b"]}
1878+
)
1879+
tm.assert_frame_equal(merged.reset_index(drop=True), expected)
1880+
18461881

18471882
@pytest.fixture
18481883
def left():

0 commit comments

Comments
 (0)