Skip to content

Commit c82b02d

Browse files
jbrockmendel authored and SeeminSyed committed
CLN: avoid values_from_object in reshape.merge (pandas-dev#32537)
1 parent 872bb90 commit c82b02d

File tree

1 file changed

+17
-15
lines changed

1 file changed

+17
-15
lines changed

pandas/core/reshape/merge.py

+17-15
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@
4545
import pandas.core.algorithms as algos
4646
from pandas.core.arrays.categorical import _recode_for_categories
4747
import pandas.core.common as com
48+
from pandas.core.construction import extract_array
4849
from pandas.core.frame import _merge_doc
4950
from pandas.core.internals import concatenate_block_managers
5051
from pandas.core.sorting import is_int64_overflow_possible
@@ -1820,9 +1821,14 @@ def _right_outer_join(x, y, max_groups):
18201821

18211822
def _factorize_keys(lk, rk, sort=True):
18221823
# Some pre-processing for non-ndarray lk / rk
1823-
if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
1824-
lk = getattr(lk, "_values", lk)._data
1825-
rk = getattr(rk, "_values", rk)._data
1824+
lk = extract_array(lk, extract_numpy=True)
1825+
rk = extract_array(rk, extract_numpy=True)
1826+
1827+
if is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype(rk.dtype):
1828+
# Extract the ndarray (UTC-localized) values
1829+
# Note: we don't need the dtypes to match, as these can still be compared
1830+
lk, _ = lk._values_for_factorize()
1831+
rk, _ = rk._values_for_factorize()
18261832

18271833
elif (
18281834
is_categorical_dtype(lk) and is_categorical_dtype(rk) and lk.is_dtype_equal(rk)
@@ -1837,27 +1843,23 @@ def _factorize_keys(lk, rk, sort=True):
18371843
lk = ensure_int64(lk.codes)
18381844
rk = ensure_int64(rk)
18391845

1840-
elif (
1841-
is_extension_array_dtype(lk.dtype)
1842-
and is_extension_array_dtype(rk.dtype)
1843-
and lk.dtype == rk.dtype
1844-
):
1846+
elif is_extension_array_dtype(lk.dtype) and is_dtype_equal(lk.dtype, rk.dtype):
18451847
lk, _ = lk._values_for_factorize()
18461848
rk, _ = rk._values_for_factorize()
18471849

18481850
if is_integer_dtype(lk) and is_integer_dtype(rk):
18491851
# GH#23917 TODO: needs tests for case where lk is integer-dtype
18501852
# and rk is datetime-dtype
18511853
klass = libhashtable.Int64Factorizer
1852-
lk = ensure_int64(com.values_from_object(lk))
1853-
rk = ensure_int64(com.values_from_object(rk))
1854-
elif issubclass(lk.dtype.type, (np.timedelta64, np.datetime64)) and issubclass(
1855-
rk.dtype.type, (np.timedelta64, np.datetime64)
1856-
):
1854+
lk = ensure_int64(np.asarray(lk))
1855+
rk = ensure_int64(np.asarray(rk))
1856+
1857+
elif needs_i8_conversion(lk.dtype) and is_dtype_equal(lk.dtype, rk.dtype):
18571858
# GH#23917 TODO: Needs tests for non-matching dtypes
18581859
klass = libhashtable.Int64Factorizer
1859-
lk = ensure_int64(com.values_from_object(lk))
1860-
rk = ensure_int64(com.values_from_object(rk))
1860+
lk = ensure_int64(np.asarray(lk, dtype=np.int64))
1861+
rk = ensure_int64(np.asarray(rk, dtype=np.int64))
1862+
18611863
else:
18621864
klass = libhashtable.Factorizer
18631865
lk = ensure_object(lk)

0 commit comments

Comments
 (0)