Skip to content

Commit cf5c2d3

Browse files
authored
BUG: PandasArray[uint].factorize (pandas-dev#46295)
1 parent d00763a commit cf5c2d3

File tree

5 files changed

+22
-8
lines changed

5 files changed

+22
-8
lines changed

pandas/core/arrays/_mixins.py

+3
Original file line number | Diff line number | Diff line change
@@ -190,6 +190,9 @@ def _from_factorized(cls, values, original):
190190
def _values_for_argsort(self) -> np.ndarray:
191191
return self._ndarray
192192

193+
def _values_for_factorize(self):
194+
return self._ndarray, self._internal_fill_value
195+
193196
# Signature of "argmin" incompatible with supertype "ExtensionArray"
194197
def argmin(self, axis: int = 0, skipna: bool = True): # type: ignore[override]
195198
# override base class by adding axis keyword

pandas/core/arrays/categorical.py

-3
Original file line number | Diff line number | Diff line change
@@ -2298,9 +2298,6 @@ def unique(self):
22982298
unique_codes = unique1d(self.codes)
22992299
return self._from_backing_data(unique_codes)
23002300

2301-
def _values_for_factorize(self):
2302-
return self._ndarray, -1
2303-
23042301
def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray:
23052302
# make sure we have correct itemsize for resulting codes
23062303
res_values = coerce_indexer_dtype(res_values, self.dtype.categories)

pandas/core/arrays/datetimelike.py

-3
Original file line number | Diff line number | Diff line change
@@ -549,9 +549,6 @@ def copy(self: DatetimeLikeArrayT, order="C") -> DatetimeLikeArrayT:
549549
new_obj._freq = self.freq
550550
return new_obj
551551

552-
def _values_for_factorize(self):
553-
return self._ndarray, self._internal_fill_value
554-
555552
# ------------------------------------------------------------------
556553
# Validation Methods
557554
# TODO: try to de-duplicate these, ensure identical behavior

pandas/core/arrays/numpy_.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -189,8 +189,12 @@ def _validate_scalar(self, fill_value):
189189
fill_value = self.dtype.na_value
190190
return fill_value
191191

192-
def _values_for_factorize(self) -> tuple[np.ndarray, int]:
193-
return self._ndarray, -1
192+
def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
193+
if self.dtype.kind in ["i", "u", "b"]:
194+
fv = None
195+
else:
196+
fv = np.nan
197+
return self._ndarray, fv
194198

195199
# ------------------------------------------------------------------------
196200
# Reductions

pandas/tests/arrays/numpy_/test_numpy.py

+13
Original file line number | Diff line number | Diff line change
@@ -309,3 +309,16 @@ def test_quantile_empty(dtype):
309309
result = arr._quantile(idx, interpolation="linear")
310310
expected = PandasArray(np.array([np.nan, np.nan]))
311311
tm.assert_extension_array_equal(result, expected)
312+
313+
314+
def test_factorize_unsigned():
315+
# don't raise when calling factorize on unsigned int PandasArray
316+
arr = np.array([1, 2, 3], dtype=np.uint64)
317+
obj = PandasArray(arr)
318+
319+
res_codes, res_unique = obj.factorize()
320+
exp_codes, exp_unique = pd.factorize(arr)
321+
322+
tm.assert_numpy_array_equal(res_codes, exp_codes)
323+
324+
tm.assert_extension_array_equal(res_unique, PandasArray(exp_unique))

0 commit comments

Comments
 (0)