@@ -3330,6 +3330,25 @@ def test_factorize_chunked_dictionary():
3330
3330
tm .assert_index_equal (res_uniques , exp_uniques )
3331
3331
3332
3332
3333
def test_factorize_dictionary_with_na():
    """Exercise ``factorize`` on a dictionary-typed Arrow array containing NA.

    Both ``use_na_sentinel`` settings are covered: the default call and an
    explicit ``use_na_sentinel=False``.
    """
    dictionary_type = pa.dictionary(pa.int32(), pa.utf8())
    arr = pd.array(["a1", pd.NA], dtype=pd.ArrowDtype(dictionary_type))

    # Each case: (factorize kwargs, expected codes, expected unique values).
    # NOTE(review): the expected uniques are built with the dictionary dtype of
    # the input; confirm this matches what ArrowExtensionArray.factorize
    # returns (it may hand back the dictionary's value type instead).
    cases = [
        # Default call: the NA slot is expected to map to -1 and to be
        # absent from the uniques.
        ({}, [0, -1], ["a1"]),
        # Sentinel disabled: the NA slot is expected to get its own code and
        # to appear among the uniques.
        ({"use_na_sentinel": False}, [0, 1], ["a1", None]),
    ]

    for factorize_kwargs, code_values, unique_values in cases:
        codes, uniques = arr.factorize(**factorize_kwargs)

        tm.assert_numpy_array_equal(codes, np.array(code_values, dtype=np.intp))
        tm.assert_extension_array_equal(
            uniques, pd.array(unique_values, dtype=arr.dtype)
        )
3350
+
3351
+
3333
3352
def test_dictionary_astype_categorical ():
3334
3353
# GH#56672
3335
3354
arrs = [
0 commit comments