From 86a3f1c12a97f5f43a26b73e464ab797a62b61f8 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 5 Nov 2022 22:00:21 +0100 Subject: [PATCH 1/3] CLN: Deduplicate code in duplicated --- pandas/_libs/hashtable_func_helper.pxi.in | 26 ++++++----------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 68e253fd03620..331cb9324d690 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -141,32 +141,19 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons if keep not in ('last', 'first', False): raise ValueError('keep must be either "first", "last" or False') - if keep == 'last': + {{for cond, keep in [('if', '"last"'), ('elif', '"first"')]}} + {{cond}} keep == {{keep}}: {{if dtype == 'object'}} if True: {{else}} with nogil: {{endif}} + {{if keep == 'last'}} for i in range(n - 1, -1, -1): - # equivalent: range(n)[::-1], which cython doesn't like in nogil - if uses_mask and mask[i]: - if seen_na: - out[i] = True - else: - out[i] = False - seen_na = True - else: - value = {{to_c_type}}(values[i]) - kh_put_{{ttype}}(table, value, &ret) - out[i] = ret == 0 - - elif keep == 'first': - {{if dtype == 'object'}} - if True: - {{else}} - with nogil: - {{endif}} + {{else}} for i in range(n): + {{endif}} + # equivalent: range(n)[::-1], which cython doesn't like in nogil if uses_mask and mask[i]: if seen_na: out[i] = True @@ -177,6 +164,7 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons value = {{to_c_type}}(values[i]) kh_put_{{ttype}}(table, value, &ret) out[i] = ret == 0 + {{endfor}} else: {{if dtype == 'object'}} From 35375ddf96b01ae143fc726b0adf5e0de82a0ca9 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 5 Nov 2022 22:01:45 +0100 Subject: [PATCH 2/3] Clean --- pandas/_libs/hashtable_func_helper.pxi.in | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 331cb9324d690..d9f36357976de 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -153,7 +153,6 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons {{else}} for i in range(n): {{endif}} - # equivalent: range(n)[::-1], which cython doesn't like in nogil if uses_mask and mask[i]: if seen_na: out[i] = True From 0898ba8e80f9e53eb05c046334c1f2493e806b4d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 5 Nov 2022 22:03:23 +0100 Subject: [PATCH 3/3] Fix --- pandas/_libs/hashtable_func_helper.pxi.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index d9f36357976de..b9cf6011481af 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -148,7 +148,7 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons {{else}} with nogil: {{endif}} - {{if keep == 'last'}} + {{if keep == '"last"'}} for i in range(n - 1, -1, -1): {{else}} for i in range(n):