# Provenance: final state of divide_and_conquer/kth_order_statistic.py after the
# six-patch series by Cho Yin Yong (2020-10-24 to 2020-11-01):
#   1/6 kth order statistics divide and conquer algorithm
#   2/6 add explanation of algorithm.
#   3/6 fix PEP8 line too long error
#   4/6 update order to be compliant to isort
#   5/6 add doctest
#   6/6 make file black compliant
# Differs from that state in kth_number: values equal to the pivot are counted,
# k is range-checked up front, and the search narrows in a loop, not by recursion.
"""
Find the kth smallest element of a list with randomized divide and conquer
(quickselect).

Sorting the list and indexing into it solves the problem in O(n log n) time.
Partitioning around a pivot and continuing only into the part that holds the
answer brings this down to O(n) expected time. With a randomly chosen pivot the
worst case is still O(n^2); a deterministic pivot rule such as
median-of-medians would make the worst case linear as well.

For more information on this algorithm:
https://web.stanford.edu/class/archive/cs/cs161/cs161.1138/lectures/08/Small08.pdf
"""
from random import choice
from typing import List


def random_pivot(lst: List[int]) -> int:
    """
    Return a randomly chosen element of lst to use as the pivot.

    A more sophisticated rule, such as the median-of-medians algorithm, could
    be substituted here.

    Raises:
        IndexError: if lst is empty (raised by ``random.choice``).

    >>> random_pivot([7])
    7
    >>> random_pivot([4, 9, 2]) in [4, 9, 2]
    True
    """
    return choice(lst)


def kth_number(lst: List[int], k: int) -> int:
    """
    Return the kth smallest number in lst, counting from k = 1.

    Repeated values each occupy their own rank, so the result always equals
    ``sorted(lst)[k - 1]``. lst itself is not modified.

    Args:
        lst: the numbers to select from.
        k: 1-based rank of the wanted element, with 1 <= k <= len(lst).

    Returns:
        The element that would sit at index k - 1 if lst were sorted.

    Raises:
        IndexError: if k is outside 1..len(lst); this covers every k when lst
            is empty.

    >>> kth_number([2, 1, 3, 4, 5], 3)
    3
    >>> kth_number([2, 1, 3, 4, 5], 1)
    1
    >>> kth_number([2, 1, 3, 4, 5], 5)
    5
    >>> kth_number([3, 2, 5, 6, 7, 8], 2)
    3
    >>> kth_number([25, 21, 98, 100, 76, 22, 43, 60, 89, 87], 4)
    43
    >>> kth_number([1, 1, 1], 2)
    1
    >>> kth_number([5, 3, 5, 1, 3], 4)
    5
    >>> kth_number([], 1)
    Traceback (most recent call last):
        ...
    IndexError: k=1 is out of range for a list of length 0
    """
    # Reject impossible ranks here so the failure names the real problem instead
    # of surfacing later as random.choice complaining about an empty sequence.
    if not 1 <= k <= len(lst):
        raise IndexError(f"k={k} is out of range for a list of length {len(lst)}")

    # Invariant: the answer is the kth smallest element of `candidates`, and
    # 1 <= k <= len(candidates). Each pass either returns or shrinks the list.
    candidates = lst
    while True:
        pivot = random_pivot(candidates)

        # Linear-time three-way split around the pivot. Values equal to the
        # pivot are only counted; they never need to be searched again.
        small = [e for e in candidates if e < pivot]
        big = [e for e in candidates if e > pivot]
        equal_count = len(candidates) - len(small) - len(big)

        if k <= len(small):
            # The wanted rank lies among the values below the pivot.
            candidates = small
        elif k <= len(small) + equal_count:
            # The wanted rank falls on the pivot or one of its duplicates.
            return pivot
        else:
            # Skip everything <= pivot and re-base k onto the larger values.
            k -= len(small) + equal_count
            candidates = big


if __name__ == "__main__":
    import doctest

    doctest.testmod()