From 9e370bea4017fa741ede639ae6915bab8d24b954 Mon Sep 17 00:00:00 2001 From: Mark Huang Date: Thu, 8 Oct 2020 04:29:25 +0800 Subject: [PATCH 1/7] Add type hints and docstrings to heap.py - Add type hints - Add docstrings - Add explanatory comments - Improve code readability - Change to use f-string --- data_structures/heap/heap.py | 179 ++++++++++++++++++++--------------- 1 file changed, 101 insertions(+), 78 deletions(-) diff --git a/data_structures/heap/heap.py b/data_structures/heap/heap.py index b901c54a4284..5da6a896c76d 100644 --- a/data_structures/heap/heap.py +++ b/data_structures/heap/heap.py @@ -1,99 +1,124 @@ -#!/usr/bin/python3 +from typing import List, Iterable, Union class Heap(object): - """ + """A Max Heap Implementation + >>> unsorted = [103, 9, 1, 7, 11, 15, 25, 201, 209, 107, 5] >>> h = Heap() - >>> h.build_heap(unsorted) - >>> h.display() + >>> h.build_max_heap(unsorted) + >>> print(h) [209, 201, 25, 103, 107, 15, 1, 9, 7, 11, 5] >>> - >>> h.get_max() + >>> h.extract_max() 209 - >>> h.display() + >>> print(h) [201, 107, 25, 103, 11, 15, 1, 9, 7, 5] >>> >>> h.insert(100) - >>> h.display() + >>> print(h) [201, 107, 25, 103, 100, 15, 1, 9, 7, 5, 11] >>> >>> h.heap_sort() - >>> h.display() + >>> print(h) [1, 5, 7, 9, 11, 15, 25, 100, 103, 107, 201] - >>> """ def __init__(self): - self.h = [] - self.curr_size = 0 - - def get_left_child_index(self, i): - left_child_index = 2 * i + 1 - if left_child_index < self.curr_size: + self.h: List[Union[int, float]] = [] + self.heap_size: int = 0 + + def __repr__(self) -> str: + return str(self.h) + + def parent_index(self, child_idx: int) -> int: + """ return the parent index of given child """ + if child_idx > 0: + return (child_idx - 1) // 2 + + def left_child_idx(self, parent_idx: int) -> int: + """ + return the left child index if the left child exists. + if not, return None. + """ + left_child_index = 2 * parent_idx + 1 + if left_child_index < self.heap_size: return left_child_index - return None - def get_right_child(self, i): - right_child_index = 2 * i + 2 - if right_child_index < self.curr_size: + def right_child_idx(self, parent_idx: int) -> int: + """ + return the right child index if the right child exists. + if not, return None. + """ + right_child_index = 2 * parent_idx + 2 + if right_child_index < self.heap_size: return right_child_index - return None - - def max_heapify(self, index): - if index < self.curr_size: - largest = index - lc = self.get_left_child_index(index) - rc = self.get_right_child(index) - if lc is not None and self.h[lc] > self.h[largest]: - largest = lc - if rc is not None and self.h[rc] > self.h[largest]: - largest = rc - if largest != index: - self.h[largest], self.h[index] = self.h[index], self.h[largest] - self.max_heapify(largest) - - def build_heap(self, collection): - self.curr_size = len(collection) - self.h = list(collection) - if self.curr_size <= 1: - return - for i in range(self.curr_size // 2 - 1, -1, -1): - self.max_heapify(i) - def get_max(self): - if self.curr_size >= 2: + def max_heapify(self, index: int): + """ + correct a single violation of the heap property in a subtree's root. + """ + if index < self.heap_size: + violation: int = index + left_child = self.left_child_idx(index) + right_child = self.right_child_idx(index) + # check which child is larger than its parent + if left_child is not None and self.h[left_child] > self.h[violation]: + violation = left_child + if right_child is not None and self.h[right_child] > self.h[violation]: + violation = right_child + # if violation indeed exists + if violation != index: + # swap to fix the violation + self.h[violation], self.h[index] = self.h[index], self.h[violation] + # fix the subsequent violation recursively if any + self.max_heapify(violation) + + def build_max_heap(self, collection: Iterable[Union[int, float]]): + """ build max heap from an unsorted array""" + self.heap_size = len(collection) + self.h = list(collection) + if self.heap_size > 1: + # max_heapify from right to left but exclude leaves (last level) + for i in range(self.heap_size // 2 - 1, -1, -1): + self.max_heapify(i) + + def max(self) -> Union[int, float]: + """ return the max in the heap """ + if self.heap_size >= 1: + return self.h[0] + + def extract_max(self) -> Union[int, float]: + """ get and remove max from heap """ + if self.heap_size >= 2: me = self.h[0] self.h[0] = self.h.pop(-1) - self.curr_size -= 1 + self.heap_size -= 1 self.max_heapify(0) return me - elif self.curr_size == 1: - self.curr_size -= 1 + elif self.heap_size == 1: + self.heap_size -= 1 return self.h.pop(-1) - return None + + def insert(self, value: Union[int, float]): + """ insert a new value into the max heap """ + self.h.append(value) + idx = (self.heap_size - 1) // 2 + self.heap_size += 1 + while idx >= 0: + self.max_heapify(idx) + idx = (idx - 1) // 2 def heap_sort(self): - size = self.curr_size + size = self.heap_size for j in range(size - 1, 0, -1): self.h[0], self.h[j] = self.h[j], self.h[0] - self.curr_size -= 1 + self.heap_size -= 1 self.max_heapify(0) - self.curr_size = size - - def insert(self, data): - self.h.append(data) - curr = (self.curr_size - 1) // 2 - self.curr_size += 1 - while curr >= 0: - self.max_heapify(curr) - curr = (curr - 1) // 2 + self.heap_size = size - def display(self): - print(self.h) - -def main(): +def demo(): for unsorted in [ [], [0], @@ -110,26 +135,24 @@ def main(): [103, 9, 1, 7, 11, 15, 25, 201, 209, 107, 5], [-45, -2, -5], ]: - print("source unsorted list: %s" % unsorted) + print(f"unsorted array: {unsorted}") - h = Heap() - h.build_heap(unsorted) - print("after build heap: ", end=" ") - h.display() + heap = Heap() + heap.build_max_heap(unsorted) + print(f"after build heap: {heap}") - print("max value: %s" % h.get_max()) - print("delete max value: ", end=" ") - h.display() + print(f"max value: {heap.extract_max()}") + print(f"after max value removed: {heap}") - h.insert(100) - print("after insert new value 100: ", end=" ") - h.display() + heap.insert(100) + print(f"after new value 100 inserted: {heap}") - h.heap_sort() - print("heap sort: ", end=" ") - h.display() - print() + heap.heap_sort() + print(f"heap-sorted array: {heap}\n") if __name__ == "__main__": - main() + # demo() + import doctest + + doctest.testmod() From 6b3d70d268eec463a24e3e5e685d5bd4385b1003 Mon Sep 17 00:00:00 2001 From: Mark Huang Date: Thu, 8 Oct 2020 04:49:11 +0800 Subject: [PATCH 2/7] Fix import sorting --- data_structures/heap/heap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_structures/heap/heap.py b/data_structures/heap/heap.py index 5da6a896c76d..00176301c166 100644 --- a/data_structures/heap/heap.py +++ b/data_structures/heap/heap.py @@ -1,4 +1,4 @@ -from typing import List, Iterable, Union +from typing import Iterable, List, Union class Heap(object): From b670301be9b3791cc844f5d27667e3a35a0b57c2 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Thu, 22 Oct 2020 14:02:54 +0000 Subject: [PATCH 3/7] fixup! Format Python code with psf/black push --- data_structures/heap/heap.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data_structures/heap/heap.py b/data_structures/heap/heap.py index c6ae8fb8d681..46fccedaa543 100644 --- a/data_structures/heap/heap.py +++ b/data_structures/heap/heap.py @@ -1,10 +1,9 @@ from typing import Iterable, List, Union - class Heap: """A Max Heap Implementation - + >>> unsorted = [103, 9, 1, 7, 11, 15, 25, 201, 209, 107, 5] >>> h = Heap() >>> h.build_max_heap(unsorted) From d5976191a55aa71d2a56020c8e7f97057ef2ffd7 Mon Sep 17 00:00:00 2001 From: Mark Huang Date: Mon, 23 Nov 2020 20:42:44 +0800 Subject: [PATCH 4/7] Fix static type error --- data_structures/heap/heap.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/data_structures/heap/heap.py b/data_structures/heap/heap.py index 46fccedaa543..d687fbef1b66 100644 --- a/data_structures/heap/heap.py +++ b/data_structures/heap/heap.py @@ -35,6 +35,8 @@ def parent_index(self, child_idx: int) -> int: """ return the parent index of given child """ if child_idx > 0: return (child_idx - 1) // 2 + else: + raise Exception("Index out of range") def left_child_idx(self, parent_idx: int) -> int: """ @@ -44,6 +46,8 @@ def left_child_idx(self, parent_idx: int) -> int: left_child_index = 2 * parent_idx + 1 if left_child_index < self.heap_size: return left_child_index + else: + raise Exception("No left child") def right_child_idx(self, parent_idx: int) -> int: """ @@ -53,6 +57,8 @@ def right_child_idx(self, parent_idx: int) -> int: right_child_index = 2 * parent_idx + 2 if right_child_index < self.heap_size: return right_child_index + else: + raise Exception("No right child") def max_heapify(self, index: int): """ @@ -76,8 +82,8 @@ def max_heapify(self, index: int): def build_max_heap(self, collection: Iterable[Union[int, float]]): """ build max heap from an unsorted array""" - self.heap_size = len(collection) self.h = list(collection) + self.heap_size = len(self.h) if self.heap_size > 1: # max_heapify from right to left but exclude leaves (last level) for i in range(self.heap_size // 2 - 1, -1, -1): @@ -87,6 +93,8 @@ def max(self) -> Union[int, float]: """ return the max in the heap """ if self.heap_size >= 1: return self.h[0] + else: + raise Exception("Empty heap") def extract_max(self) -> Union[int, float]: """ get and remove max from heap """ @@ -99,6 +107,8 @@ def extract_max(self) -> Union[int, float]: elif self.heap_size == 1: self.heap_size -= 1 return self.h.pop(-1) + else: + raise Exception("Empty heap") def insert(self, value: Union[int, float]): """ insert a new value into the max heap """ From 9f8f0a0f1a3f46589d45c4426c0cd880b7a6512a Mon Sep 17 00:00:00 2001 From: Mark Huang Date: Fri, 25 Dec 2020 01:07:26 +0800 Subject: [PATCH 5/7] Fix failing test --- data_structures/heap/heap.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/data_structures/heap/heap.py b/data_structures/heap/heap.py index d687fbef1b66..086cc96de826 100644 --- a/data_structures/heap/heap.py +++ b/data_structures/heap/heap.py @@ -31,14 +31,13 @@ def __init__(self): def __repr__(self) -> str: return str(self.h) - def parent_index(self, child_idx: int) -> int: + def parent_index(self, child_idx: int) -> Union[int, None]: """ return the parent index of given child """ if child_idx > 0: return (child_idx - 1) // 2 - else: - raise Exception("Index out of range") + return None - def left_child_idx(self, parent_idx: int) -> int: + def left_child_idx(self, parent_idx: int) -> Union[int, None]: """ return the left child index if the left child exists. if not, return None. @@ -46,10 +45,9 @@ def left_child_idx(self, parent_idx: int) -> int: left_child_index = 2 * parent_idx + 1 if left_child_index < self.heap_size: return left_child_index - else: - raise Exception("No left child") + return None - def right_child_idx(self, parent_idx: int) -> int: + def right_child_idx(self, parent_idx: int) -> Union[int, None]: """ return the right child index if the right child exists. if not, return None. @@ -57,8 +55,7 @@ def right_child_idx(self, parent_idx: int) -> int: right_child_index = 2 * parent_idx + 2 if right_child_index < self.heap_size: return right_child_index - else: - raise Exception("No right child") + return None def max_heapify(self, index: int): """ From 199173be894306bfadbcfcc85fd7d1d2e23d6c9d Mon Sep 17 00:00:00 2001 From: Mark Huang Date: Fri, 25 Dec 2020 21:12:47 +0800 Subject: [PATCH 6/7] Fix type hints --- data_structures/heap/heap.py | 40 +++++++++++++++++------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/data_structures/heap/heap.py b/data_structures/heap/heap.py index 086cc96de826..5bfb8e3d2113 100644 --- a/data_structures/heap/heap.py +++ b/data_structures/heap/heap.py @@ -1,4 +1,4 @@ -from typing import Iterable, List, Union +from typing import Iterable, List, Optional class Heap: @@ -24,20 +24,20 @@ class Heap: [1, 5, 7, 9, 11, 15, 25, 100, 103, 107, 201] """ - def __init__(self): - self.h: List[Union[int, float]] = [] + def __init__(self) -> None: + self.h: List[float] = [] self.heap_size: int = 0 def __repr__(self) -> str: return str(self.h) - def parent_index(self, child_idx: int) -> Union[int, None]: + def parent_index(self, child_idx: int) -> Optional[int]: """ return the parent index of given child """ if child_idx > 0: return (child_idx - 1) // 2 return None - def left_child_idx(self, parent_idx: int) -> Union[int, None]: + def left_child_idx(self, parent_idx: int) -> Optional[int]: """ return the left child index if the left child exists. if not, return None. @@ -47,7 +47,7 @@ def left_child_idx(self, parent_idx: int) -> Union[int, None]: return left_child_index return None - def right_child_idx(self, parent_idx: int) -> Union[int, None]: + def right_child_idx(self, parent_idx: int) -> Optional[int]: """ return the right child index if the right child exists. if not, return None. @@ -57,7 +57,7 @@ def right_child_idx(self, parent_idx: int) -> Union[int, None]: return right_child_index return None - def max_heapify(self, index: int): + def max_heapify(self, index: int) -> None: """ correct a single violation of the heap property in a subtree's root. """ @@ -77,7 +77,7 @@ def max_heapify(self, index: int): # fix the subsequent violation recursively if any self.max_heapify(violation) - def build_max_heap(self, collection: Iterable[Union[int, float]]): + def build_max_heap(self, collection: Iterable[float]): """ build max heap from an unsorted array""" self.h = list(collection) self.heap_size = len(self.h) @@ -86,14 +86,14 @@ def build_max_heap(self, collection: Iterable[Union[int, float]]): for i in range(self.heap_size // 2 - 1, -1, -1): self.max_heapify(i) - def max(self) -> Union[int, float]: + def max(self) -> float: """ return the max in the heap """ if self.heap_size >= 1: return self.h[0] else: raise Exception("Empty heap") - def extract_max(self) -> Union[int, float]: + def extract_max(self) -> float: """ get and remove max from heap """ if self.heap_size >= 2: me = self.h[0] @@ -107,7 +107,7 @@ def extract_max(self) -> Union[int, float]: else: raise Exception("Empty heap") - def insert(self, value: Union[int, float]): + def insert(self, value: float) -> None: """ insert a new value into the max heap """ self.h.append(value) idx = (self.heap_size - 1) // 2 @@ -116,7 +116,7 @@ def insert(self, value: Union[int, float]): self.max_heapify(idx) idx = (idx - 1) // 2 - def heap_sort(self): + def heap_sort(self) -> None: size = self.heap_size for j in range(size - 1, 0, -1): self.h[0], self.h[j] = self.h[j], self.h[0] @@ -125,9 +125,14 @@ def heap_sort(self): self.heap_size = size -def demo(): +if __name__ == "__main__": + import doctest + + # run doc test + doctest.testmod() + + # demo for unsorted in [ - [], [0], [2], [3, 5], @@ -156,10 +161,3 @@ def demo(): heap.heap_sort() print(f"heap-sorted array: {heap}\n") - - -if __name__ == "__main__": - # demo() - import doctest - - doctest.testmod() From e61693af83d5b40f7b8fd5019717645e76fa49f6 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Sat, 26 Dec 2020 08:37:16 +0530 Subject: [PATCH 7/7] Add return annotation --- data_structures/heap/heap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_structures/heap/heap.py b/data_structures/heap/heap.py index 5bfb8e3d2113..8592362c23b9 100644 --- a/data_structures/heap/heap.py +++ b/data_structures/heap/heap.py @@ -77,7 +77,7 @@ def max_heapify(self, index: int) -> None: # fix the subsequent violation recursively if any self.max_heapify(violation) - def build_max_heap(self, collection: Iterable[float]): + def build_max_heap(self, collection: Iterable[float]) -> None: """ build max heap from an unsorted array""" self.h = list(collection) self.heap_size = len(self.h)