Skip to content

Add randomized heap. #3241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 14, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 188 additions & 0 deletions data_structures/heap/randomized_heap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#!/usr/bin/env python3

from __future__ import annotations

import random
from typing import Generic, Iterable, List, Optional, TypeVar

T = TypeVar("T")  # type of the values stored in the heap


class RandomizedHeapNode(Generic[T]):
    """
    One node of the randomized heap. Contains the value and references to
    two children.
    """

    def __init__(self, value: T) -> None:
        """Create a node holding ``value`` with no children."""
        self._value: T = value
        self.left: Optional[RandomizedHeapNode[T]] = None
        self.right: Optional[RandomizedHeapNode[T]] = None

    @property
    def value(self) -> T:
        """Return the value of the node."""
        return self._value

    @staticmethod
    def merge(
        root1: Optional[RandomizedHeapNode[T]], root2: Optional[RandomizedHeapNode[T]]
    ) -> Optional[RandomizedHeapNode[T]]:
        """
        Merge two heaps given by their roots and return the root of the result.

        Either argument may be ``None`` (an empty heap); if both are ``None``
        the result is ``None``. The nodes are re-linked in place, so the
        arguments must not be used as independent heaps afterwards.
        """
        if root1 is None:
            return root2

        if root2 is None:
            return root1

        # Keep the node with the smaller value on top (min-heap order).
        if root1.value > root2.value:
            root1, root2 = root2, root1

        # Swap the children at random so that the side receiving the other
        # heap is chosen by a coin flip.
        if random.choice([True, False]):
            root1.left, root1.right = root1.right, root1.left

        root1.left = RandomizedHeapNode.merge(root1.left, root2)

        return root1


class RandomizedHeap(Generic[T]):
"""
A data structure that allows inserting a new value and popping the smallest
value. Both operations take O(logN) time where N is the size of the
structure.
Wiki: https://en.wikipedia.org/wiki/Randomized_meldable_heap

>>> RandomizedHeap([2, 3, 1, 5, 1, 7]).to_sorted_list()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
>>> RandomizedHeap([2, 3, 1, 5, 1, 7]).to_sorted_list()
>>> list(RandomizedHeap([2, 3, 1, 5, 1, 7]))

Should pass if .__iter__() is properly implemented.

[1, 1, 2, 3, 5, 7]

>>> rh = RandomizedHeap()
>>> rh.pop()
Traceback (most recent call last):
...
IndexError: Can't get top element for the empty heap.

>>> rh.insert(1)
>>> rh.insert(-1)
>>> rh.insert(0)
>>> rh.to_sorted_list()
[-1, 0, 1]
"""

def __init__(self, data: Optional[Iterable[T]] = ()) -> None:
    """
    Create a heap, optionally pre-filled with the items of ``data``.

    ``data`` may be any iterable of values. ``None`` is accepted, as the
    annotation promises, and is treated like an empty iterable.

    >>> rh = RandomizedHeap([3, 1, 3, 7])
    >>> rh.to_sorted_list()
    [1, 3, 3, 7]
    >>> bool(RandomizedHeap(None))
    False
    """
    self._root: Optional[RandomizedHeapNode[T]] = None
    if data is None:
        # Nothing to insert; iterating over None would raise TypeError.
        return
    for item in data:
        self.insert(item)

def insert(self, value: T) -> None:
    """
    Insert the value into the heap.

    The value is wrapped in a new single-node heap which is then merged
    with the current root.

    >>> rh = RandomizedHeap()
    >>> rh.insert(3)
    >>> rh.insert(1)
    >>> rh.insert(3)
    >>> rh.insert(7)
    >>> rh.to_sorted_list()
    [1, 3, 3, 7]
    """
    self._root = RandomizedHeapNode.merge(self._root, RandomizedHeapNode(value))

def pop(self) -> T:
    """
    Pop the smallest value from the heap and return it.

    Raises:
        IndexError: if the heap is empty (raised by ``top()``).

    >>> rh = RandomizedHeap([3, 1, 3, 7])
    >>> rh.pop()
    1
    >>> rh.pop()
    3
    >>> rh.pop()
    3
    >>> rh.pop()
    7
    >>> rh.pop()
    Traceback (most recent call last):
    ...
    IndexError: Can't get top element for the empty heap.
    """
    # top() raises on an empty heap, so the root exists past this line.
    result = self.top()
    # Drop the root by merging its two subtrees into the new heap.
    self._root = RandomizedHeapNode.merge(self._root.left, self._root.right)

    return result

def top(self) -> T:
    """
    Return the smallest value from the heap without removing it.

    Raises:
        IndexError: if the heap is empty.

    >>> rh = RandomizedHeap()
    >>> rh.insert(3)
    >>> rh.top()
    3
    >>> rh.insert(1)
    >>> rh.top()
    1
    >>> rh.insert(3)
    >>> rh.top()
    1
    >>> rh.insert(7)
    >>> rh.top()
    1
    """
    if self._root is None:
        raise IndexError("Can't get top element for the empty heap.")
    return self._root.value

def clear(self) -> None:
    """
    Clear the heap by dropping the reference to its root.

    >>> rh = RandomizedHeap([3, 1, 3, 7])
    >>> rh.clear()
    >>> rh.pop()
    Traceback (most recent call last):
    ...
    IndexError: Can't get top element for the empty heap.
    """
    self._root = None

def to_sorted_list(self) -> List[T]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"""
Returns sorted list containing all the values in the heap.

>>> rh = RandomizedHeap([3, 1, 3, 7])
>>> rh.to_sorted_list()
[1, 3, 3, 7]
"""
result = []
while self:
result.append(self.pop())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would surprise me as a user of this class that requesting a list would remove all data from the heap.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you suggest to add all the items back to the heap at the end of the method?

Copy link
Member

@cclauss cclauss Oct 14, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest that you replace .to_sorted_list() with .__iter__() and that method should not use .pop() but should just yield values as it walks thru the heap.

Please search this repo for instances of __iter__() to see how this is done.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean, I know how __iter__ works in Python. But we can't iterate over a heap to get sorted values. In a BST we know where the next element is, but in a heap we can only pop the smallest one. We can't get the second element.

I can do it like this, since __iter__ is allowed to take more than O(1) time. And we don't need the structure to be thread-safe, so we can modify it. So popping and pushing back works.

WDYT?

    def __iter__(self) -> Iterator[T]:
        """
        Return an iterator over the values of the heap in ascending order.

        The heap is emptied with ``pop()`` to obtain the sorted order, and
        every value is then inserted back, so the heap holds the same values
        once the method returns.

        >>> rh = RandomizedHeap([3, 1, 3, 7])
        >>> list(rh)
        [1, 3, 3, 7]
        >>> list(rh)
        [1, 3, 3, 7]
        """
        result: List[T] = []
        while self:
            result.append(self.pop())

        # Pushing items back to the heap not to clear it.
        for item in result:
            self.insert(item)

        return iter(result)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good enough for now. Thanks for doing this!


return result

def __bool__(self) -> bool:
    """
    Check if the heap is not empty.

    Returns ``True`` when the heap has a root node and ``False`` otherwise.

    >>> rh = RandomizedHeap()
    >>> bool(rh)
    False
    >>> rh.insert(1)
    >>> bool(rh)
    True
    >>> rh.clear()
    >>> bool(rh)
    False
    """
    return self._root is not None


if __name__ == "__main__":
    # When run as a script, execute the doctests embedded in this module.
    import doctest

    doctest.testmod()