Skip to content

refactor: Enhance docs, add more tests in BloomFilter #5948

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

/**
* A generic BloomFilter implementation for probabilistic membership checking.
* <p>
* Bloom filters are space-efficient data structures that provide a fast way to test whether an
* element is a member of a set. They may produce false positives, indicating an element is
* in the set when it is not, but they will never produce false negatives.
* </p>
*
* @param <T> The type of elements to be stored in the Bloom filter.
*/
Expand All @@ -17,18 +22,22 @@ public class BloomFilter<T> {
* Constructs a BloomFilter with a specified number of hash functions and bit array size.
*
* @param numberOfHashFunctions the number of hash functions to use
* @param bitArraySize the size of the bit array, which determines the capacity of the filter
* @throws IllegalArgumentException if numberOfHashFunctions or bitArraySize is less than 1
*/
@SuppressWarnings("unchecked")
public BloomFilter(int numberOfHashFunctions, int bitArraySize) {
    // Both arguments must be strictly positive, so it is enough to test the smaller one.
    if (Math.min(numberOfHashFunctions, bitArraySize) < 1) {
        throw new IllegalArgumentException("Number of hash functions and bit array size must be greater than 0");
    }

    this.numberOfHashFunctions = numberOfHashFunctions;
    // Java cannot create a generic array directly, so a raw Hash[] is allocated;
    // this is the reason for the "unchecked" suppression on this constructor.
    this.hashFunctions = new Hash[numberOfHashFunctions];
    this.bitArray = new BitSet(bitArraySize);

    // Populate the (still empty) hashFunctions array now that its length is fixed.
    initializeHashFunctions();
}

/**
* Initializes the hash functions with unique indices to ensure different hashing.
*/
private void initializeHashFunctions() {
for (int i = 0; i < numberOfHashFunctions; i++) {
Expand All @@ -38,8 +47,12 @@ private void initializeHashFunctions() {

/**
* Inserts an element into the Bloom filter.
* <p>
* This method hashes the element using all defined hash functions and sets the corresponding
* bits in the bit array.
* </p>
*
* @param key the element to insert into the Bloom filter
*/
public void insert(T key) {
for (Hash<T> hash : hashFunctions) {
Expand All @@ -50,8 +63,13 @@ public void insert(T key) {

/**
* Checks if an element might be in the Bloom filter.
* <p>
* This method checks the bits at the positions computed by each hash function. If any of these
* bits are not set, the element is definitely not in the filter. If all bits are set, the element
* might be in the filter.
* </p>
*
* @param key the element to check for membership in the Bloom filter
* @return {@code true} if the element might be in the Bloom filter, {@code false} if it is definitely not
*/
public boolean contains(T key) {
Expand All @@ -66,6 +84,9 @@ public boolean contains(T key) {

/**
* Inner class representing a hash function used by the Bloom filter.
* <p>
* Each instance of this class represents a different hash function based on its index.
* </p>
*
* @param <T> The type of elements to be hashed.
*/
Expand All @@ -76,27 +97,35 @@ private static class Hash<T> {
/**
* Constructs a Hash function with a specified index.
*
* @param index the index of this hash function, used to create a unique hash
*/
Hash(int index) {
// Kept so that compute(...) can multiply its result by this index.
this.index = index;
}

/**
 * Produces this hash function's value for the supplied key.
 * <p>
 * The key is turned into text with {@link String#valueOf(Object)} (so a {@code null}
 * key is hashed as the text {@code "null"}), that text is reduced to an integer by
 * {@code asciiString} (documented as the sum of its character values), and the result
 * is multiplied by this function's index. The product is returned as-is; no range
 * reduction is applied here.
 * </p>
 *
 * @param key the element to hash
 * @return the index of this hash function multiplied by the value computed for the key's text
 */
public int compute(T key) {
    final String keyText = String.valueOf(key);
    final int characterSum = asciiString(keyText);
    return index * characterSum;
}

/**
* Computes the ASCII value sum of the characters in a string.
* <p>
* This method iterates through each character of the string and accumulates
* their ASCII values to produce a single integer value.
* </p>
*
* @param word the string to compute
* @return the sum of ASCII values of the characters in the string
*/
private int asciiString(String word) {
int sum = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,55 @@ void testMultipleInsertions() {

Assertions.assertFalse(bloomFilter.contains("key" + 200));
}

@Test
void testEmptyFilterContains() {
    // NOTE(review): assumes the shared bloomFilter fixture is created fresh for each test - confirm in setup.
    final boolean unknownKeyReported = bloomFilter.contains("notInserted");
    Assertions.assertFalse(unknownKeyReported, "Filter should not contain any elements when empty");

    // A null key must also be reported as absent while nothing has been inserted.
    final boolean nullKeyReported = bloomFilter.contains(null);
    Assertions.assertFalse(nullKeyReported, "Filter should not contain null elements");
}

@Test
void testDifferentTypes() {
    final BloomFilter<Object> filter = new BloomFilter<>(3, 100);

    // One sample per element type: a String, a boxed int and a boxed double.
    final String text = "string";
    final Integer whole = 123;
    final Double fractional = 45.67;
    for (Object element : new Object[] {text, whole, fractional}) {
        filter.insert(element);
    }

    // Every inserted element must be reported as possibly present.
    Assertions.assertTrue(filter.contains(text), "Filter should contain the string 'string'");
    Assertions.assertTrue(filter.contains(whole), "Filter should contain the integer 123");
    Assertions.assertTrue(filter.contains(fractional), "Filter should contain the double 45.67");

    // A key that was never inserted is expected to be rejected.
    Assertions.assertFalse(filter.contains("missing"), "Filter should not contain elements that were not inserted");
}

@Test
void testFalsePositiveAfterInsertions() {
    // Seed the filter with a handful of keys.
    for (String animal : new String[] {"cat", "dog", "fish"}) {
        bloomFilter.insert(animal);
    }

    // A key that was never added is expected to be reported as absent.
    Assertions.assertFalse(bloomFilter.contains("bird"), "Filter should not contain 'bird' which was never inserted");

    // Load the filter with 100 further keys ("item0" .. "item99") to raise the chance of a false positive.
    int suffix = 0;
    while (suffix < 100) {
        bloomFilter.insert("item" + suffix);
        suffix++;
    }

    // Even with the extra load, this particular never-inserted key must still be rejected.
    Assertions.assertFalse(bloomFilter.contains("nonexistent"), "Filter should not contain 'nonexistent' which was never inserted");
}

@Test
void testBoundaryConditions() {
    // A deliberately small filter: 3 hash functions over a bit array of size 10.
    final BloomFilter<String> filter = new BloomFilter<>(3, 10);
    for (String letter : new String[] {"a", "b", "c", "d"}) {
        filter.insert(letter);
    }

    // All four inserted keys must be found.
    Assertions.assertTrue(filter.contains("a"), "Filter should contain 'a'");
    Assertions.assertTrue(filter.contains("b"), "Filter should contain 'b'");
    Assertions.assertTrue(filter.contains("c"), "Filter should contain 'c'");
    Assertions.assertTrue(filter.contains("d"), "Filter should contain 'd'");

    // The one key that was left out is expected to be reported as absent.
    Assertions.assertFalse(filter.contains("e"), "Filter should not contain 'e' which was not inserted");
}
}