File tree 1 file changed +48
-0
lines changed
1 file changed +48
-0
lines changed Original file line number Diff line number Diff line change
1
+ """
2
+ Reservoir Sampling Algorithm
3
+
4
+ Use Case:
5
+ Efficient for selecting k random items from a data stream of unknown size,
6
+ or when the entire dataset cannot fit into memory.
7
+
8
+ Complexity:
9
+ - Time Complexity: O(n), where n is the total number of items
10
+ - Space Complexity: O(k)
11
+
12
+ Author: Michael Alexander Montoya
13
+ """
14
+
15
+ import random
16
+
17
def reservoir_sampling(stream, k, rng=None):
    """
    Select up to ``k`` items uniformly at random from ``stream`` in one pass.

    The first ``k`` items fill the reservoir. Every later item, at zero-based
    position ``i``, draws a slot index in ``[0, i]`` and overwrites the
    reservoir entry at that index when the index falls inside the reservoir,
    i.e. with probability ``k / (i + 1)``.

    Args:
        stream: Any iterable. It is traversed exactly once and is never
            materialized, so generators and other one-shot streams work.
        k: Maximum number of items to sample. Must not be negative.
        rng: Optional random source exposing ``randrange`` (for example a
            ``random.Random`` instance), useful for reproducible sampling.
            Defaults to the shared generator of the ``random`` module.

    Returns:
        A list of ``k`` sampled items, or all items of the stream in their
        original order when the stream yields fewer than ``k`` items.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative")

    # Bind the draw function once; the module itself offers the same
    # ``randrange`` API as a ``random.Random`` instance.
    randrange = (random if rng is None else rng).randrange

    reservoir = []

    for i, item in enumerate(stream):
        if i < k:
            # Still filling the reservoir: keep the item unconditionally.
            reservoir.append(item)
            continue

        # Uniform index over every position seen so far (inclusive of i);
        # only indices that land inside the reservoir cause a replacement.
        j = randrange(i + 1)
        if j < k:
            reservoir[j] = item

    return reservoir
40
+
41
+
42
# Demo: runs only when this file is executed as a script.
if __name__ == "__main__":
    sample_size = 10

    # range(1, 1001) stands in for a stream of the numbers 1 through 1000.
    sample = reservoir_sampling(range(1, 1001), sample_size)

    print(f"Random sample of { sample_size } items from stream: { sample } ")
You can’t perform that action at this time.
0 commit comments