Skip to content

Commit 249b0e8

Browse files
authored
Update dbscan.py
1 parent b7e5e9c commit 249b0e8

File tree

1 file changed

+51
-85
lines changed

1 file changed

+51
-85
lines changed

machine_learning/dbscan.py

+51-85
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
import math
2+
from typing import dict, list, optional
23
import matplotlib.pyplot as plt
34
import pandas as pd
4-
from typing import dict, list, optional
5-
6-
75
class DbScan:
8-
"""
6+
'''
97
DBSCAN Algorithm :
108
Density-Based Spatial Clustering Of Applications With Noise
11-
Reference Website : https://en.wikipedia.org/wiki/DBSCAN
12-
Reference YouTube Video : https://youtu.be/-p354tQsKrs?si=t1IxCFhrOB-RAcIU
9+
Refer to this website for more details : https://en.wikipedia.org/wiki/DBSCAN
1310
1411
Functions:
1512
----------
@@ -26,28 +23,14 @@ class DbScan:
2623
obj = dbscan.DbScan(minpts, radius, file)
2724
obj.print_dbscan()
2825
obj.plot_dbscan()
29-
"""
30-
31-
def __init__(
32-
self,
33-
minpts: int,
34-
radius: int,
35-
file: optional[str] = [
36-
{"x": 3, "y": 7},
37-
{"x": 4, "y": 6},
38-
{"x": 5, "y": 5},
39-
{"x": 6, "y": 4},
40-
{"x": 7, "y": 3},
41-
{"x": 6, "y": 2},
42-
{"x": 7, "y": 2},
43-
{"x": 8, "y": 4},
44-
{"x": 3, "y": 3},
45-
{"x": 2, "y": 6},
46-
{"x": 3, "y": 5},
47-
{"x": 2, "y": 4},
48-
],
49-
) -> None:
50-
"""
26+
'''
27+
def __init__(self, minpts : int, radius : int, file : optional[str] =
28+
({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5},
29+
{'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2},
30+
{'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3},
31+
{'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4})
32+
) -> None:
33+
'''
5134
Constructor
5235
5336
Args:
@@ -75,14 +58,13 @@ def __init__(
7558
6 | 4
7659
7 | 3
7760
-----
78-
"""
61+
'''
7962
self.minpts = minpts
8063
self.radius = radius
8164
self.file = file
8265
self.dict1 = self.perform_dbscan()
83-
8466
def perform_dbscan(self) -> dict[int, list[int]]:
85-
"""
67+
'''
8668
Args:
8769
-----------
8870
None
@@ -108,31 +90,25 @@ def perform_dbscan(self) -> dict[int, list[int]]:
10890
11 [2, 10, 11, 12]
10991
12 [9, 11, 12]
11092
111-
"""
112-
data = (
113-
pd.read_csv(self.file)
114-
if type(self.file) == type("str")
115-
else pd.DataFrame(self.file)
116-
)
93+
'''
94+
if type(self.file) is str:
95+
data = pd.read_csv(self.file)
96+
else:
97+
data = pd.DataFrame(list(self.file))
11798
e = self.radius
11899
dict1 = {}
119100
for i in range(len(data)):
120101
for j in range(len(data)):
121-
dist = math.sqrt(
122-
pow(data["x"][j] - data["x"][i], 2)
123-
+ pow(data["y"][j] - data["y"][i], 2)
124-
)
102+
dist = math.sqrt(pow(data['x'][j] - data['x'][i],2)
103+
+ pow(data['y'][j] - data['y'][i],2))
125104
if dist < e:
126-
if i + 1 in dict1:
127-
dict1[i + 1].append(j + 1)
105+
if i+1 in dict1:
106+
dict1[i+1].append(j+1)
128107
else:
129-
dict1[i + 1] = [
130-
j + 1,
131-
]
108+
dict1[i+1] = [j+1,]
132109
return dict1
133-
134110
def print_dbscan(self) -> None:
135-
"""
111+
'''
136112
Outputs:
137113
--------
138114
Prints each point and whether it is a core point or noise (noting border points)
@@ -150,62 +126,52 @@ def print_dbscan(self) -> None:
150126
10 [1, 10, 11] ---> Noise ---> Border
151127
11 [2, 10, 11, 12] ---> Core
152128
12 [9, 11, 12] ---> Noise ---> Border
153-
"""
129+
'''
154130
for i in self.dict1:
155-
print(i, " ", self.dict1[i], end=" ---> ")
131+
print(i," ",self.dict1[i], end=' ---> ')
156132
if len(self.dict1[i]) >= self.minpts:
157133
print("Core")
158134
else:
159135
for j in self.dict1:
160-
if i != j and len(self.dict1[j]) >= self.minpts:
161-
if i in self.dict1[j]:
162-
print("Noise ---> Border")
163-
break
136+
if (
137+
i != j
138+
and len(self.dict1[j]) >= self.minpts
139+
and i in self.dict1[j]
140+
):
141+
print("Noise ---> Border")
142+
break
164143
else:
165144
print("Noise")
166-
167145
def plot_dbscan(self) -> None:
168-
"""
146+
'''
169147
Output:
170148
-------
171149
A matplotlib plot that shows points as core and noise along
172150
with the circles that lie within it.
173151
174152
>>> DbScan(4,1.9).plot_dbscan()
175153
Plotted Successfully
176-
"""
177-
data = (
178-
pd.read_csv(self.file)
179-
if type(self.file) == type("str")
180-
else pd.DataFrame(self.file)
181-
)
154+
'''
155+
if type(self.file) is str:
156+
data = pd.read_csv(self.file)
157+
else:
158+
data = pd.DataFrame(list(self.file))
182159
e = self.radius
183160
for i in self.dict1:
184161
if len(self.dict1[i]) >= self.minpts:
185-
plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red")
186-
circle = plt.Circle(
187-
(data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False
188-
)
162+
plt.scatter(data['x'][i-1], data['y'][i-1], color='red')
163+
circle = plt.Circle((data['x'][i-1], data['y'][i-1]),
164+
e, color='blue', fill=False)
189165
plt.gca().add_artist(circle)
190-
plt.text(
191-
data["x"][i - 1],
192-
data["y"][i - 1],
193-
"P" + str(i),
194-
ha="center",
195-
va="bottom",
196-
)
166+
plt.text(data['x'][i-1], data['y'][i-1],
167+
'P'+str(i), ha='center', va='bottom')
197168
else:
198-
plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green")
199-
plt.text(
200-
data["x"][i - 1],
201-
data["y"][i - 1],
202-
"P" + str(i),
203-
ha="center",
204-
va="bottom",
205-
)
206-
plt.xlabel("X")
207-
plt.ylabel("Y")
208-
plt.title("DBSCAN Clustering")
209-
plt.legend(["Core", "Noise"])
169+
plt.scatter(data['x'][i-1], data['y'][i-1], color='green')
170+
plt.text(data['x'][i-1], data['y'][i-1],
171+
'P'+str(i), ha='center', va='bottom')
172+
plt.xlabel('X')
173+
plt.ylabel('Y')
174+
plt.title('DBSCAN Clustering')
175+
plt.legend(['Core','Noise'])
210176
plt.show()
211177
print("Plotted Successfully")

0 commit comments

Comments
 (0)