Skip to content

Commit 8b4d5e8

Browse files
authored
Update final2 dbscan.py
1 parent 67ccda1 commit 8b4d5e8

File tree

1 file changed

+51
-73
lines changed

1 file changed

+51
-73
lines changed

machine_learning/dbscan.py

+51-73
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
1+
'''
2+
3+
Author : Gowtham Kamalasekar
4+
LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/
5+
6+
'''
7+
18
import math
from typing import Dict, List, Optional, Sequence, Union

import matplotlib.pyplot as plt
import pandas as pd
713

814
class DbScan:
    """
    DBSCAN Algorithm :
    Density-Based Spatial Clustering Of Applications With Noise
    Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN

    Points are numbered from 1 in the order they appear in the input.
    For every point the class records which points lie strictly closer
    than ``radius`` (the point itself included).  A point with at least
    ``minpts`` such neighbours is reported as "Core"; a non-core point that
    appears in the neighbour list of a core point is reported as
    "Noise ---> Border"; every other point is reported as "Noise".

    Attributes:
    -----------
    minpts : int : neighbour count needed for a point to be a core point
    radius : float : neighbourhood radius (strict ``<`` comparison)
    file : str or sequence of {"x": ..., "y": ...} dicts : input points
    dict1 : dict : point number -> list of neighbouring point numbers

    To use this class:
    ------------------
    import dbscan
    obj = dbscan.DbScan(minpts, radius, file)
    obj.print_dbscan()
    obj.plot_dbscan()
    """

    def __init__(
        self,
        minpts: int,
        radius: float,
        file: Union[str, Sequence[dict[str, float]]] = (
            {"x": 3, "y": 7},
            {"x": 4, "y": 6},
            {"x": 5, "y": 5},
            {"x": 6, "y": 4},
            {"x": 7, "y": 3},
            {"x": 6, "y": 2},
            {"x": 7, "y": 2},
            {"x": 8, "y": 4},
            {"x": 3, "y": 3},
            {"x": 2, "y": 6},
            {"x": 3, "y": 5},
            {"x": 2, "y": 4},
        ),
    ) -> None:
        """
        Constructor

        Args:
        -----------
        minpts (int) : Minimum number of points (the point itself included)
            that must lie within ``radius`` for a point to be a core point.
        radius (float) : Neighbourhood radius around each point.
        file (str | sequence of dicts) : Either the path of a CSV file or an
            in-memory sequence of ``{"x": ..., "y": ...}`` dicts.  Both
            forms must provide an ``x`` and a ``y`` column.  Defaults to a
            built-in sample of twelve points.

        The neighbour table is computed immediately and stored in ``dict1``.

        >>> DbScan(4, 1.9).dict1[5]
        [4, 5, 6, 7, 8]
        """
        self.minpts = minpts
        self.radius = radius
        self.file = file
        self.dict1 = self.perform_dbscan()

    def _load_data(self) -> pd.DataFrame:
        """Return the input points as a DataFrame with ``x`` and ``y`` columns."""
        # A string is treated as a CSV path; anything else is assumed to be
        # an iterable of {"x": ..., "y": ...} records.
        if isinstance(self.file, str):
            return pd.read_csv(self.file)
        return pd.DataFrame(list(self.file))

    def perform_dbscan(self) -> dict[int, list[int]]:
        """
        Args:
        -----------
        None

        Return:
        --------
        Dictionary mapping each 1-based point number to the ascending list
        of 1-based point numbers whose Euclidean distance to it is strictly
        less than ``radius``.  A point is its own neighbour whenever
        ``radius`` is positive.  Points without any neighbour get no entry.

        >>> DbScan(4, 1.9).perform_dbscan()[11]
        [2, 10, 11, 12]
        >>> DbScan(4, 1.9).perform_dbscan()[12]
        [9, 11, 12]
        """
        data = self._load_data()
        # len() rather than truthiness: the truth value of a DataFrame is
        # ambiguous.  No rows means there is nothing to compare.
        if len(data) == 0:
            return {}

        # Pull the columns into plain lists once; indexing the DataFrame
        # cell by cell inside the double loop is needlessly slow.
        xs = data["x"].tolist()
        ys = data["y"].tolist()
        radius = self.radius

        neighbours: dict[int, list[int]] = {}
        for i, (xi, yi) in enumerate(zip(xs, ys), start=1):
            within = [
                j
                for j, (xj, yj) in enumerate(zip(xs, ys), start=1)
                if math.hypot(xj - xi, yj - yi) < radius
            ]
            if within:
                neighbours[i] = within
        return neighbours

    def print_dbscan(self) -> None:
        """
        Outputs:
        --------
        Prints each point with its neighbour list and whether it is a
        core point, a border point ("Noise ---> Border") or plain noise.

        >>> DbScan(4, 1.9).print_dbscan()
        1   [1, 2, 10] ---> Noise ---> Border
        2   [1, 2, 3, 11] ---> Core
        3   [2, 3, 4] ---> Noise ---> Border
        4   [3, 4, 5] ---> Noise ---> Border
        5   [4, 5, 6, 7, 8] ---> Core
        6   [5, 6, 7] ---> Noise ---> Border
        7   [5, 6, 7] ---> Noise ---> Border
        8   [5, 8] ---> Noise ---> Border
        9   [9, 12] ---> Noise
        10   [1, 10, 11] ---> Noise ---> Border
        11   [2, 10, 11, 12] ---> Core
        12   [9, 11, 12] ---> Noise ---> Border
        """
        core_points = [
            point
            for point, neighbours in self.dict1.items()
            if len(neighbours) >= self.minpts
        ]
        for point, neighbours in self.dict1.items():
            print(point, " ", neighbours, end=" ---> ")
            if len(neighbours) >= self.minpts:
                print("Core")
            elif any(point in self.dict1[core] for core in core_points):
                # Not dense enough itself, but inside a core point's radius.
                print("Noise ---> Border")
            else:
                print("Noise")

    def plot_dbscan(self) -> None:
        """
        Output:
        -------
        A matplotlib plot that shows core points in red (each with a blue
        circle of size ``radius`` around it) and all other points in green,
        every point labelled "P<number>".  Prints "Plotted Successfully"
        after the plot window has been shown.

        >>> DbScan(4,1.9).plot_dbscan()
        Plotted Successfully
        """
        data = self._load_data()
        radius = self.radius
        axes = plt.gca()

        core_xy = []
        noise_xy = []
        for point, neighbours in self.dict1.items():
            xy = (data["x"][point - 1], data["y"][point - 1])
            if len(neighbours) >= self.minpts:
                core_xy.append(xy)
                axes.add_artist(plt.Circle(xy, radius, color="blue", fill=False))
            else:
                noise_xy.append(xy)
            plt.text(xy[0], xy[1], "P" + str(point), ha="center", va="bottom")

        # Draw each category with a single labelled scatter call.  Passing a
        # bare label list to plt.legend() would label the first two artists
        # in drawing order, which mislabels the legend whenever the first
        # point is not a core point.
        for label, colour, group in (
            ("Core", "red", core_xy),
            ("Noise", "green", noise_xy),
        ):
            if group:
                group_x, group_y = zip(*group)
                plt.scatter(group_x, group_y, color=colour, label=label)

        plt.xlabel("X")
        plt.ylabel("Y")
        plt.title("DBSCAN Clustering")
        if core_xy or noise_xy:
            plt.legend()
        plt.show()
        print("Plotted Successfully")
187+
188+
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()

0 commit comments

Comments
 (0)