Skip to content

Commit e13b9d9

Browse files
authored
Update final3 dbscan.py
1 parent 0b65794 commit e13b9d9

File tree

1 file changed

+46
-81
lines changed

1 file changed

+46
-81
lines changed

machine_learning/dbscan.py

+46-81
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
1-
"""
1+
'''
22
33
Author : Gowtham Kamalasekar
44
LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/
55
6-
"""
7-
8-
import math
9-
10-
import matplotlib.pyplot as plt
11-
import pandas as pd
12-
from typing import dict, list
13-
6+
'''
147

158
import math

import pandas as pd


class DbScan:
    """
    DBSCAN Algorithm :
    Density-Based Spatial Clustering Of Applications With Noise
    Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN

    Every point is labelled from its neighbourhood, i.e. the points that lie
    strictly closer than ``radius`` to it (the point itself included when
    ``radius`` is positive):

    * Core   - the neighbourhood holds at least ``minpts`` points.
    * Border - not a core point, but inside the neighbourhood of a core point
               (reported as ``Noise ---> Border``).
    * Noise  - neither of the above.

    Points are numbered from 1 in input order.

    Usage:
    ------
    obj = dbscan.DbScan(minpts, radius, file)
    obj.print_dbscan()
    obj.plot_dbscan()
    """

    # Sample data set used when the caller does not supply ``file``.
    _DEFAULT_POINTS = (
        {"x": 3, "y": 7},
        {"x": 4, "y": 6},
        {"x": 5, "y": 5},
        {"x": 6, "y": 4},
        {"x": 7, "y": 3},
        {"x": 6, "y": 2},
        {"x": 7, "y": 2},
        {"x": 8, "y": 4},
        {"x": 3, "y": 3},
        {"x": 2, "y": 6},
        {"x": 3, "y": 5},
        {"x": 2, "y": 4},
    )

    def __init__(
        self,
        minpts: int,
        radius: float,
        file: "str | Iterable[dict[str, float]]" = _DEFAULT_POINTS,
    ) -> None:
        """
        Constructor

        Args:
        -----------
            minpts (int) : Minimum neighbourhood size for a point to be core.
            radius (float) : Neighbourhood radius; the comparison is strict
                (``distance < radius``).
            file : Either the path of a CSV file with ``x`` and ``y`` columns
                (passed to ``pandas.read_csv``) or an iterable of
                ``{"x": ..., "y": ...}`` records. Defaults to a built-in
                twelve-point sample.

        The neighbourhoods are computed immediately and stored in
        ``self.dict1``.
        """
        self.minpts = minpts
        self.radius = radius
        self.file = file
        self.dict1 = self.perform_dbscan()

    def _load_points(self) -> tuple[list, list]:
        """Return the x and y coordinates of the input as two plain lists."""
        if isinstance(self.file, str):
            data = pd.read_csv(self.file)
        else:
            data = pd.DataFrame(list(self.file))
        # An empty input has no "x"/"y" columns to index; treat it as no points.
        if data.empty:
            return [], []
        return data["x"].tolist(), data["y"].tolist()

    def _is_core(self, point: int) -> bool:
        """Return True when ``point`` has at least ``minpts`` neighbours."""
        return len(self.dict1[point]) >= self.minpts

    def perform_dbscan(self) -> dict[int, list[int]]:
        """
        Compute the neighbourhood of every point.

        Args:
        -----------
            None

        Return:
        --------
            Dictionary mapping each 1-based point number to the ascending list
            of 1-based point numbers lying strictly within ``radius`` of it.
            A point without any neighbour (only possible when ``radius`` is
            not positive) gets no entry.

        >>> DbScan(4, 1.9).perform_dbscan()[11]
        [2, 10, 11, 12]
        >>> DbScan(4, 1.9).perform_dbscan()[12]
        [9, 11, 12]
        """
        xs, ys = self._load_points()
        points = list(zip(xs, ys))
        eps = self.radius
        neighbours: dict[int, list[int]] = {}
        for i, (xi, yi) in enumerate(points, start=1):
            close = [
                j
                for j, (xj, yj) in enumerate(points, start=1)
                if math.sqrt((xj - xi) ** 2 + (yj - yi) ** 2) < eps
            ]
            if close:
                neighbours[i] = close
        return neighbours

    def print_dbscan(self) -> None:
        """
        Outputs:
        --------
            Prints each point, its neighbourhood and whether it is a core
            point, a border point (``Noise ---> Border``) or plain noise.

        >>> DbScan(4, 1.9).print_dbscan()  # doctest: +NORMALIZE_WHITESPACE
        1 [1, 2, 10] ---> Noise ---> Border
        2 [1, 2, 3, 11] ---> Core
        3 [2, 3, 4] ---> Noise ---> Border
        4 [3, 4, 5] ---> Noise ---> Border
        5 [4, 5, 6, 7, 8] ---> Core
        6 [5, 6, 7] ---> Noise ---> Border
        7 [5, 6, 7] ---> Noise ---> Border
        8 [5, 8] ---> Noise ---> Border
        9 [9, 12] ---> Noise
        10 [1, 10, 11] ---> Noise ---> Border
        11 [2, 10, 11, 12] ---> Core
        12 [9, 11, 12] ---> Noise ---> Border
        """
        core_points = [point for point in self.dict1 if self._is_core(point)]
        for point, close in self.dict1.items():
            print(point, " ", close, end=" ---> ")
            if self._is_core(point):
                print("Core")
            elif any(point in self.dict1[core] for core in core_points):
                # Not dense enough itself, but reachable from a core point.
                print("Noise ---> Border")
            else:
                print("Noise")

    def plot_dbscan(self) -> None:
        """
        Output:
        -------
            A matplotlib plot that shows core points in red (each with a blue
            circle of size ``radius`` around it) and all other points in
            green, every point annotated with its number. Prints
            "Plotted Successfully" once the plot window has been shown.

        >>> DbScan(4,1.9).plot_dbscan()  # doctest: +SKIP
        Plotted Successfully
        """
        # Imported lazily so clustering and printing work without matplotlib.
        import matplotlib.pyplot as plt

        xs, ys = self._load_points()
        core = [point for point in self.dict1 if self._is_core(point)]
        other = [point for point in self.dict1 if not self._is_core(point)]

        # One labelled scatter per category keeps the legend entries attached
        # to the right colour regardless of the order of the points.
        for members, colour, label in (
            (core, "red", "Core"),
            (other, "green", "Noise"),
        ):
            plt.scatter(
                [xs[point - 1] for point in members],
                [ys[point - 1] for point in members],
                color=colour,
                label=label,
            )

        for point in core:
            circle = plt.Circle(
                (xs[point - 1], ys[point - 1]),
                self.radius,
                color="blue",
                fill=False,
            )
            plt.gca().add_artist(circle)

        for point in self.dict1:
            plt.text(
                xs[point - 1],
                ys[point - 1],
                "P" + str(point),
                ha="center",
                va="bottom",
            )

        plt.xlabel("X")
        plt.ylabel("Y")
        plt.title("DBSCAN Clustering")
        plt.legend()
        plt.show()
        print("Plotted Successfully")
220187

221-
222188
if __name__ == "__main__":
    # Executed only when the module is run as a script: check every doctest
    # example embedded in the docstrings above.
    from doctest import testmod

    testmod()

0 commit comments

Comments
 (0)