Skip to content

Commit 61beb79

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent e13b9d9 commit 61beb79

File tree

1 file changed

+76
-41
lines changed

1 file changed

+76
-41
lines changed

machine_learning/dbscan.py

+76-41
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,19 @@
1-
'''
1+
"""
22
33
Author : Gowtham Kamalasekar
44
LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/
55
6-
'''
6+
"""
7+
78

89
class DbScan:
910
import math

import matplotlib.pyplot as plt
import pandas as pd

# NOTE: ``typing`` exports ``Dict``/``List`` (capitalised) only, so the former
# ``from typing import dict, list`` raised ImportError as soon as this file was
# loaded.  The annotations in this file use the builtin generics
# (``dict[int, list[int]]``), which need no import at all.
14-
15-
'''
15+
16+
"""
1617
DBSCAN Algorithm :
1718
Density-Based Spatial Clustering Of Applications With Noise
1819
Refer to this website for more details : https://en.wikipedia.org/wiki/DBSCAN
@@ -32,14 +33,28 @@ class DbScan:
3233
obj = dbscan.DbScan(minpts, radius, file)
3334
obj.print_dbscan()
3435
obj.plot_dbscan()
35-
'''
36-
def __init__(
    self,
    minpts: int,
    radius: float,
    file: "str | tuple[dict[str, float], ...]" = (
        {"x": 3, "y": 7},
        {"x": 4, "y": 6},
        {"x": 5, "y": 5},
        {"x": 6, "y": 4},
        {"x": 7, "y": 3},
        {"x": 6, "y": 2},
        {"x": 7, "y": 2},
        {"x": 8, "y": 4},
        {"x": 3, "y": 3},
        {"x": 2, "y": 6},
        {"x": 3, "y": 5},
        {"x": 2, "y": 4},
    ),
) -> None:
    """
    Constructor

    Args:
    -----------
        minpts : a point whose neighbourhood (the point itself included)
                 holds at least this many points is reported as core.
        radius : neighbourhood radius; another point is a neighbour when
                 its Euclidean distance is strictly below this value.
                 Fractional radii such as 1.9 are accepted.
        file   : either the path of a CSV file that has an ``x`` and a
                 ``y`` column, or an iterable of mappings with the keys
                 ``"x"`` and ``"y"``.  The default is a built-in sample
                 of twelve points.

    Attributes set:
    -----------
        minpts, radius, file : the arguments, stored unchanged.
        dict1 : result of ``perform_dbscan()``, computed immediately, so
                constructing the object already reads ``file``.
    """
    self.minpts = minpts
    self.radius = radius
    self.file = file
    # Cluster eagerly: print_dbscan() and plot_dbscan() only read dict1.
    self.dict1 = self.perform_dbscan()
90+
7591
def perform_dbscan(self) -> dict[int, list[int]]:
    """
    Compute the radius-neighbourhood of every point.

    Args:
    -----------
        None (reads ``self.file`` and ``self.radius``)

    Return:
    --------
        Dictionary that maps the 1-based number of a point to the sorted
        list of 1-based numbers of all points whose Euclidean distance to
        it is strictly less than ``self.radius``.  A point is its own
        neighbour whenever the radius is positive.  Points that end up
        with no neighbour at all (radius <= 0) get no entry.

    >>> DbScan(4, 1.9).perform_dbscan()[11]
    [2, 10, 11, 12]
    """
    # isinstance (not ``type(...) is str``) so str subclasses count as paths.
    if isinstance(self.file, str):
        data = pd.read_csv(self.file)
    else:
        data = pd.DataFrame(list(self.file))

    # No rows: nothing to cluster (and the x/y columns may not exist).
    if data.empty:
        return {}

    # Pull the coordinates out of pandas once; scalar Series lookups inside
    # the quadratic loop below are needlessly slow.
    xs = data["x"].tolist()
    ys = data["y"].tolist()
    radius = self.radius

    neighbourhoods: dict[int, list[int]] = {}
    for i, (xi, yi) in enumerate(zip(xs, ys), start=1):
        close = [
            j
            for j, (xj, yj) in enumerate(zip(xs, ys), start=1)
            if math.hypot(xj - xi, yj - yi) < radius
        ]
        if close:
            neighbourhoods[i] = close
    return neighbourhoods
139+
119140
def print_dbscan(self) -> None:
    """
    Outputs:
    --------
        Prints each point with its neighbour list and its class:
        "Core" when the neighbour list has at least ``minpts`` entries,
        "Noise ---> Border" when the point is not core but appears in the
        neighbour list of some core point, and "Noise" otherwise.

    >>> DbScan(4, 1.9).print_dbscan()
    1   [1, 2, 10] ---> Noise ---> Border
    2   [1, 2, 3, 11] ---> Core
    3   [2, 3, 4] ---> Noise ---> Border
    4   [3, 4, 5] ---> Noise ---> Border
    5   [4, 5, 6, 7, 8] ---> Core
    6   [5, 6, 7] ---> Noise ---> Border
    7   [5, 6, 7] ---> Noise ---> Border
    8   [5, 8] ---> Noise ---> Border
    9   [9, 12] ---> Noise
    10   [1, 10, 11] ---> Noise ---> Border
    11   [2, 10, 11, 12] ---> Core
    12   [9, 11, 12] ---> Noise ---> Border
    """
    # Work out the core points once instead of re-testing every point
    # inside the border search of every non-core point.
    core_points = [
        point
        for point, neighbours in self.dict1.items()
        if len(neighbours) >= self.minpts
    ]
    for point, neighbours in self.dict1.items():
        print(point, " ", neighbours, end=" ---> ")
        if len(neighbours) >= self.minpts:
            print("Core")
        # A non-core point is never in core_points, so no self-check needed.
        elif any(point in self.dict1[core] for core in core_points):
            print("Noise ---> Border")
        else:
            print("Noise")
175+
154176
def plot_dbscan(self) -> None:
    """
    Output:
    -------
        A matplotlib plot that shows every point labelled "P<number>":
        core points in red, each with a blue circle of the neighbourhood
        radius around it, and all other points in green.

    >>> DbScan(4,1.9).plot_dbscan()
    Plotted Successfully
    """
    if isinstance(self.file, str):
        data = pd.read_csv(self.file)
    else:
        data = pd.DataFrame(list(self.file))
    radius = self.radius
    axes = plt.gca()

    core_xy = []
    other_xy = []
    for point, neighbours in self.dict1.items():
        # Point numbers are 1-based, data rows are 0-based.
        x, y = data["x"][point - 1], data["y"][point - 1]
        if len(neighbours) >= self.minpts:
            core_xy.append((x, y))
            axes.add_artist(plt.Circle((x, y), radius, color="blue", fill=False))
        else:
            other_xy.append((x, y))
        plt.text(x, y, "P" + str(point), ha="center", va="bottom")

    # One labelled scatter per class.  Passing a bare label list to
    # plt.legend() names the first two artists drawn, whatever their class,
    # so the legend was wrong whenever point 1 and 2 were not core/noise.
    for coords, colour, label in (
        (core_xy, "red", "Core"),
        (other_xy, "green", "Noise"),
    ):
        if coords:
            xs, ys = zip(*coords)
            plt.scatter(xs, ys, color=colour, label=label)

    plt.xlabel("X")
    plt.ylabel("Y")
    plt.title("DBSCAN Clustering")
    if core_xy or other_xy:
        plt.legend()
    plt.show()
    print("Plotted Successfully")
187220

221+
188222
if __name__ == "__main__":
    # Run the examples embedded in the docstrings when executed as a script.
    import doctest

    doctest.testmod()

0 commit comments

Comments
 (0)