1
- '''
1
+ """
2
2
3
3
Author : Gowtham Kamalasekar
4
4
LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/
5
5
6
- '''
6
+ """
7
+
7
8
8
9
class DbScan:
    """
    DBSCAN Algorithm :
    Density-Based Spatial Clustering Of Applications With Noise
    Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN

    Every point gets a 1-based id in input order.  A point is a *core* point
    when at least ``minpts`` points (itself included) lie strictly closer
    than ``radius``; any other point is *noise*, and a noise point that lies
    in the neighbourhood of some core point is additionally a *border* point.

    Functions:
    ----------
    __init__() : stores minpts, radius and file, then runs perform_dbscan()
    perform_dbscan() : builds the neighbourhood dictionary
    print_dbscan() : prints every point with its core / noise / border label
    plot_dbscan() : draws the points with matplotlib

    To create an object
    -------------------
    import dbscan
    obj = dbscan.DbScan(minpts, radius, file)
    obj.print_dbscan()
    obj.plot_dbscan()
    """

    def __init__(
        self,
        minpts: int,
        radius: float,
        file: "str | tuple[dict[str, float], ...]" = (
            {"x": 3, "y": 7},
            {"x": 4, "y": 6},
            {"x": 5, "y": 5},
            {"x": 6, "y": 4},
            {"x": 7, "y": 3},
            {"x": 6, "y": 2},
            {"x": 7, "y": 2},
            {"x": 8, "y": 4},
            {"x": 3, "y": 3},
            {"x": 2, "y": 6},
            {"x": 3, "y": 5},
            {"x": 2, "y": 4},
        ),
    ) -> None:
        """
        Constructor

        Args:
        -----------
        minpts (int) : minimum neighbourhood size (the point itself counts)
                       for a point to be treated as a core point
        radius (float) : neighbourhood radius; the comparison is strict
                         (``distance < radius``)
        file (str | iterable of dicts) : either the path of a CSV file that
                       has ``x`` and ``y`` columns, or an iterable of
                       ``{"x": ..., "y": ...}`` dictionaries.  Defaults to
                       twelve sample points.

        The neighbourhood dictionary is computed immediately and stored in
        ``self.dict1``.

        >>> DbScan(4, 1.9).dict1[5]
        [4, 5, 6, 7, 8]
        """
        self.minpts = minpts
        self.radius = radius
        self.file = file
        self.dict1 = self.perform_dbscan()

    def _load_points(self) -> "tuple[list, list]":
        """Return the x and y coordinates of the input as two plain lists."""
        # Imported here, not in the class body: names bound in a class body
        # are class attributes and are not visible as globals inside methods.
        import pandas as pd

        if isinstance(self.file, str):
            frame = pd.read_csv(self.file)
        else:
            frame = pd.DataFrame(list(self.file))
        # Plain lists give positional access and avoid a pandas label lookup
        # for every pair of points in the O(n^2) distance loop.
        return frame["x"].tolist(), frame["y"].tolist()

    def perform_dbscan(self) -> dict[int, list[int]]:
        """
        Args:
        -----------
        None

        Return:
        --------
        Dictionary mapping each 1-based point id to the ascending list of
        point ids (itself included) whose Euclidean distance to it is
        strictly smaller than ``self.radius``.  A point without any such
        neighbour (only possible when the radius is not positive) gets no
        entry.

        >>> result = DbScan(4, 1.9).perform_dbscan()
        >>> result[11]
        [2, 10, 11, 12]
        >>> result[12]
        [9, 11, 12]
        >>> DbScan(4, 0).perform_dbscan()
        {}
        """
        import math

        xs, ys = self._load_points()
        points = list(zip(xs, ys))
        neighbourhoods: dict[int, list[int]] = {}
        for i, (xi, yi) in enumerate(points, start=1):
            close = [
                j
                for j, (xj, yj) in enumerate(points, start=1)
                if math.sqrt((xj - xi) ** 2 + (yj - yi) ** 2) < self.radius
            ]
            if close:
                neighbourhoods[i] = close
        return neighbourhoods

    def print_dbscan(self) -> None:
        """
        Outputs:
        --------
        Prints each point and if it is a core or a noise (w/ border)

        >>> DbScan(4, 1.9).print_dbscan()
        1   [1, 2, 10] ---> Noise ---> Border
        2   [1, 2, 3, 11] ---> Core
        3   [2, 3, 4] ---> Noise ---> Border
        4   [3, 4, 5] ---> Noise ---> Border
        5   [4, 5, 6, 7, 8] ---> Core
        6   [5, 6, 7] ---> Noise ---> Border
        7   [5, 6, 7] ---> Noise ---> Border
        8   [5, 8] ---> Noise ---> Border
        9   [9, 12] ---> Noise
        10   [1, 10, 11] ---> Noise ---> Border
        11   [2, 10, 11, 12] ---> Core
        12   [9, 11, 12] ---> Noise ---> Border
        """
        # Core ids are found once instead of re-testing every point for
        # every noise point.
        cores = [
            point
            for point, neighbours in self.dict1.items()
            if len(neighbours) >= self.minpts
        ]
        for point, neighbours in self.dict1.items():
            print(point, " ", neighbours, end=" ---> ")
            if len(neighbours) >= self.minpts:
                print("Core")
            elif any(
                point != core and point in self.dict1[core] for core in cores
            ):
                # A noise point inside some core point's neighbourhood.
                print("Noise ---> Border")
            else:
                print("Noise")

    def plot_dbscan(self) -> None:
        """
        Output:
        -------
        A matplotlib plot that show points as core and noise along
        with the circle that lie within it.  Core points are red with a blue
        circle of radius ``self.radius``; all other points are green.

        >>> DbScan(4,1.9).plot_dbscan()
        Plotted Successfully
        """
        # matplotlib is needed for plotting only, so it is imported lazily;
        # clustering and printing work without it.
        import matplotlib.pyplot as plt

        xs, ys = self._load_points()
        for point, neighbours in self.dict1.items():
            x, y = xs[point - 1], ys[point - 1]  # ids are 1-based
            if len(neighbours) >= self.minpts:
                plt.scatter(x, y, color="red")
                circle = plt.Circle((x, y), self.radius, color="blue", fill=False)
                plt.gca().add_artist(circle)
            else:
                plt.scatter(x, y, color="green")
            plt.text(x, y, "P" + str(point), ha="center", va="bottom")
        plt.xlabel("X")
        plt.ylabel("Y")
        plt.title("DBSCAN Clustering")
        plt.legend(["Core", "Noise"])
        plt.show()
        print("Plotted Successfully")
187
220
221
+
188
222
if __name__ == "__main__":
    # Run every doctest embedded in this module's docstrings.
    from doctest import testmod

    testmod()
0 commit comments