1
- """
1
+ '''
2
2
3
3
Author : Gowtham Kamalasekar
4
4
LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/
5
5
6
- """
7
-
8
- import math
9
-
10
- import matplotlib .pyplot as plt
11
- import pandas as pd
12
- from typing import dict , list
13
-
6
+ '''
14
7
15
8
class DbScan :
16
- """
9
+ import math
10
+
11
+ import matplotlib .pyplot as plt
12
+ import pandas as pd
13
+ from typing import dict , list
14
+
15
+ '''
17
16
DBSCAN Algorithm :
18
17
Density-Based Spatial Clustering Of Applications With Noise
19
18
Refer to this website for more details : https://en.wikipedia.org/wiki/DBSCAN
@@ -33,28 +32,14 @@ class DbScan:
33
32
obj = dbscan.DbScan(minpts, radius, file)
34
33
obj.print_dbscan()
35
34
obj.plot_dbscan()
36
- """
37
-
38
- def __init__ (
39
- self ,
40
- minpts : int ,
41
- radius : int ,
42
- file : str = (
43
- {"x" : 3 , "y" : 7 },
44
- {"x" : 4 , "y" : 6 },
45
- {"x" : 5 , "y" : 5 },
46
- {"x" : 6 , "y" : 4 },
47
- {"x" : 7 , "y" : 3 },
48
- {"x" : 6 , "y" : 2 },
49
- {"x" : 7 , "y" : 2 },
50
- {"x" : 8 , "y" : 4 },
51
- {"x" : 3 , "y" : 3 },
52
- {"x" : 2 , "y" : 6 },
53
- {"x" : 3 , "y" : 5 },
54
- {"x" : 2 , "y" : 4 },
55
- ),
56
- ) -> None :
57
- """
35
+ '''
36
+ def __init__ (self , minpts : int , radius : int , file : str =
37
+ ({'x' : 3 , 'y' : 7 }, {'x' : 4 , 'y' : 6 }, {'x' : 5 , 'y' : 5 },
38
+ {'x' : 6 , 'y' : 4 },{'x' : 7 , 'y' : 3 }, {'x' : 6 , 'y' : 2 },
39
+ {'x' : 7 , 'y' : 2 }, {'x' : 8 , 'y' : 4 },{'x' : 3 , 'y' : 3 },
40
+ {'x' : 2 , 'y' : 6 }, {'x' : 3 , 'y' : 5 }, {'x' : 2 , 'y' : 4 })
41
+ ) -> None :
42
+ '''
58
43
Constructor
59
44
60
45
Args:
@@ -82,14 +67,13 @@ def __init__(
82
67
6 | 4
83
68
7 | 3
84
69
-----
85
- """
70
+ '''
86
71
self .minpts = minpts
87
72
self .radius = radius
88
73
self .file = file
89
74
self .dict1 = self .perform_dbscan ()
90
-
91
75
def perform_dbscan (self ) -> dict [int , list [int ]]:
92
- """
76
+ '''
93
77
Args:
94
78
-----------
95
79
None
@@ -115,30 +99,25 @@ def perform_dbscan(self) -> dict[int, list[int]]:
115
99
11 [2, 10, 11, 12]
116
100
12 [9, 11, 12]
117
101
118
- """
102
+ '''
119
103
if type (self .file ) is str :
120
- data = pd .read_csv (self .file )
104
+ data = pd .read_csv (self .file )
121
105
else :
122
106
data = pd .DataFrame (list (self .file ))
123
107
e = self .radius
124
108
dict1 = {}
125
109
for i in range (len (data )):
126
110
for j in range (len (data )):
127
- dist = math .sqrt (
128
- pow (data ["x" ][j ] - data ["x" ][i ], 2 )
129
- + pow (data ["y" ][j ] - data ["y" ][i ], 2 )
130
- )
111
+ dist = math .sqrt (pow (data ['x' ][j ] - data ['x' ][i ],2 )
112
+ + pow (data ['y' ][j ] - data ['y' ][i ],2 ))
131
113
if dist < e :
132
- if i + 1 in dict1 :
133
- dict1 [i + 1 ].append (j + 1 )
114
+ if i + 1 in dict1 :
115
+ dict1 [i + 1 ].append (j + 1 )
134
116
else :
135
- dict1 [i + 1 ] = [
136
- j + 1 ,
137
- ]
117
+ dict1 [i + 1 ] = [j + 1 ,]
138
118
return dict1
139
-
140
119
def print_dbscan (self ) -> None :
141
- """
120
+ '''
142
121
Outputs:
143
122
--------
144
123
Prints each point and whether it is a core point or noise (noise may also be a border point)
@@ -156,70 +135,56 @@ def print_dbscan(self) -> None:
156
135
10 [1, 10, 11] ---> Noise ---> Border
157
136
11 [2, 10, 11, 12] ---> Core
158
137
12 [9, 11, 12] ---> Noise ---> Border
159
- """
138
+ '''
160
139
for i in self .dict1 :
161
- print (i , " " , self .dict1 [i ], end = " ---> " )
140
+ print (i ," " ,self .dict1 [i ], end = ' ---> ' )
162
141
if len (self .dict1 [i ]) >= self .minpts :
163
142
print ("Core" )
164
143
else :
165
144
for j in self .dict1 :
166
145
if (
167
- i != j
168
- and len (self .dict1 [j ]) >= self .minpts
146
+ i != j
147
+ and len (self .dict1 [j ]) >= self .minpts
169
148
and i in self .dict1 [j ]
170
149
):
171
150
print ("Noise ---> Border" )
172
151
break
173
152
else :
174
153
print ("Noise" )
175
-
176
154
def plot_dbscan (self ) -> None :
177
- """
155
+ '''
178
156
Output:
179
157
-------
180
158
A matplotlib plot that shows points as core and noise along
181
159
with the circle of the given radius drawn around each core point.
182
160
183
161
>>> DbScan(4,1.9).plot_dbscan()
184
162
Plotted Successfully
185
- """
163
+ '''
186
164
if type (self .file ) is str :
187
- data = pd .read_csv (self .file )
165
+ data = pd .read_csv (self .file )
188
166
else :
189
167
data = pd .DataFrame (list (self .file ))
190
168
e = self .radius
191
169
for i in self .dict1 :
192
170
if len (self .dict1 [i ]) >= self .minpts :
193
- plt .scatter (data ["x" ][i - 1 ], data ["y" ][i - 1 ], color = "red" )
194
- circle = plt .Circle (
195
- (data ["x" ][i - 1 ], data ["y" ][i - 1 ]), e , color = "blue" , fill = False
196
- )
171
+ plt .scatter (data ['x' ][i - 1 ], data ['y' ][i - 1 ], color = 'red' )
172
+ circle = plt .Circle ((data ['x' ][i - 1 ], data ['y' ][i - 1 ]),
173
+ e , color = 'blue' , fill = False )
197
174
plt .gca ().add_artist (circle )
198
- plt .text (
199
- data ["x" ][i - 1 ],
200
- data ["y" ][i - 1 ],
201
- "P" + str (i ),
202
- ha = "center" ,
203
- va = "bottom" ,
204
- )
175
+ plt .text (data ['x' ][i - 1 ], data ['y' ][i - 1 ],
176
+ 'P' + str (i ), ha = 'center' , va = 'bottom' )
205
177
else :
206
- plt .scatter (data ["x" ][i - 1 ], data ["y" ][i - 1 ], color = "green" )
207
- plt .text (
208
- data ["x" ][i - 1 ],
209
- data ["y" ][i - 1 ],
210
- "P" + str (i ),
211
- ha = "center" ,
212
- va = "bottom" ,
213
- )
214
- plt .xlabel ("X" )
215
- plt .ylabel ("Y" )
216
- plt .title ("DBSCAN Clustering" )
217
- plt .legend (["Core" , "Noise" ])
178
+ plt .scatter (data ['x' ][i - 1 ], data ['y' ][i - 1 ], color = 'green' )
179
+ plt .text (data ['x' ][i - 1 ], data ['y' ][i - 1 ],
180
+ 'P' + str (i ), ha = 'center' , va = 'bottom' )
181
+ plt .xlabel ('X' )
182
+ plt .ylabel ('Y' )
183
+ plt .title ('DBSCAN Clustering' )
184
+ plt .legend (['Core' ,'Noise' ])
218
185
plt .show ()
219
186
print ("Plotted Successfully" )
220
187
221
-
222
188
if __name__ == "__main__" :
223
189
import doctest
224
-
225
190
doctest .testmod ()
0 commit comments