1
+ '''
2
+
3
+ Author : Gowtham Kamalasekar
4
+ LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/
5
+
6
+ '''
7
+
1
8
import math
2
9
3
10
import matplotlib .pyplot as plt
4
11
import pandas as pd
5
- from typing import Dict , List , Optional
6
-
12
+ from typing import dict , list
7
13
8
14
class DbScan:
    """
    DBSCAN Algorithm :
    Density-Based Spatial Clustering Of Applications With Noise
    Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN

    For every point the class records which points lie strictly closer than
    ``radius`` to it (for a positive radius this includes the point itself).
    A point whose neighbourhood holds at least ``minpts`` points is reported
    as "Core".  Every other point is reported as "Noise", with an extra
    "Border" tag when it appears in the neighbourhood of some core point.
    No cluster labels are assigned; only this per-point classification is
    produced.

    Usage:
        obj = dbscan.DbScan(minpts, radius, file)
        obj.print_dbscan()
        obj.plot_dbscan()
    """

    # Sample data set used when the caller supplies no ``file`` argument.
    _DEFAULT_POINTS = (
        {"x": 3, "y": 7},
        {"x": 4, "y": 6},
        {"x": 5, "y": 5},
        {"x": 6, "y": 4},
        {"x": 7, "y": 3},
        {"x": 6, "y": 2},
        {"x": 7, "y": 2},
        {"x": 8, "y": 4},
        {"x": 3, "y": 3},
        {"x": 2, "y": 6},
        {"x": 3, "y": 5},
        {"x": 2, "y": 4},
    )

    def __init__(
        self,
        minpts: int,
        radius: float,
        # Quoted so the union is never evaluated at runtime (keeps the
        # signature importable on interpreters without ``X | Y`` support).
        file: "str | tuple[dict[str, int], ...]" = _DEFAULT_POINTS,
    ) -> None:
        """
        Constructor

        Args:
        -----------
            minpts : minimum neighbourhood size for a point to count as core
            radius : neighbourhood radius; a neighbour must be strictly
                     closer than this distance
            file   : either the path of a CSV file (passed to
                     ``pandas.read_csv``) or an iterable of mappings; in both
                     cases the data must provide ``x`` and ``y`` columns

        The neighbourhood table is computed immediately and stored in
        ``self.dict1``.
        """
        self.minpts = minpts
        self.radius = radius
        self.file = file
        self.dict1 = self.perform_dbscan()

    def _load_points(self) -> pd.DataFrame:
        """Return the input as a DataFrame (CSV path or iterable of rows)."""
        # isinstance (not an exact type comparison) so str subclasses are
        # treated as paths as well.
        if isinstance(self.file, str):
            return pd.read_csv(self.file)
        return pd.DataFrame(list(self.file))

    def _coordinates(self) -> tuple[list, list]:
        """Return the ``x`` and ``y`` columns as two plain Python lists."""
        data = self._load_points()
        # An empty input has no columns to look up; report "no points".
        if len(data) == 0:
            return [], []
        # Plain lists give positional access and avoid per-element pandas
        # indexing inside the quadratic neighbour search.
        return data["x"].tolist(), data["y"].tolist()

    def perform_dbscan(self) -> dict[int, list[int]]:
        """
        Build the neighbourhood table of the data set.

        Args:
        -----------
            None

        Return:
        --------
            Dictionary mapping each 1-based point number to the ascending
            list of 1-based point numbers whose Euclidean distance to it is
            strictly smaller than ``self.radius``.  A point without any such
            neighbour gets no entry.

        >>> DbScan(4, 1.9).perform_dbscan()[11]
        [2, 10, 11, 12]
        >>> DbScan(4, 1.9).perform_dbscan()[12]
        [9, 11, 12]
        """
        xs, ys = self._coordinates()
        neighbours: dict[int, list[int]] = {}
        for i, (xi, yi) in enumerate(zip(xs, ys), start=1):
            for j, (xj, yj) in enumerate(zip(xs, ys), start=1):
                distance = math.sqrt((xj - xi) ** 2 + (yj - yi) ** 2)
                if distance < self.radius:
                    neighbours.setdefault(i, []).append(j)
        return neighbours

    def print_dbscan(self) -> None:
        """
        Outputs:
        --------
            Prints each point and if it is a core or a noise (w/ border)

        >>> DbScan(4, 1.9).print_dbscan()  # doctest: +NORMALIZE_WHITESPACE
        1   [1, 2, 10] ---> Noise ---> Border
        2   [1, 2, 3, 11] ---> Core
        3   [2, 3, 4] ---> Noise ---> Border
        4   [3, 4, 5] ---> Noise ---> Border
        5   [4, 5, 6, 7, 8] ---> Core
        6   [5, 6, 7] ---> Noise ---> Border
        7   [5, 6, 7] ---> Noise ---> Border
        8   [5, 8] ---> Noise ---> Border
        9   [9, 12] ---> Noise
        10   [1, 10, 11] ---> Noise ---> Border
        11   [2, 10, 11, 12] ---> Core
        12   [9, 11, 12] ---> Noise ---> Border
        """
        core_ids = [
            point
            for point, near in self.dict1.items()
            if len(near) >= self.minpts
        ]
        for point, near in self.dict1.items():
            print(point, " ", near, end=" ---> ")
            if len(near) >= self.minpts:
                print("Core")
            # A non-core point is a border point when some core point lists
            # it as a neighbour.
            elif any(point in self.dict1[core] for core in core_ids):
                print("Noise ---> Border")
            else:
                print("Noise")

    def plot_dbscan(self) -> None:
        """
        Output:
        -------
            A matplotlib plot that shows points as core (red) and noise
            (green), with a circle of the search radius drawn around every
            core point.

        >>> DbScan(4,1.9).plot_dbscan()
        Plotted Successfully
        """
        xs, ys = self._coordinates()
        core_x, core_y, noise_x, noise_y = [], [], [], []
        for point, near in self.dict1.items():
            x, y = xs[point - 1], ys[point - 1]
            if len(near) >= self.minpts:
                core_x.append(x)
                core_y.append(y)
                circle = plt.Circle((x, y), self.radius, color="blue", fill=False)
                plt.gca().add_artist(circle)
            else:
                noise_x.append(x)
                noise_y.append(y)
            plt.text(x, y, "P" + str(point), ha="center", va="bottom")
        # One labelled scatter per category: the legend entries are then tied
        # to the right colour regardless of the order in which core and noise
        # points occur in the data.
        if core_x:
            plt.scatter(core_x, core_y, color="red", label="Core")
        if noise_x:
            plt.scatter(noise_x, noise_y, color="green", label="Noise")
        plt.xlabel("X")
        plt.ylabel("Y")
        plt.title("DBSCAN Clustering")
        if core_x or noise_x:
            plt.legend()
        plt.show()
        print("Plotted Successfully")
187
+
188
# When executed as a script, run the doctests embedded in this module.
if __name__ == "__main__":
    import doctest

    doctest.testmod()
0 commit comments