1
1
import math
2
+ from typing import dict , list , optional
2
3
import matplotlib .pyplot as plt
3
4
import pandas as pd
4
- from typing import dict , list , optional
5
-
6
-
7
5
class DbScan:
    """
    DBSCAN Algorithm :
    Density-Based Spatial Clustering Of Applications With Noise
    Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN

    Every point is numbered from 1 in the order it appears in the input.
    A point whose neighbourhood (all points strictly closer than ``radius``,
    itself included) holds at least ``minpts`` points is a *core* point.
    A non-core point that lies in the neighbourhood of a core point is
    reported as "Noise ---> Border"; every other point is "Noise".

    Functions:
    ----------
    __init__       : Stores the parameters and runs the neighbourhood search
    perform_dbscan : Builds the neighbourhood of every point
    print_dbscan   : Prints the neighbourhood and category of every point
    plot_dbscan    : Plots the points, highlighting the core points

    To create an object
    -------------------
    import dbscan
    obj = dbscan.DbScan(minpts, radius, file)
    obj.print_dbscan()
    obj.plot_dbscan()
    """

    # Sample data set used when no input is supplied. It is a tuple so the
    # default argument itself cannot be mutated between instances.
    _DEFAULT_POINTS = (
        {"x": 3, "y": 7},
        {"x": 4, "y": 6},
        {"x": 5, "y": 5},
        {"x": 6, "y": 4},
        {"x": 7, "y": 3},
        {"x": 6, "y": 2},
        {"x": 7, "y": 2},
        {"x": 8, "y": 4},
        {"x": 3, "y": 3},
        {"x": 2, "y": 6},
        {"x": 3, "y": 5},
        {"x": 2, "y": 4},
    )

    def __init__(
        self,
        minpts: int,
        radius: float,
        file: "str | tuple[dict[str, float], ...] | list[dict[str, float]]" = (
            _DEFAULT_POINTS
        ),
    ) -> None:
        """
        Constructor

        Args:
        -----------
        minpts (int) : Minimum neighbourhood size for a point to be a core
        radius (float) : Neighbourhood radius (the comparison is strict)
        file : Either the path of a csv file with ``x`` and ``y`` columns,
               or an iterable of ``{"x": ..., "y": ...}`` records. A pandas
               DataFrame or a mapping of columns is accepted as well.
               Defaults to a built-in sample of 12 points.

        The neighbourhoods are computed immediately and stored in ``dict1``.

        >>> DbScan(4, 1.9).dict1[11]
        [2, 10, 11, 12]
        """
        self.minpts = minpts
        self.radius = radius
        self.file = file
        self.dict1 = self.perform_dbscan()

    def _load_data(self) -> "pd.DataFrame":
        """Return the input points as a DataFrame with ``x``/``y`` columns."""
        source = self.file
        if isinstance(source, str):
            return pd.read_csv(source)
        if isinstance(source, (pd.DataFrame, dict)):
            # list() on these would yield only the column names.
            return pd.DataFrame(source)
        return pd.DataFrame(list(source))

    def perform_dbscan(self) -> dict[int, list[int]]:
        """
        Args:
        -----------
        None

        Return:
        --------
        Dictionary mapping each 1-based point number to the ascending list
        of 1-based point numbers lying strictly within ``radius`` of it.
        With a positive radius every point appears in its own list. Points
        with no neighbour at all (radius <= 0) get no entry.

        >>> DbScan(4, 1.9).perform_dbscan()[12]
        [9, 11, 12]
        """
        data = self._load_data()
        if len(data) == 0:
            return {}

        # Pull the coordinates out once; indexing the DataFrame for each
        # pair inside the double loop is needlessly slow.
        xs = data["x"].tolist()
        ys = data["y"].tolist()
        radius = self.radius

        neighbourhoods: dict[int, list[int]] = {}
        for point_id, (px, py) in enumerate(zip(xs, ys), start=1):
            close = [
                other_id
                for other_id, (qx, qy) in enumerate(zip(xs, ys), start=1)
                if math.hypot(qx - px, qy - py) < radius
            ]
            if close:
                neighbourhoods[point_id] = close
        return neighbourhoods

    def print_dbscan(self) -> None:
        """
        Outputs:
        --------
        Prints each point and if it is a core or a noise (w/ border)

        One line is printed per entry of ``dict1``: the point number, its
        neighbourhood and then ``Core``, ``Noise ---> Border`` (the point is
        in the neighbourhood of some other core point) or ``Noise``.
        """
        # Neighbourhoods of the core points, computed once for the border test.
        core_neighbourhoods = {
            point_id: members
            for point_id, members in self.dict1.items()
            if len(members) >= self.minpts
        }
        for point_id, members in self.dict1.items():
            print(point_id, " ", members, end=" ---> ")
            if point_id in core_neighbourhoods:
                print("Core")
            elif any(point_id in near for near in core_neighbourhoods.values()):
                print("Noise ---> Border")
            else:
                print("Noise")

    def plot_dbscan(self) -> None:
        """
        Output:
        -------
        A matplotlib plot that show points as core and noise along
        with the circle that lie within it.

        Core points are drawn in red with a blue circle of size ``radius``
        around them, all other points in green. Every point is annotated
        with ``P<number>``. Prints "Plotted Successfully" after the window
        is shown.
        """
        if self.dict1:
            data = self._load_data()
            xs = data["x"].tolist()
            ys = data["y"].tolist()
        else:
            xs, ys = [], []

        labelled = set()
        for point_id, members in self.dict1.items():
            x, y = xs[point_id - 1], ys[point_id - 1]
            is_core = len(members) >= self.minpts
            category = "Core" if is_core else "Noise"
            # Attach the legend label to the first artist of each category
            # only, so the legend colours always match the categories.
            label = "_nolegend_" if category in labelled else category
            labelled.add(category)
            plt.scatter(x, y, color="red" if is_core else "green", label=label)
            if is_core:
                circle = plt.Circle((x, y), self.radius, color="blue", fill=False)
                plt.gca().add_artist(circle)
            plt.text(x, y, "P" + str(point_id), ha="center", va="bottom")

        plt.xlabel("X")
        plt.ylabel("Y")
        plt.title("DBSCAN Clustering")
        if labelled:
            plt.legend()
        plt.show()
        print("Plotted Successfully")
0 commit comments