@@ -29,7 +29,7 @@ class GuideSearchAndScore:
29
29
def __init__ (self , ** kwargs ):
30
30
""" class for controlling the guide searching workflow and displaying results """
31
31
# track whether web or cli
32
- self .cli = kwargs ['command-line' ]
32
+ self .cli = kwargs ['command-line' ]
33
33
if self .cli :
34
34
self .output_file = kwargs ['output' ]
35
35
else :
@@ -43,21 +43,21 @@ def __init__(self, **kwargs):
43
43
else :
44
44
self .isValidInput (kwargs ['searchInput' ])
45
45
self .searchInput = kwargs ['searchInput' ]
46
-
46
+
47
47
# validate genome
48
48
if 'genome' not in kwargs :
49
49
self .sendError ("'genome' parameter not set" )
50
50
else :
51
51
# attempt connection to genome's db before storing
52
52
self .dbConnection = Config (kwargs ['genome' ])
53
53
self .genome = kwargs ['genome' ]
54
-
54
+
55
55
# check gene sent
56
56
if 'gene' not in kwargs :
57
57
self .sendError ("Please select a Target from the dropdown list" )
58
58
else :
59
59
self .gene = kwargs ['gene' ]
60
-
60
+
61
61
# validate rgenID
62
62
if 'rgenID' not in kwargs :
63
63
self .sendError ("Please select an RGEN from the dropdown list" )
@@ -79,11 +79,11 @@ def __init__(self, **kwargs):
79
79
self .sendError ("Max off targets is not set" )
80
80
else :
81
81
self .maxOffTargets = kwargs ['maxOffTargets' ]
82
-
82
+
83
83
#TODO: implement
84
84
if 'offtargetPAMS' in kwargs :
85
85
self .offTargetPAMS = kwargs ['offtargetPAMS' ]
86
-
86
+
87
87
self .guideDict , self .batchID = self .performGuideSearch ()
88
88
self .scores = self .setScores ()
89
89
@@ -108,7 +108,7 @@ def setScores(self):
108
108
available .append (s )
109
109
if len (available ) == len (options ):
110
110
break
111
-
111
+
112
112
return available
113
113
114
114
def renderTemplate (self , template_name , template_values ):
@@ -129,9 +129,9 @@ def guideTableHTML(self):
129
129
130
130
def tableHeadingHTML (self ):
131
131
""" determine which info for the guides is available and display the headings for those """
132
-
132
+
133
133
scoreHeading = ''
134
- scoreHeaders = ''
134
+ scoreHeaders = ''
135
135
if len (self .scores ) > 0 :
136
136
scoreHeading += "<th colspan='{num_scores}'>Scoring</th>" .format (num_scores = len (self .scores ))
137
137
for score in self .scores :
@@ -156,7 +156,7 @@ def tableBodyHTML(self):
156
156
157
157
sortedGuides = OrderedDict ((x , self .guideDict [x ]) for x in sortedIDs )
158
158
bodyHTML = "<tbody>" + self .tableRowsHTML (sortedGuides )+ "</tbody>"
159
-
159
+
160
160
return bodyHTML
161
161
162
162
def tableRowsHTML (self , sortedGuides ):
@@ -178,7 +178,7 @@ def tableRowsHTML(self, sortedGuides):
178
178
rowsHTML += self .renderTemplate ("table_row.html" , template_values )
179
179
180
180
return rowsHTML
181
-
181
+
182
182
def rowPopoverHTML (self , guideID ):
183
183
""" given a guideID fetch the label and notes if it's already in the database, give the option to add it otherwise """
184
184
@@ -197,20 +197,20 @@ def fetchGuideFromDatabase(self, guideID):
197
197
searchQuery = {
198
198
"guideSeq" : self .guideDict [guideID ]['guide_seq' ],
199
199
"pamSeq" : self .guideDict [guideID ]['pam_seq' ],
200
- "guideLocation" : self .calculateLocation (self .guideDict [guideID ])
200
+ "guideLocation" : self .calculateLocation (self .guideDict [guideID ])
201
201
}
202
202
if self .dbConnection .guideCollection .find (searchQuery ).count () > 0 :
203
203
existingGuide = self .dbConnection .guideCollection .find_one (searchQuery , {"label" : 1 , "Notes" : 1 })
204
204
return 'Update Guide' , existingGuide ['label' ], existingGuide ['Notes' ]
205
205
else :
206
206
return 'Add to Database' , '' , ''
207
-
207
+
208
208
def guideExistsInDatabase (self , guideID ):
209
209
""" return true if the guide is already stored, false otherwise """
210
210
searchQuery = {
211
211
"guideSeq" : self .guideDict [guideID ]['guide_seq' ],
212
212
"pamSeq" : self .guideDict [guideID ]['pam_seq' ],
213
- "guideLocation" : self .calculateLocation (self .guideDict [guideID ])
213
+ "guideLocation" : self .calculateLocation (self .guideDict [guideID ])
214
214
}
215
215
if self .dbConnection .guideCollection .find (searchQuery ).count () > 0 :
216
216
return True
@@ -225,7 +225,7 @@ def formatSequence(self, guide_seq, pam_seq):
225
225
return pam_seq + ", " + guide_seq
226
226
else :
227
227
self .sendError ("Unrecognized PAM Location for RGEN: " + str (self .rgenRecord ['PamLocation' ]))
228
-
228
+
229
229
def calculateLocation (self , guide ):
230
230
""" using the strand of the guide and the genomic start, format the location string """
231
231
if guide ['strand' ] == '+' :
@@ -234,7 +234,7 @@ def calculateLocation(self, guide):
234
234
return guide ['pam_chrom' ] + ":" + str (guide ['guide_genomic_start' ]) + "-" + str (int (guide ['guide_genomic_start' ]- len (guide ['guide_seq' ]))+ 1 ) + ":-"
235
235
else :
236
236
self .sendError ("Unrecognized strand for guide: " + str (guide ['strand' ]))
237
-
237
+
238
238
def offtargetHTML (self , guideID , guide ):
239
239
""" creates the HTML for the off-target modal of a given guide """
240
240
template_values = {
@@ -246,7 +246,7 @@ def offtargetHTML(self, guideID, guide):
246
246
'totalCount' : str (sum (guide ['offtarget_counts' ]))
247
247
}
248
248
return self .renderTemplate ("offtarget_cell.html" , template_values )
249
-
249
+
250
250
def offtargetCountsHTML (self , guideID , guide ):
251
251
""" formats the links to the off-target modals """
252
252
off_target_counts = "<div style='color: #2676ff; font-weight: bold'>"
@@ -300,13 +300,13 @@ def offtargetModalBodyHTML(self, guide, num_mismatches):
300
300
return resultHTML
301
301
else :
302
302
return "<p>No Off-Targets with {mismatches} Mismatches</p>" .format (mismatches = str (num_mismatches ))
303
-
303
+
304
304
def offTargetTableHTML (self , offtarget_list , maxShown ):
305
305
""" sorts, formats and returns a table of off-targets from the offtarget list provided """
306
-
306
+
307
307
num_offtargets = len (offtarget_list )
308
308
offtarget_list = offtarget_list [:maxShown ]
309
-
309
+
310
310
for offtarget in offtarget_list :
311
311
offtarget .update ({'formatted_seq' : self .colourLowercaseRed (self .formatSequence (offtarget ['seq' ], offtarget ['pam' ]))})
312
312
# sort by score if available
@@ -318,14 +318,14 @@ def offTargetTableHTML(self, offtarget_list, maxShown):
318
318
total_count = ''
319
319
if num_offtargets > maxShown :
320
320
total_count = "<p>({max} of {total} shown)</p>" .format (max = str (maxShown ), total = str (num_offtargets ))
321
-
321
+
322
322
template_values = {
323
323
'offtargetSubset' : offtarget_list ,
324
324
'totalCount' : total_count
325
325
}
326
326
327
327
return self .renderTemplate ('offtarget_table.html' , template_values )
328
-
328
+
329
329
def separateOffTargets (self , off_target_subset ):
330
330
""" given a list of filtered off-targets, separate the regular ones from those that have no mismatches in the rgen's seed region """
331
331
seedDirection = self .rgenRecord ['SeedRegion' ][0 ]
@@ -346,11 +346,11 @@ def hasMismatchInSeed(self, offtargetSeq, seedDirection, seedLength, guideLength
346
346
if seedDirection == '+' :
347
347
for idx in range (0 , seedLength ):
348
348
if offtargetSeq [idx ].islower ():
349
- return True
349
+ return True
350
350
elif seedDirection == '-' :
351
351
for idx in reversed (range (guideLength - seedLength ,guideLength )):
352
352
if offtargetSeq [idx ].islower ():
353
- return True
353
+ return True
354
354
355
355
return False
356
356
@@ -387,10 +387,10 @@ def sendNoResultHTML(self):
387
387
return
388
388
389
389
def sendError (self , errorString ):
390
- """ format exceptions in HTML to prevent page from crashing """
390
+ """ format exceptions in HTML to prevent page from crashing """
391
391
if not hasattr (self , 'dbConnection' ):
392
392
self .dbConnection = Config ()
393
-
393
+
394
394
if self .cli :
395
395
raise Exception (errorString )
396
396
else :
@@ -429,7 +429,7 @@ def writeCsvFiles(self):
429
429
num_skipped += 1
430
430
else :
431
431
writer .writerow (['Total number of potential off-target sites:' + str (sum (guide ['offtarget_counts' ]))])
432
- total_offtargets_processed += sum (guide ['offtarget_counts' ])
432
+ total_offtargets_processed += sum (guide ['offtarget_counts' ])
433
433
writer .writerow (['Off-target Counts: ' + "-" .join (map (str ,guide ['offtarget_counts' ]))])
434
434
writer .writerow (['No mismatches in Seed: ' + "-" .join (map (str ,guide ['offtargets_seed' ]))])
435
435
if guide ['MIT' ] and not guide ['max_exceeded' ]:
@@ -438,7 +438,7 @@ def writeCsvFiles(self):
438
438
writer .writerow (['CFD Score: ' + str (guide ['CFD' ])])
439
439
writer .writerow (['Location' , 'Sequence' , 'Mismatches' , 'Context' ])
440
440
writer .writerow ([self .calculateLocation (guide ), self .formatSequence (guide ['guide_seq' ], guide ['pam_seq' ]), '0' , 'guide' ])
441
- writer .writerow ([str (len (self .guideDict .keys ()))+ " GUIDES FOUND" ])
441
+ writer .writerow ([str (len (self .guideDict .keys ()))+ " GUIDES FOUND" ])
442
442
writer .writerow ([str (num_skipped )+ " SKIPPED" ])
443
443
writer .writerow ([str (num_exceeded )+ " EXCEEDED MAX OFF TARGETS" ])
444
444
writer .writerow ([str (total_offtargets_processed )+ " OFF TARGETS FOUND" ])
@@ -454,13 +454,13 @@ def writeCsvFiles(self):
454
454
row.append('-')
455
455
writer.writerow(row)
456
456
"""
457
- else :
458
- for guideID , guide in self .guideDict .items ():
457
+ else :
458
+ for guideID , guide in self .guideDict .items ():
459
459
csv_path = os .path .join (self .dbConnection .ROOT_PATH ,'src/guide-finder/tempfiles' , self .batchID + "_" + guideID + ".csv" )
460
460
try :
461
461
with open (csv_path , mode = 'w' ) as csv_file :
462
462
writer = csv .writer (csv_file , delimiter = ',' )
463
- # build and write heading row
463
+ # build and write heading row
464
464
column_headings = ['chromosome' , 'location' , 'strand' , 'protospacer sequence' , 'PAM' , 'mismatches' , 'context' ]
465
465
if 'MIT' in self .scores and not (guide ['max_exceeded' ] or guide ['skip' ]):
466
466
column_headings .append ('MIT' )
@@ -469,7 +469,7 @@ def writeCsvFiles(self):
469
469
column_headings .append ('no mismatches in seed' )
470
470
writer .writerow (column_headings )
471
471
472
- # build and write guide row
472
+ # build and write guide row
473
473
guide_row = [guide ['pam_chrom' ]]
474
474
guide_row .append (self .calculateLocation (guide ).split (":" )[1 ])
475
475
guide_row .append (guide ['strand' ])
@@ -487,7 +487,7 @@ def writeCsvFiles(self):
487
487
# initialize variables for determining whether offtarget has mismatch in seed
488
488
seedDirection = self .rgenRecord ['SeedRegion' ][0 ]
489
489
seedLength = int (self .rgenRecord ['SeedRegion' ][1 :])
490
- # build and write row for each of the potential off target sites
490
+ # build and write row for each of the potential off target sites
491
491
for offtarget in guide ['offtargets' ]:
492
492
offtarget_row = offtarget ['loc' ].split (':' )
493
493
offtarget_row .append (offtarget ['seq' ])
@@ -510,11 +510,11 @@ def writeCsvFiles(self):
510
510
print (guideID )
511
511
print (guide )
512
512
self .sendError ("Error writing off target CSV file, " + str (e ))
513
-
513
+
514
514
def getENSID (self ):
515
515
""" given the gene symbol, return the ENSEMBL ID from the stored gene collection """
516
516
geneCollection = self .dbConnection .curr_geneCollection
517
- result = geneCollection .find ({"Name" : self .gene })
517
+ result = geneCollection .find ({"Name" : self .gene })
518
518
if result .count () > 1 :
519
519
self .sendError ("More than one result in the database for gene symbol: " + self .gene )
520
520
elif result .count () < 1 :
@@ -547,35 +547,35 @@ def performGuideSearch(self):
547
547
# TODO: look into a collision-free hashing function so don't have to re-run entire pipeline if inputs don't change
548
548
batchID = binascii .b2a_hex (os .urandom (9 )).decode ('utf-8' )
549
549
550
- genome_fa = os .path .join (self .dbConnection .ROOT_PATH ,'jbrowse' , 'data.' + self .genome ,"downloads " ,self .genome + ".fa" )
550
+ genome_fa = os .path .join (self .dbConnection .ROOT_PATH ,'jbrowse' , 'data' , self .genome ,"processed " ,self .genome + ".fa" )
551
551
twoBitToFa_path = os .path .join (self .dbConnection .ROOT_PATH ,'bin/twoBitToFa' )
552
- genome_2bit = os .path .join (self .dbConnection .ROOT_PATH ,'jbrowse' , 'data.' + self .genome ,"downloads " ,self .genome + '.2bit' )
552
+ genome_2bit = os .path .join (self .dbConnection .ROOT_PATH ,'jbrowse' , 'data' , self .genome ,"processed " ,self .genome + '.2bit' )
553
553
tempfiles_path = os .path .join (self .dbConnection .ROOT_PATH ,'src/guide-finder/tempfiles' )
554
554
if self .cli :
555
555
import time
556
556
time_0 = time .time ()
557
557
print ("Fetching sequence..." )
558
558
559
- get_sequence .fetch_sequence (twoBitToFa_path , self .searchInput , genome_2bit , os .path .join (tempfiles_path ,batchID + '_out.fa' ))
560
- if self .cli :
559
+ get_sequence .fetch_sequence (twoBitToFa_path , self .searchInput , genome_2bit , os .path .join (tempfiles_path ,batchID + '_out.fa' ))
560
+ if self .cli :
561
561
time_1 = time .time ()
562
562
print ("Finished fetching sequence. " + str (round (time_1 - time_0 ,4 )))
563
563
print ("Determining guides in search region..." )
564
564
565
565
protospacer_length = getattr (self , 'guideLength' , 0 ) # passing 0 indicates default should be used
566
566
guideDict = find_grna .find_grna (self .rgenID , protospacer_length , os .path .join (tempfiles_path , batchID + '_out.fa' ))
567
567
568
- if self .cli :
568
+ if self .cli :
569
569
time_2 = time .time ()
570
570
print ("Finished finding gRNAs. " + str (round (time_2 - time_1 ,4 )))
571
571
print ("Searching for potential off target sites..." )
572
572
guideDict = find_offtargets .findOffTargets (guideDict , self .rgenID , self .genome , self .maxOffTargets , batchID , genome_fa , tempfiles_path )
573
- if self .cli :
573
+ if self .cli :
574
574
time_3 = time .time ()
575
575
print ("Finished finding offtargets. " + str (round (time_3 - time_2 ,4 )))
576
576
print ("Scoring potential off target sites and guides..." )
577
577
guideDict = score_offtargets .scoreOffTargets (guideDict , self .rgenID )
578
- if self .cli :
578
+ if self .cli :
579
579
time_4 = time .time ()
580
580
print ("Finished scoring. " + str (round (time_4 - time_3 ,4 )))
581
581
print ("Categorizing potential off target sites..." )
@@ -625,12 +625,12 @@ def main():
625
625
626
626
# only use the max if the checkbox is selected
627
627
parameters ['maxOffTargets' ] = parameters ['maxOffTargets' ] if parameters ['setMax' ] == 'true' else None
628
-
628
+
629
629
GuideSearchAndScore (** parameters )
630
630
else :
631
631
desc = """ The command-line version of GuideSearchAndScore.py will return potential guides along with their scores and off-targets.
632
632
"""
633
-
633
+
634
634
parser = argparse .ArgumentParser (prog = 'GuideSearchAndScore' ,description = desc )
635
635
parser ._action_groups .pop ()
636
636
required = parser .add_argument_group ('required arguments' )
0 commit comments