Skip to content

Commit 687e234

Browse files
committed
Update paths for processed assemblies
1 parent 950ee2a commit 687e234

File tree

3 files changed

+46
-46
lines changed

3 files changed

+46
-46
lines changed

Diff for: src/guide-finder/GuideSearchAndScore.py

+44-44
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ class GuideSearchAndScore:
2929
def __init__(self, **kwargs):
3030
""" class for controlling the guide searching workflow and displaying results """
3131
# track whether web or cli
32-
self.cli = kwargs['command-line']
32+
self.cli = kwargs['command-line']
3333
if self.cli:
3434
self.output_file = kwargs['output']
3535
else:
@@ -43,21 +43,21 @@ def __init__(self, **kwargs):
4343
else:
4444
self.isValidInput(kwargs['searchInput'])
4545
self.searchInput = kwargs['searchInput']
46-
46+
4747
# validate genome
4848
if 'genome' not in kwargs:
4949
self.sendError("'genome' parameter not set")
5050
else:
5151
# attempt connection to genome's db before storing
5252
self.dbConnection = Config(kwargs['genome'])
5353
self.genome = kwargs['genome']
54-
54+
5555
# check gene sent
5656
if 'gene' not in kwargs:
5757
self.sendError("Please select a Target from the dropdown list")
5858
else:
5959
self.gene = kwargs['gene']
60-
60+
6161
# validate rgenID
6262
if 'rgenID' not in kwargs:
6363
self.sendError("Please select an RGEN from the dropdown list")
@@ -79,11 +79,11 @@ def __init__(self, **kwargs):
7979
self.sendError("Max off targets is not set")
8080
else:
8181
self.maxOffTargets = kwargs['maxOffTargets']
82-
82+
8383
#TODO: implement
8484
if 'offtargetPAMS' in kwargs:
8585
self.offTargetPAMS = kwargs['offtargetPAMS']
86-
86+
8787
self.guideDict, self.batchID = self.performGuideSearch()
8888
self.scores = self.setScores()
8989

@@ -108,7 +108,7 @@ def setScores(self):
108108
available.append(s)
109109
if len(available) == len(options):
110110
break
111-
111+
112112
return available
113113

114114
def renderTemplate(self, template_name, template_values):
@@ -129,9 +129,9 @@ def guideTableHTML(self):
129129

130130
def tableHeadingHTML(self):
131131
""" determine which info for the guides is available and display the headings for those """
132-
132+
133133
scoreHeading = ''
134-
scoreHeaders = ''
134+
scoreHeaders = ''
135135
if len(self.scores) > 0:
136136
scoreHeading += "<th colspan='{num_scores}'>Scoring</th>".format(num_scores=len(self.scores))
137137
for score in self.scores:
@@ -156,7 +156,7 @@ def tableBodyHTML(self):
156156

157157
sortedGuides = OrderedDict((x, self.guideDict[x]) for x in sortedIDs)
158158
bodyHTML = "<tbody>"+self.tableRowsHTML(sortedGuides)+"</tbody>"
159-
159+
160160
return bodyHTML
161161

162162
def tableRowsHTML(self, sortedGuides):
@@ -178,7 +178,7 @@ def tableRowsHTML(self, sortedGuides):
178178
rowsHTML += self.renderTemplate("table_row.html", template_values)
179179

180180
return rowsHTML
181-
181+
182182
def rowPopoverHTML(self, guideID):
183183
""" given a guideID fetch the label and notes if it's already in the database, give the option to add it otherwise """
184184

@@ -197,20 +197,20 @@ def fetchGuideFromDatabase(self, guideID):
197197
searchQuery = {
198198
"guideSeq": self.guideDict[guideID]['guide_seq'],
199199
"pamSeq": self.guideDict[guideID]['pam_seq'],
200-
"guideLocation": self.calculateLocation(self.guideDict[guideID])
200+
"guideLocation": self.calculateLocation(self.guideDict[guideID])
201201
}
202202
if self.dbConnection.guideCollection.find(searchQuery).count() > 0:
203203
existingGuide = self.dbConnection.guideCollection.find_one(searchQuery, {"label": 1, "Notes": 1})
204204
return 'Update Guide', existingGuide['label'], existingGuide['Notes']
205205
else:
206206
return 'Add to Database', '', ''
207-
207+
208208
def guideExistsInDatabase(self, guideID):
209209
""" return true if the guide is already stored, false otherwise """
210210
searchQuery = {
211211
"guideSeq": self.guideDict[guideID]['guide_seq'],
212212
"pamSeq": self.guideDict[guideID]['pam_seq'],
213-
"guideLocation": self.calculateLocation(self.guideDict[guideID])
213+
"guideLocation": self.calculateLocation(self.guideDict[guideID])
214214
}
215215
if self.dbConnection.guideCollection.find(searchQuery).count() > 0:
216216
return True
@@ -225,7 +225,7 @@ def formatSequence(self, guide_seq, pam_seq):
225225
return pam_seq + ", " + guide_seq
226226
else:
227227
self.sendError("Unrecognized PAM Location for RGEN: " + str(self.rgenRecord['PamLocation']))
228-
228+
229229
def calculateLocation(self, guide):
230230
""" using the strand of the guide and the genomic start, format the location string """
231231
if guide['strand'] == '+':
@@ -234,7 +234,7 @@ def calculateLocation(self, guide):
234234
return guide['pam_chrom'] + ":" + str(guide['guide_genomic_start']) + "-" + str(int(guide['guide_genomic_start']-len(guide['guide_seq']))+1) + ":-"
235235
else:
236236
self.sendError("Unrecognized strand for guide: " + str(guide['strand']))
237-
237+
238238
def offtargetHTML(self, guideID, guide):
239239
""" creates the HTML for the off-target modal of a given guide """
240240
template_values = {
@@ -246,7 +246,7 @@ def offtargetHTML(self, guideID, guide):
246246
'totalCount': str(sum(guide['offtarget_counts']))
247247
}
248248
return self.renderTemplate("offtarget_cell.html", template_values)
249-
249+
250250
def offtargetCountsHTML(self, guideID, guide):
251251
""" formats the links to the off-target modals """
252252
off_target_counts = "<div style='color: #2676ff; font-weight: bold'>"
@@ -300,13 +300,13 @@ def offtargetModalBodyHTML(self, guide, num_mismatches):
300300
return resultHTML
301301
else:
302302
return "<p>No Off-Targets with {mismatches} Mismatches</p>".format(mismatches=str(num_mismatches))
303-
303+
304304
def offTargetTableHTML(self, offtarget_list, maxShown):
305305
""" sorts, formats and returns a table of off-targets from the offtarget list provided """
306-
306+
307307
num_offtargets = len(offtarget_list)
308308
offtarget_list = offtarget_list[:maxShown]
309-
309+
310310
for offtarget in offtarget_list:
311311
offtarget.update({'formatted_seq': self.colourLowercaseRed(self.formatSequence(offtarget['seq'], offtarget['pam']))})
312312
# sort by score if available
@@ -318,14 +318,14 @@ def offTargetTableHTML(self, offtarget_list, maxShown):
318318
total_count = ''
319319
if num_offtargets > maxShown:
320320
total_count = "<p>({max} of {total} shown)</p>".format(max=str(maxShown), total=str(num_offtargets))
321-
321+
322322
template_values = {
323323
'offtargetSubset': offtarget_list,
324324
'totalCount': total_count
325325
}
326326

327327
return self.renderTemplate('offtarget_table.html', template_values)
328-
328+
329329
def separateOffTargets(self, off_target_subset):
330330
""" given a list of filtered off-targets, separate the regular ones from those that have no mismatches in the rgen's seed region """
331331
seedDirection = self.rgenRecord['SeedRegion'][0]
@@ -346,11 +346,11 @@ def hasMismatchInSeed(self, offtargetSeq, seedDirection, seedLength, guideLength
346346
if seedDirection == '+':
347347
for idx in range(0, seedLength):
348348
if offtargetSeq[idx].islower():
349-
return True
349+
return True
350350
elif seedDirection == '-':
351351
for idx in reversed(range(guideLength-seedLength,guideLength)):
352352
if offtargetSeq[idx].islower():
353-
return True
353+
return True
354354

355355
return False
356356

@@ -387,10 +387,10 @@ def sendNoResultHTML(self):
387387
return
388388

389389
def sendError(self, errorString):
390-
""" format exceptions in HTML to prevent page from crashing """
390+
""" format exceptions in HTML to prevent page from crashing """
391391
if not hasattr(self, 'dbConnection'):
392392
self.dbConnection = Config()
393-
393+
394394
if self.cli:
395395
raise Exception(errorString)
396396
else:
@@ -429,7 +429,7 @@ def writeCsvFiles(self):
429429
num_skipped += 1
430430
else:
431431
writer.writerow(['Total number of potential off-target sites:' + str(sum(guide['offtarget_counts']))])
432-
total_offtargets_processed += sum(guide['offtarget_counts'])
432+
total_offtargets_processed += sum(guide['offtarget_counts'])
433433
writer.writerow(['Off-target Counts: ' + "-".join(map(str,guide['offtarget_counts']))])
434434
writer.writerow(['No mismatches in Seed: ' + "-".join(map(str,guide['offtargets_seed']))])
435435
if guide['MIT'] and not guide['max_exceeded']:
@@ -438,7 +438,7 @@ def writeCsvFiles(self):
438438
writer.writerow(['CFD Score: ' + str(guide['CFD'])])
439439
writer.writerow(['Location', 'Sequence', 'Mismatches', 'Context'])
440440
writer.writerow([self.calculateLocation(guide), self.formatSequence(guide['guide_seq'], guide['pam_seq']), '0', 'guide'])
441-
writer.writerow([str(len(self.guideDict.keys()))+" GUIDES FOUND"])
441+
writer.writerow([str(len(self.guideDict.keys()))+" GUIDES FOUND"])
442442
writer.writerow([str(num_skipped)+" SKIPPED"])
443443
writer.writerow([str(num_exceeded)+" EXCEEDED MAX OFF TARGETS"])
444444
writer.writerow([str(total_offtargets_processed)+" OFF TARGETS FOUND"])
@@ -454,13 +454,13 @@ def writeCsvFiles(self):
454454
row.append('-')
455455
writer.writerow(row)
456456
"""
457-
else:
458-
for guideID, guide in self.guideDict.items():
457+
else:
458+
for guideID, guide in self.guideDict.items():
459459
csv_path = os.path.join(self.dbConnection.ROOT_PATH,'src/guide-finder/tempfiles', self.batchID+"_"+guideID+".csv")
460460
try:
461461
with open(csv_path, mode='w') as csv_file:
462462
writer = csv.writer(csv_file, delimiter=',')
463-
# build and write heading row
463+
# build and write heading row
464464
column_headings = ['chromosome', 'location', 'strand', 'protospacer sequence', 'PAM', 'mismatches', 'context']
465465
if 'MIT' in self.scores and not (guide['max_exceeded'] or guide['skip']):
466466
column_headings.append('MIT')
@@ -469,7 +469,7 @@ def writeCsvFiles(self):
469469
column_headings.append('no mismatches in seed')
470470
writer.writerow(column_headings)
471471

472-
# build and write guide row
472+
# build and write guide row
473473
guide_row = [guide['pam_chrom']]
474474
guide_row.append(self.calculateLocation(guide).split(":")[1])
475475
guide_row.append(guide['strand'])
@@ -487,7 +487,7 @@ def writeCsvFiles(self):
487487
# initialize variables for determining whether offtarget has mismatch in seed
488488
seedDirection = self.rgenRecord['SeedRegion'][0]
489489
seedLength = int(self.rgenRecord['SeedRegion'][1:])
490-
# build and write row for each of the potential off target sites
490+
# build and write row for each of the potential off target sites
491491
for offtarget in guide['offtargets']:
492492
offtarget_row = offtarget['loc'].split(':')
493493
offtarget_row.append(offtarget['seq'])
@@ -510,11 +510,11 @@ def writeCsvFiles(self):
510510
print(guideID)
511511
print(guide)
512512
self.sendError("Error writing off target CSV file, "+str(e))
513-
513+
514514
def getENSID(self):
515515
""" given the gene symbol, return the ENSEMBL ID from the stored gene collection """
516516
geneCollection = self.dbConnection.curr_geneCollection
517-
result = geneCollection.find({"Name": self.gene})
517+
result = geneCollection.find({"Name": self.gene})
518518
if result.count() > 1:
519519
self.sendError("More than one result in the database for gene symbol: " + self.gene)
520520
elif result.count() < 1:
@@ -547,35 +547,35 @@ def performGuideSearch(self):
547547
# TODO: look into a collision-free hashing function so don't have to re-run entire pipeline if inputs don't change
548548
batchID = binascii.b2a_hex(os.urandom(9)).decode('utf-8')
549549

550-
genome_fa = os.path.join(self.dbConnection.ROOT_PATH,'jbrowse', 'data.'+self.genome,"downloads",self.genome+".fa")
550+
genome_fa = os.path.join(self.dbConnection.ROOT_PATH,'jbrowse', 'data', self.genome,"processed",self.genome+".fa")
551551
twoBitToFa_path = os.path.join(self.dbConnection.ROOT_PATH,'bin/twoBitToFa')
552-
genome_2bit = os.path.join(self.dbConnection.ROOT_PATH,'jbrowse', 'data.'+self.genome,"downloads",self.genome+'.2bit')
552+
genome_2bit = os.path.join(self.dbConnection.ROOT_PATH,'jbrowse', 'data', self.genome,"processed",self.genome+'.2bit')
553553
tempfiles_path = os.path.join(self.dbConnection.ROOT_PATH,'src/guide-finder/tempfiles')
554554
if self.cli:
555555
import time
556556
time_0 = time.time()
557557
print("Fetching sequence...")
558558

559-
get_sequence.fetch_sequence(twoBitToFa_path, self.searchInput, genome_2bit, os.path.join(tempfiles_path,batchID+'_out.fa'))
560-
if self.cli:
559+
get_sequence.fetch_sequence(twoBitToFa_path, self.searchInput, genome_2bit, os.path.join(tempfiles_path,batchID+'_out.fa'))
560+
if self.cli:
561561
time_1 = time.time()
562562
print("Finished fetching sequence. " + str(round(time_1-time_0,4)))
563563
print("Determining guides in search region...")
564564

565565
protospacer_length = getattr(self, 'guideLength', 0) # passing 0 indicates default should be used
566566
guideDict = find_grna.find_grna(self.rgenID, protospacer_length, os.path.join(tempfiles_path, batchID+'_out.fa'))
567567

568-
if self.cli:
568+
if self.cli:
569569
time_2 = time.time()
570570
print("Finished finding gRNAs. " + str(round(time_2-time_1,4)))
571571
print("Searching for potential off target sites...")
572572
guideDict = find_offtargets.findOffTargets(guideDict, self.rgenID, self.genome, self.maxOffTargets, batchID, genome_fa, tempfiles_path)
573-
if self.cli:
573+
if self.cli:
574574
time_3 = time.time()
575575
print("Finished finding offtargets. " + str(round(time_3-time_2,4)))
576576
print("Scoring potential off target sites and guides...")
577577
guideDict = score_offtargets.scoreOffTargets(guideDict, self.rgenID)
578-
if self.cli:
578+
if self.cli:
579579
time_4 = time.time()
580580
print("Finished scoring. " + str(round(time_4-time_3,4)))
581581
print("Categorizing potential off target sites...")
@@ -625,12 +625,12 @@ def main():
625625

626626
# only use the max if the checkbox is selected
627627
parameters['maxOffTargets'] = parameters['maxOffTargets'] if parameters['setMax'] == 'true' else None
628-
628+
629629
GuideSearchAndScore(**parameters)
630630
else:
631631
desc = """ The command-line version of GuideSearchAndScore.py will return potential guides along with their scores and off-targets.
632632
"""
633-
633+
634634
parser = argparse.ArgumentParser(prog='GuideSearchAndScore',description=desc)
635635
parser._action_groups.pop()
636636
required = parser.add_argument_group('required arguments')

Diff for: src/guide-finder/core/categorize_offtargets.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def categorizeOffTargets(guideDict, rgenID, genome, batchID):
3838
dbConnection = Config(genome)
3939
rgen = getRgenRecord(rgenID, dbConnection)
4040
# construct bed intersect command
41-
segmentsFile = os.path.join(dbConnection.ROOT_PATH, "jbrowse/data/"+genome, "downloads", genome+".segments.bed")
41+
segmentsFile = os.path.join(dbConnection.ROOT_PATH, "jbrowse/data/"+genome, "processed", genome+".segments.bed")
4242
extendedBed = os.path.join(dbConnection.ROOT_PATH, "src/guide-finder/tempfiles", str(batchID)+"_extended.bed")
4343
bedCommand = ["bedtools", "intersect", "-a", extendedBed, "-b", segmentsFile, "-wb"]
4444
p = Popen(bedCommand, stdin=PIPE, stdout=PIPE, stderr=PIPE)

Diff for: src/guide-finder/core/find_offtargets.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -296,7 +296,7 @@ def convertExtendedBedToFasta(batchID, genome, genome_fa, tempfile_directory):
296296
raise Exception("Error in bedtools getfasta command: " + str(p.stderr))
297297

298298
"""
299-
twoBitTwoFaCommand = [os.path.join(dir_path,"../../../bin/twoBitToFa"), os.path.join(dir_path,"../../../jbrowse/data/mm10/downloads/mm10.2bit"),"-bed="+extendedBed, extendedFasta]
299+
twoBitTwoFaCommand = [os.path.join(dir_path,"../../../bin/twoBitToFa"), os.path.join(dir_path,"../../../jbrowse/data/mm10/processed/mm10.2bit"),"-bed="+extendedBed, extendedFasta]
300300
p = run(twoBitTwoFaCommand,stderr=PIPE)
301301
if p.stderr:
302302
raise Exception("Error in twoBitToFa: "+str(p.stderr))

0 commit comments

Comments
 (0)