Skip to content

Commit 3ffe093

Browse files
committed
Fix Dicey, using Python 3, NamedTemporaryFile, new index paths
1 parent 940bd59 commit 3ffe093

File tree

10 files changed

+104
-197
lines changed

10 files changed

+104
-197
lines changed

Diff for: .gitignore

-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
# program/data folders
22
.env
3-
bin/dicey
4-
bin/ncbi-blast-2.7.1+
5-
bin/primer3
63
jbrowse/data*
74
config
85
mongodb

Diff for: config-template/paths.conf

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
11
PRIMER3_EXEC=/usr/bin/primer3_core
22
PRIMER3_CONFIG=/etc/primer3_config
33
BLAST_EXEC=/usr/bin/blastn
4-
DICEY_EXEC=/opt/dicey/bin/dicey
5-
DICEY_PATH=/opt/dicey

Diff for: docker-compose.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,3 @@ volumes:
2222
# After startup, exec into the container and run
2323
# cd /var/www/html/src/setup
2424
# python3 setup.py -r /var/www/html -g Saccharomyces_cerevisiae -v R64-1-1 -fa2bit /var/www/html/bin/faToTwoBit -b /usr/bin
25-
# ./enable_dicey.sh Saccharomyces_cerevisiae R64-1-1

Diff for: src/helpers/Config.py

+31-42
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,28 @@
1414
from pymongo import MongoClient
1515

1616

17-
class Config:
18-
def __init__(self, genome=None):
17+
class Config:
18+
def __init__(self, genome=None):
1919
# get the credentials
2020
self.config = getCredentials()
2121
# the root directory is 2 levels up, get its dirname
2222
self.ROOT_PATH = (os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
2323
# set all the paths defined in ROOT/config/paths.conf
24-
self.setPaths()
24+
self.setPaths()
2525
self.PRIMER3_SETTINGS = os.path.join(self.ROOT_PATH, 'config/primer3settings.conf')
2626
# where the primer3 in and output files are written
2727
self.PRIMER3_DIR = os.path.join(self.ROOT_PATH, 'src/primer-design/files/primer3Files')
28-
28+
2929
if genome:
3030
# connect to the mongoDB
31-
self.genome = genome
31+
self.genome = genome
3232
mongoDB, release, curr_geneCollection, guideCollection, primerCollection, metadataCollection = self.getAttributes()
3333
self.mongoDB = mongoDB
3434
self.release = release
3535
self.curr_geneCollection = curr_geneCollection
3636
self.guideCollection = guideCollection
3737
self.primerCollection = primerCollection
38-
self.metadataCollection = metadataCollection
38+
self.metadataCollection = metadataCollection
3939
self.organismName = self.getOrgName()
4040

4141

@@ -48,24 +48,13 @@ def setPaths(self):
4848
self.PRIMER3 = line.split('=')[1].strip()
4949
elif re.match(r"^PRIMER3_CONFIG=", line):
5050
self.PRIMER3_CONFIG = line.split('=')[1].strip()
51-
elif re.match(r"DICEY_EXEC=", line):
52-
self.DICEY = line.split('=')[1].strip()
53-
elif re.match(r"DICEY_PATH=", line):
54-
self.DICEY_PATH = line.split('=')[1].strip()
5551

5652
if not self.BLAST:
5753
print("Error: path to BLAST executable not defined in paths.conf")
5854
elif not self.PRIMER3:
5955
print("Error: path to primer3 executable not defined in paths.conf")
6056
elif not self.PRIMER3_CONFIG:
6157
print("Error: path to primer3 config directory not defined in paths.conf")
62-
elif not self.DICEY:
63-
print("Error: path to dicey executable not defined in paths.conf")
64-
elif not self.DICEY_PATH:
65-
print("Error: path to dicey program folder not defined in paths.conf")
66-
67-
return
68-
6958

7059
def getAttributes(self):
7160
if self.config:
@@ -74,10 +63,10 @@ def getAttributes(self):
7463
else:
7564
# otherwise, attempt connecting without credentials
7665
client = MongoClient()
77-
66+
7867
db = client[self.genome]
7968
collections = db.collection_names()
80-
69+
8170
# get the release we're using from the most recent geneInfo collection
8271
curr_release = fetchCurrentRelease(self.genome, collections)
8372

@@ -86,23 +75,23 @@ def getAttributes(self):
8675
nameGuideCol = "gRNAResultCollection"
8776
namePrimerCol = "primerCollection"
8877
metadataCol = "metadata"
89-
78+
9079
return db, curr_release, db[nameGeneCol], db[nameGuideCol], db[namePrimerCol], db[metadataCol]
9180

9281

9382
def getOrgName(self):
94-
orgName = ''
83+
orgName = ''
9584
metadataRecords = self.metadataCollection.find({})
9685
if metadataRecords.count() == 1:
97-
for record in metadataRecords:
98-
orgName = str(record['org_name'])
86+
for record in metadataRecords:
87+
orgName = str(record['org_name'])
9988
else:
10089
print("Error: metadata collection for " + str(self.genome) + " is misconfigured")
101-
90+
10291
return orgName
103-
10492

105-
@staticmethod
93+
94+
@staticmethod
10695
def fetchStrandofGene(ensid, genome):
10796
curr_geneCollection = getCurrentGeneCollection(genome)
10897
record = curr_geneCollection.find_one({'ENSID': ensid})
@@ -119,40 +108,40 @@ def getCredentials():
119108
except Exception, e:
120109
# this is OK: it just means there are no credentials
121110
return
122-
for line in config_file:
111+
for line in config_file:
123112
var, value = line.rstrip().split("=")
124113
if var == "password":
125114
# passwords need to have special characters escaped
126115
config[var] = urllib.quote(value)
127116
else:
128117
config[var] = value
129-
130-
return config
118+
119+
return config
131120

132121

133122
def getCurrentGeneCollection(genome):
134123
# get credentials and connect to mongodb
135-
config = getCredentials()
124+
config = getCredentials()
136125
if config:
137126
client = MongoClient('mongodb://%s:%s@localhost' % (config['username'], config['password']))
138127
else:
139128
# if no credentials defined
140129
client = MongoClient()
141-
130+
142131
db = client[genome]
143132
collections = db.collection_names()
144-
133+
145134
# get the release we're using from the most recent geneInfo collection
146135
curr_release = fetchCurrentRelease(genome, collections)
147136

148137
# determine the collection name
149138
nameGeneCol = "geneInfo_" + str(curr_release)
150-
139+
151140
return db[nameGeneCol]
152141

153142

154143
def fetchCurrentRelease(genome, collections=None):
155-
144+
156145
# optionally, if the calling function already has the list of collections, it can be passed in
157146
# otherwise, connect to mongodb and get them
158147
if not collections:
@@ -171,7 +160,7 @@ def fetchCurrentRelease(genome, collections=None):
171160
cltn_release = c.split("_",1)[1]
172161
if int(cltn_release) > int(curr_release):
173162
curr_release = cltn_release
174-
163+
175164
return curr_release
176165

177166

@@ -183,11 +172,11 @@ def fetchInstalledGenomes():
183172
else:
184173
client = MongoClient()
185174

186-
genomes = []
175+
genomes = []
187176
dbs = sorted(client.database_names(), key=lambda v: v.upper())
188177
default_dbs = ['admin', 'local','config']
189178
for db in dbs:
190-
if db not in default_dbs:
179+
if db not in default_dbs:
191180
metadataCollection = client[db]['metadata']
192181
metadataRecords = metadataCollection.find({})
193182
if metadataRecords.count() == 1:
@@ -196,7 +185,7 @@ def fetchInstalledGenomes():
196185
genomes.append((db, orgName)) # store org code and name in list as tuple
197186
else:
198187
print("Error: metadata collection for " + str(db) + " is misconfigured")
199-
return genomes
188+
return genomes
200189

201190

202191
def main():
@@ -208,11 +197,11 @@ def main():
208197
print "Databases currently available:"
209198
genomes = fetchInstalledGenomes()
210199
for g in genomes:
211-
orgName = g[1]
200+
orgName = g[1]
212201
genomeString = g[0] + " (" + orgName + ")"
213202
print genomeString
214-
215-
203+
204+
216205
if __name__ == "__main__":
217206
main()
218-
207+

Diff for: src/helpers/Config3.py

-11
Original file line numberDiff line numberDiff line change
@@ -59,24 +59,13 @@ def setPaths(self):
5959
self.PRIMER3 = line.split('=')[1].strip()
6060
elif re.match(r"^PRIMER3_CONFIG=", line):
6161
self.PRIMER3_CONFIG = line.split('=')[1].strip()
62-
elif re.match(r"DICEY_EXEC=", line):
63-
self.DICEY = line.split('=')[1].strip()
64-
elif re.match(r"DICEY_PATH=", line):
65-
self.DICEY_PATH = line.split('=')[1].strip()
6662

6763
if not self.BLAST:
6864
print("Error: path to BLAST executable not defined in paths.conf")
6965
elif not self.PRIMER3:
7066
print("Error: path to primer3 executable not defined in paths.conf")
7167
elif not self.PRIMER3_CONFIG:
7268
print("Error: path to primer3 config directory not defined in paths.conf")
73-
elif not self.DICEY:
74-
print("Error: path to dicey executable not defined in paths.conf")
75-
elif not self.DICEY_PATH:
76-
print("Error: path to dicey program folder not defined in paths.conf")
77-
78-
return
79-
8069

8170
def getRGENs(self):
8271
" return the rgen collection"

Diff for: src/primer-design/classes/Dicey.py

+48-81
Original file line numberDiff line numberDiff line change
@@ -1,102 +1,69 @@
1-
#!/usr/bin/python
1+
#!/usr/bin/env python3
22

33
"""
44
Hillary Elrick February 4th, 2019
55
66
Class definition to ease & organize access to Dicey in silico PCR tool
77
"""
88

9-
import sys
10-
import re
11-
import os
12-
import subprocess
13-
import time
14-
import json
9+
import json, os, subprocess, sys
10+
from tempfile import NamedTemporaryFile
11+
from typing import Any, Dict, List, Optional
1512

1613
# get the global root path from the Config object
1714
sys.path.append("..")
18-
from Config import Config
15+
from Config3 import Config
16+
1917

2018
class Dicey:
2119
"""
2220
Dicey is used to run in silico PCR on a pair of primers.
23-
Default temperature for the primers is 45C
2421
"""
25-
def __init__(self, sequences, temp='45', genome='mm10'):
26-
self.Config = Config()
27-
sys.path.insert(0, self.Config.DICEY_PATH)
22+
def __init__(self, sequences: List[str], temperature: int, genome: str):
2823
assert len(sequences) == 2, "Exactly two primers required"
24+
self.config = Config()
2925
self.sequences = sequences
30-
self.temp = temp # temperature
31-
self.genomePath = self.Config.DICEY_PATH+"/indexes/"+genome+"/"+genome+".fa.gz"
32-
self.tempfile = self.createTempFile()
33-
self.diceyCommand = self.constructDiceyCommand()
26+
self.temperature = temperature
27+
self.genome_path = os.path.join(self.config.ROOT_PATH, "jbrowse", "data", genome, "processed", genome + ".fa")
3428

35-
36-
def constructDiceyCommand(self):
29+
@property
30+
def command(self) -> List[str]:
3731
"""
38-
Returns the Dicey command with reference to the locally installed Primer3 in addition
39-
to the genome of interest and minimum temperature to consider for binding
32+
Returns the Dicey command prefix, referencing the locally installed Primer3 config directory in addition
33+
to the genome of interest and minimum temperature to consider for binding.
34+
The final sequences.fasta parameter is not included.
4035
"""
41-
# base shell command
42-
diceyCommand = [self.Config.DICEY]
43-
diceyCommand.append('search')
44-
45-
# provide location of primer3 config directory
46-
diceyCommand.append('-i')
47-
diceyCommand.append(self.Config.PRIMER3_CONFIG)
48-
49-
# add in the temperature
50-
diceyCommand.append('-c')
51-
diceyCommand.append(self.temp)
52-
53-
# add in the genome
54-
diceyCommand.append('-g')
55-
diceyCommand.append(self.genomePath)
56-
57-
# add in temp file
58-
diceyCommand.append(self.tempfile)
59-
60-
return diceyCommand
61-
62-
63-
def createTempFile(self):
64-
filename = str(self.sequences[0])+"_"+str(self.sequences[1])+(time.strftime("%Y-%m-%d-%H:%M:%S"))
65-
f = open(os.path.join(self.Config.DICEY_PATH, "dicey_tempfiles", filename), "w+")
66-
67-
faFormat = '>leftPrimer'
68-
faFormat += '\n' + str(self.sequences[0])
69-
faFormat += '\n>rightPrimer'
70-
faFormat += '\n' + str(self.sequences[1])
71-
72-
f.write(faFormat)
73-
f.close()
74-
75-
return f.name
76-
77-
78-
def deleteTempFile(self):
79-
if os.path.exists(self.tempfile):
80-
os.remove(self.tempfile)
81-
82-
83-
def runSequences(self):
84-
diceyProcess = subprocess.Popen(self.diceyCommand, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
85-
(diceyOut, diceyErr) = diceyProcess.communicate()
86-
87-
try:
88-
# decode json
89-
jsonResult = json.loads(diceyOut)
90-
if 'errors' in jsonResult and len(jsonResult['errors']) > 0:
91-
print("Error(s) from Dicey program: ")
92-
for error in jsonResult['errors']:
93-
print(error['title'])
36+
return [
37+
"dicey",
38+
"search",
39+
"--config",
40+
self.config.PRIMER3_CONFIG,
41+
"--cutTemp",
42+
self.temperature,
43+
"--genome",
44+
self.genome_path
45+
]
46+
47+
48+
def run(self) -> Optional[Dict[Any]]:
49+
with NamedTemporaryFile(mode="w", encoding="utf-8") as sequences:
50+
sequences.write(f">leftPrimer\n{self.sequences[0]}\n>rightPrimer\n{self.sequences[1]}\n")
51+
sequences.flush()
52+
dicey_process = subprocess.run(self.command + [sequences.name], capture_output=True)
53+
# If exceptions are okay then switch to check=True
54+
if dicey_process.returncode:
55+
print(dicey_process.stdout)
56+
print(dicey_process.stderr)
9457
return
95-
data = jsonResult['data']
96-
except Exception, e:
97-
print("Error reading Dicey results: "+str(e))
98-
return
99-
100-
return data
101-
102-
58+
else:
59+
try:
60+
jsonResult = json.loads(dicey_process.stdout)
61+
if 'errors' in jsonResult and len(jsonResult['errors']) > 0:
62+
print("Error(s) from Dicey program: ")
63+
for error in jsonResult['errors']:
64+
print(error['title'])
65+
return
66+
return jsonResult['data']
67+
except Exception as e:
68+
print("Error reading Dicey results: " + str(e))
69+
return

0 commit comments

Comments
 (0)