Skip to content

Commit 3ebed57

Browse files
committed
Directly download UCSC Genome Browser kent binaries to /usr/local/bin
1 parent d7c95d8 commit 3ebed57

File tree

8 files changed

+20
-24
lines changed

8 files changed

+20
-24
lines changed

Diff for: Dockerfile

+3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ RUN curl -LO https://jbrowse.org/releases/JBrowse-${JBROWSE_VERSION}/JBrowse-${J
3232
cd /var/www/html/jbrowse && ./setup.sh
3333
RUN pip install --no-cache-dir pymongo==3.8.0 requests==2.22.0 && \
3434
pip3 install --no-cache-dir pymongo==3.8.0 requests==2.20.0 Jinja2==3.1.2
35+
RUN curl -Lo /usr/local/bin/faToTwoBit https://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/faToTwoBit \
36+
-Lo /usr/local/bin/twoBitToFa https://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/twoBitToFa && \
37+
chmod +x /usr/local/bin/faToTwoBit /usr/local/bin/twoBitToFa
3538
COPY config-template /var/www/html/config
3639
WORKDIR /var/www/html
3740
CMD service mongodb start && exec apache2-foreground

Diff for: bin/faToTwoBit

-5.29 MB
Binary file not shown.

Diff for: bin/twoBitToFa

-8.93 MB
Binary file not shown.

Diff for: docker-compose.yaml

+3-2
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@ services:
66
ports:
77
- 80:80
88
tmpfs:
9-
- /run
9+
- /run/apache2
10+
- /run/mongodb
11+
- /run/lock
1012
- /tmp
1113
volumes:
1214
- ./sites-enabled:/etc/apache2/sites-enabled
13-
- ./bin:/var/www/html/bin
1415
- ./docs:/var/www/html/docs
1516
- ./src:/var/www/html/src
1617
- ./jbrowse/data:/var/www/html/jbrowse/data

Diff for: src/guide-finder/GuideSearchAndScore.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/python3
1+
#!/usr/bin/env python3
22

33
"""
44
Hillary Elrick, September 16th, 2019
@@ -548,15 +548,14 @@ def performGuideSearch(self):
548548
batchID = binascii.b2a_hex(os.urandom(9)).decode('utf-8')
549549

550550
genome_fa = os.path.join(self.dbConnection.ROOT_PATH,'jbrowse', 'data', self.genome,"processed",self.genome+".fa")
551-
twoBitToFa_path = os.path.join(self.dbConnection.ROOT_PATH,'bin/twoBitToFa')
552551
genome_2bit = os.path.join(self.dbConnection.ROOT_PATH,'jbrowse', 'data', self.genome,"processed",self.genome+'.2bit')
553552
tempfiles_path = os.path.join(self.dbConnection.ROOT_PATH,'src/guide-finder/tempfiles')
554553
if self.cli:
555554
import time
556555
time_0 = time.time()
557556
print("Fetching sequence...")
558557

559-
get_sequence.fetch_sequence(twoBitToFa_path, self.searchInput, genome_2bit, os.path.join(tempfiles_path,batchID+'_out.fa'))
558+
get_sequence.fetch_sequence(self.searchInput, genome_2bit, os.path.join(tempfiles_path,batchID+'_out.fa'))
560559
if self.cli:
561560
time_1 = time.time()
562561
print("Finished fetching sequence. " + str(round(time_1-time_0,4)))

Diff for: src/guide-finder/core/find_offtargets.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def convertExtendedBedToFasta(batchID, genome, genome_fa, tempfile_directory):
296296
raise Exception("Error in bedtools getfasta command: " + str(p.stderr))
297297

298298
"""
299-
twoBitTwoFaCommand = [os.path.join(dir_path,"../../../bin/twoBitToFa"), os.path.join(dir_path,"../../../jbrowse/data/mm10/processed/mm10.2bit"),"-bed="+extendedBed, extendedFasta]
299+
twoBitTwoFaCommand = ["twoBitToFa", os.path.join(dir_path,"../../../jbrowse/data/mm10/processed/mm10.2bit"),"-bed="+extendedBed, extendedFasta]
300300
p = run(twoBitTwoFaCommand,stderr=PIPE)
301301
if p.stderr:
302302
raise Exception("Error in twoBitToFa: "+str(p.stderr))

Diff for: src/guide-finder/core/get_sequence.py

+11-17
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#!/usr/bin/env python3
2+
13
import subprocess
24
import sys
35
import os
@@ -12,31 +14,30 @@
1214

1315
dir_path = os.path.dirname(os.path.abspath(__file__))
1416

15-
def fetch_sequence(twobit_to_fa, chrom_coord, genome_twobit, output_fasta):
17+
def fetch_sequence(chrom_coord, genome_twobit, output_fasta):
1618
'''
17-
This function will return sequence for a given chromosome, start, end from the genome_twobit file
18-
19+
This function will return sequence for a given chromosome, start, end from the genome_twobit file
1920
'''
2021

2122
chrom_match = re.match(r"(.+):(\d+)-(\d+)", chrom_coord)
2223
if chrom_match is None:
2324
sys.exit("Chromosomal coordinate must be in the format chrom:start-end")
24-
25+
2526
chrom, start, end = chrom_match.group(1), chrom_match.group(2), chrom_match.group(3)
2627
# twoBitToFa takes a zero-based start and a non-inclusive end, so shift the 1-based start down by one and leave end unchanged
2728
start = str(int(start)-1)
2829
seq_param, start_param, end_param = '-seq={0}'.format(chrom), '-start={0}'.format(start), '-end={0}'.format(end)
2930
tmp_fasta = os.path.join(dir_path,"tmp.fa")
3031

3132
try:
32-
subprocess.run([twobit_to_fa, seq_param, start_param, end_param, genome_twobit, tmp_fasta])
33+
subprocess.run(["twoBitToFa", seq_param, start_param, end_param, genome_twobit, tmp_fasta])
3334
except Exception as err:
3435
print(err)
3536

3637
if not os.path.isfile(tmp_fasta) or os.path.getsize(tmp_fasta) == 0:
3738
print("twobitToFasta failed: "+" ".join(["twoBitToFa", seq_param, start_param, end_param, genome_twobit, tmp_fasta]))  # echo the same argv passed to subprocess.run above; the old twobit_to_fa parameter no longer exists
3839
sys.exit("sequence is empty for given parameters. Please check your parameters again")
39-
40+
4041
with open(tmp_fasta,"r") as inp_fa, open(output_fasta,"w") as out_fa:
4142
out_fa.write(">"+chrom+":"+str(int(start)+1)+"-"+end+"\n")
4243
for line in inp_fa:
@@ -49,15 +50,8 @@ def fetch_sequence(twobit_to_fa, chrom_coord, genome_twobit, output_fasta):
4950
return 1
5051

5152

52-
53-
#main block
5453
if __name__ == "__main__":
55-
56-
if len(sys.argv) !=5:
57-
sys.exit("Need path to twoBittoFa executable, chromosomal coordinates (chr:start-stop), genome_twobit, output fasta file file as arguments.")
58-
59-
twobit_to_fa, chrom_coord, genome_twobit, output_fasta = sys.argv[1:]
60-
fetch_sequence(twobit_to_fa, chrom_coord, genome_twobit, output_fasta)
61-
62-
63-
54+
if len(sys.argv) != 5:
55+
sys.exit("Need chromosomal coordinates (chr:start-stop), genome_twobit, output fasta file as arguments.")
56+
chrom_coord, genome_twobit, output_fasta = sys.argv[1:]
57+
fetch_sequence(chrom_coord, genome_twobit, output_fasta)

Diff for: src/setup/process.sh

-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ set -euo pipefail
55
ASSEMBLY="$1"
66

77
SETUP_BIN=$(dirname $(realpath "$0"))
8-
PATH="/var/www/html/bin:/opt/dicey/bin:$PATH"
98

109
gunzip --keep *.fa.gz *.gff*.gz
1110
FASTA=$(echo *.fa) # only one

0 commit comments

Comments
 (0)