Skip to content

Commit de47a89

Browse files
committed
Slight cleanup create_segments
1 parent 687e234 commit de47a89

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

src/setup/create_segments.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#!/usr/bin/env bash
2+
3+
set -euo pipefail
4+
15
fai_file=$1
26
gff3_file=$2
37
org=$3
@@ -20,7 +24,7 @@ awk 'BEGIN{OFS="\t";} {split($9,i,"Name="); print $1,$4-1,$5,"pseudogene:"i[2]}'
2024
# combine and sort all the genomic regions
2125
# Concatenate the gene, ncRNA-gene and pseudogene intervals, then sort by
# chromosome (column 1) and numerically by start coordinate (column 2).
# Paths are quoted so a work_dir containing whitespace or glob characters is
# passed to cat/sort as a single argument.
cat \
  "${work_dir}/genes.bed" \
  "${work_dir}/ncRNA_genes.bed" \
  "${work_dir}/pseudogenes.bed" \
  | sort -k1,1 -k2,2n > "${work_dir}/genomic_regions.bed"
2226
# complement genomic regions with the genome to get intergenic regions (need to merge features 1bp away to prevent bedtools from making malformed complementary regions)
23-
bedtools merge -i ${work_dir}/genomic_regions.bed -d 1 | bedtools complement -i - -g ${work_dir}/${org}.genome | awk '{print $0"\tintergenic"}' > ${work_dir}/intergenic.bed
27+
# Merge features that are within 1 bp of each other, take the complement
# against the genome file to obtain the uncovered intervals, and append an
# "intergenic" label column to each resulting interval.
# Paths are quoted so work_dir/org values containing whitespace or glob
# characters are not word-split.
bedtools merge -i "${work_dir}/genomic_regions.bed" -d 1 \
  | bedtools complement -i - -g "${work_dir}/${org}.genome" \
  | awk '{print $0"\tintergenic"}' > "${work_dir}/intergenic.bed"
2428

2529
# get exons
2630
# Extract the exon records (from the chr-prefixed sequence name through the
# Parent= attribute) from the GFF3 file, then keep only lines matching one of
# the fixed strings listed in ${org}.chr.
# Paths are quoted so values containing whitespace or glob characters are not
# word-split.
# NOTE(review): the script enables `set -euo pipefail`; grep exits 1 when it
# selects no lines, so an input without matching exons aborts the script here
# -- confirm that is the intended behaviour.
grep -oP "(chr\S+)\s+\S+\s+exon\s+\d+\s+\d+.+Parent=[^;]+" "$gff3_file" \
  | grep -Ff "${work_dir}/${org}.chr" > "${work_dir}/exons.gff3"
@@ -32,7 +36,3 @@ bedtools subtract -a ${work_dir}/genomic_regions.bed -b ${work_dir}/exons.bed |
3236
# TODO: consider concatenating only the exons, introns, and intergenic regions instead; note that the exons would then carry only a transcript ID, not a gene
3337
# concatenate the genomic regions with everything else (exons, introns, intergenic regions) and sort
3438
# Combine the genomic regions, exons, introns and intergenic intervals into
# the final per-organism segments file, sorted by chromosome (column 1) and
# numerically by start coordinate (column 2).
# Paths are quoted so work_dir/org values containing whitespace or glob
# characters are not word-split.
cat \
  "${work_dir}/genomic_regions.bed" \
  "${work_dir}/exons.bed" \
  "${work_dir}/introns.bed" \
  "${work_dir}/intergenic.bed" \
  | sort -k1,1 -k2,2n > "${work_dir}/${org}.segments.bed"
35-
36-
37-
38-

0 commit comments

Comments
 (0)