Skip to content

Commit d77468a

Browse files
authored
Create Nussinov.java
Initial commit: Implemented Nussinov algorithm for RNA secondary structure prediction - Added main class `Nussinov` for RNA secondary structure prediction using dynamic programming. - Implemented base pair compatibility checks and dynamic programming matrix setup. - Included support for reconstructing the RNA folding structure. - Added evaluation function to score the correctness of RNA folding predictions. This implementation aims to maximize base-pair compatibility in RNA sequences.
1 parent 03bb8ee commit d77468a

File tree

1 file changed

+166
-0
lines changed

1 file changed

+166
-0
lines changed
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import es.uma.eda.problem.combinatorial.sequence.folding.RNASecondaryStructurePredictor;
2+
3+
/**
4+
* Implementation of the Nussinov algorithm for RNA secondary structure prediction by
5+
* base-pair maximization using dynamic programming.
6+
*/
7+
public class Nussinov extends RNASecondaryStructurePredictor {
8+
/**
9+
* Strings describing compatible base pairs: basepairs1(i) is compatible with basepairs2(i)
10+
*/
11+
private final static String basepairs1 = "AUUCGG";
12+
private final static String basepairs2 = "UAGGCU";
13+
14+
/**
15+
* Default constructor
16+
*/
17+
public Nussinov() {
18+
}
19+
20+
/**
21+
* Returns true iff nucleotides b1 and b2 are compatible for pairing
22+
* @param b1 a nucleotide (A, C, G, U)
23+
* @param b2 a nucleotide (A, C, G, U)
24+
* @return true iff nucleotides b1 and b2 are compatible for pairing
25+
*/
26+
protected boolean isCompatible (char b1, char b2) {
27+
b1 = Character.toUpperCase(b1);
28+
b2 = Character.toUpperCase(b2);
29+
for (int i=0; i<basepairs1.length(); i++)
30+
if ((basepairs1.charAt(i) == b1) && (basepairs2.charAt(i) == b2))
31+
return true;
32+
33+
return false;
34+
}
35+
36+
@Override
37+
public String getName() {
38+
return "Nussinov algorithm";
39+
}
40+
41+
@Override
42+
protected String _run(String rnaSeq, int minLoopSize) {
43+
int n = rnaSeq.length(); // number of nucleotides in the sequence
44+
int[][] M = new int[n][n+1]; // to store costs
45+
int[][] D = new int[n][n]; // to store decisions
46+
boolean[][] B = new boolean[n][n]; // to precompute which base pairs match
47+
48+
for (int i=0; i<n; i++)
49+
for (int j=i+1; j<n; j++)
50+
B[i][j] = isCompatible(rnaSeq.charAt(i), rnaSeq.charAt(j));
51+
52+
// Base case: M(i,j) = 0 if j <= i+minLoopSize
53+
for (int i=0; i<n; i++) {
54+
M[i][i] = 0; // M(i,i-1) = 0
55+
for (int j=i, k=0; (k<=minLoopSize) && (j<n); j++, k++) {
56+
M[i][j+1] = 0; // M(i,j) = 0;
57+
D[i][j] = -1; // -1 => unpaired
58+
}
59+
}
60+
61+
// General case: M(i,j) = max(M(i,j-1), max (M(i, k-1) + M(k+1,j-1) + 1 | i <= k < j-minLoopSize, s_k and s_j are complementary)) if j > i+minLoopSize
62+
for (int l = minLoopSize + 1; l < n; l++) { // iterate possible lengths (j - i)
63+
for (int i = 0; i + l < n; i++) { // iterate through sequence
64+
int j = i + l;
65+
M[i][j + 1] = M[i][j]; // option 1: do not pair s_j
66+
D[i][j] = -1; // initially, unpaired
67+
68+
// Option 2: find max possible pairing with previous nucleotides
69+
for (int k = i; k < j - minLoopSize; k++) {
70+
if (B[k][j]) { // if nucleotides are compatible
71+
int pairs = M[i][k] + M[k + 1][j] + 1;
72+
if (pairs > M[i][j + 1]) {
73+
M[i][j + 1] = pairs;
74+
D[i][j] = k; // pair s_k with s_j
75+
}
76+
}
77+
}
78+
}
79+
}
80+
81+
if (verbosity) {
82+
System.out.println("\nScore matrix:\n");
83+
84+
System.out.print(" \t|");
85+
for (int j=0; j<=n; j++)
86+
System.out.print("\t"+(j-1));
87+
System.out.print("\n \t|\t ");
88+
for (int j=1; j<=n; j++)
89+
System.out.print("\t"+rnaSeq.charAt(j-1));
90+
System.out.print("\n---- \t+");
91+
for (int j=0; j<=n; j++)
92+
System.out.print("\t----");
93+
for (int i=0; i<n; i++) {
94+
System.out.print("\n"+i+ " " + rnaSeq.charAt(i)+"\t|\t");
95+
for (int j=0; j<i; j++) {
96+
System.out.print(" \t");
97+
}
98+
for (int j=i; j<=n; j++) {
99+
System.out.format("%d \t", M[i][j]);
100+
}
101+
}
102+
103+
System.out.println("\n\nDecision matrix:\n");
104+
105+
System.out.print(" \t|");
106+
for (int j=0; j<n; j++)
107+
System.out.print("\t"+j);
108+
System.out.print("\n \t|");
109+
for (int j=0; j<n; j++)
110+
System.out.print("\t"+rnaSeq.charAt(j));
111+
System.out.print("\n---- \t+");
112+
for (int j=0; j<n; j++)
113+
System.out.print("\t----");
114+
for (int i=0; i<n; i++) {
115+
System.out.print("\n"+i+ " " + rnaSeq.charAt(i)+"\t|\t");
116+
for (int j=0; j<n; j++) {
117+
System.out.format("%d \t", D[i][j]);
118+
}
119+
}
120+
System.out.println("\n");
121+
}
122+
123+
// Reconstruction of the optimal solution
124+
return reconstructSolution(D, 0, n-1);
125+
}
126+
127+
128+
private String reconstructSolution (int[][] D, int i, int j) {
129+
if (i >= j) {
130+
return ".".repeat(Math.max(0, j - i + 1)); // no possible pairs
131+
}
132+
133+
if (D[i][j] == -1) {
134+
return reconstructSolution(D, i, j - 1) + "."; // no pairing at j
135+
} else {
136+
int k = D[i][j];
137+
return reconstructSolution(D, i, k - 1) + "(" + reconstructSolution(D, k + 1, j - 1) + ")"; // pair s_k with s_j
138+
}
139+
}
140+
141+
@Override
142+
public double evaluate(String rnaSeq, String folding) {
143+
if (rnaSeq.length() != folding.length()) // error: the folding string does not match the RNA sequence
144+
return -1.0;
145+
double contacts = 0.0;
146+
int n = folding.length();
147+
int open = 0;
148+
149+
for (int i=0; i<n; i++)
150+
switch (folding.charAt(i)) {
151+
case '.': break;
152+
case '(': contacts += 1.0;
153+
open++;
154+
break;
155+
case ')': open --;
156+
if (open < 0) // error: parentheses are not balanced
157+
return -1.0;
158+
break;
159+
default: return -1.0; // error: unknown symbol found
160+
}
161+
if (open > 0) // error: parentheses are not balanced
162+
contacts = -1.0;
163+
return contacts;
164+
}
165+
166+
}

0 commit comments

Comments
 (0)