The optimum cost of an alignment of the strings x1 x2 x3 x4 ... x_m and y1 y2 y3
ID: 3856321 • Letter: T
Question
The optimum cost of an alignment of the strings
x1 x2 x3 x4 ... x_m and y1 y2 y3 ... y_n
will always be greater than the optimum cost of an alignment of
x2 x3 x4 ... x_m and y1 y2 y3 ... y_n
because any alignment of the first pair of strings necessarily contains an alignment of the second pair of strings.
This is NOT correct!
It is NOT true that any alignment of the first pair of strings necessarily contains an alignment of the second pair of strings: For example, let x = CT and let y = CG. It is NOT true that any alignment of these start with x1 = C against a gap, followed by an alignment of x2 against y1 y2 (i.e., an alignment of T against CG). Here is such an alignment:
A T
A G
This alignment has cost 1, whereas any alignment that starts with the first A in x against a gap necessarily will have cost at least 2 for that first gap, and optimally has cost 5:
A T -
- A G
Is it possible to have a situation in table "opt" where x_i is the same character as y_j and have
opt[ i ][ j ] = 2 + opt[ i+1][ j ] = 2 + opt[ i ][ j+1] = 0 + opt[ i + 1][ j + 1]
(In other words, when creating the alignment, we could have come from ANY of the 3 neighboring squares below and to the right?
can you give me another example of same type?
Explanation / Answer
#!/usr/bin/python -O
import numpy as np
from numpy import array
A, C, G, T = 0, 1, 2, 3
int_to_char = {0:'A', 1:'C', 2:'G', 3:'T'}
#indel = -1
#scoring = array([[1,-1,-1,-1],
#[-1,1,-1,-1],
#[-1,-1,1,-1],
#[-1,-1,-1,1]])
indel = -5
scoring = array([[2,-4,-1,-4],
[-4,2,-4,-1],
[-1,-4,2,-4],
[-4,-1,-4,2]])
class AlignmentFinder(object):
def __init__(self, seq1, seq2):
self.seq1 = seq1
self.seq2 = seq2
self.D = None
def find_gobal_alignment(self):
self.D = np.zeros((self.seq1.size+1, self.seq2.size+1), dtype=np.int16)
self._compute_array()
print self.D
return self._traceback()
def _compute_array(self):
for i in xrange(self.seq1.size+1):
self.D[i,0] = i*indel
for j in xrange(self.seq2.size+1):
self.D[0,j] = j*indel
for i in xrange(1, self.seq1.size+1):
for j in xrange(1, self.seq2.size+1):
self.D[i,j] = max( self.D[i-1, j-1] + self._get_score(i, j),
self.D[i-1, j] + indel,
self.D[i, j-1] + indel)
def _get_score(self, i, j):
''' The indexing is quite tricky because the matrix as one more row & column.
That causes a shift between the matrix index and the sequence indices.
Therefore, to obtain the correct nucleotide in the sequence, we must
substract 1 to the matrix index. '''
return scoring[self.seq1[i-1], self.seq2[j-1]]
def _get_aligned_pair(self, i, j):
n1 = int_to_char[self.seq1[i-1]] if i>0 else '_'
n2 = int_to_char[self.seq2[j-1]] if j>0 else '_'
return (n1, n2)
def _traceback(self):
alignment= []
i = self.seq1.size
j = self.seq2.size
while i >0 and j>0:
if self.D[i-1, j-1] + self._get_score(i, j) == self.D[i,j]:
alignment.append(self._get_aligned_pair(i, j))
i -= 1
j -= 1
elif self.D[i-1, j] + indel == self.D[i,j]:
alignment.append(self._get_aligned_pair(i, 0))
i -= 1
else:
alignment.append(self._get_aligned_pair(0, j))
j -= 1
while i > 0:
alignment.append(self._get_aligned_pair(i, 0))
i -= 1
while j > 0:
alignment.append(self._get_aligned_pair(0, j))
j -= 1
alignment.reverse()
return alignment
def print_sequences(pairs):
top_seq = []
bottom_seq = []
for (b, t) in pairs:
bottom_seq.append(b)
top_seq.append(t)
for n in top_seq:
print n,
print ' '
for n in bottom_seq:
print n,
if __name__ == "__main__":
s1 = array([G, T, A, C, A, G, T, A], dtype=np.int16)
s2 = array([G, G, T, A, C, G, T], dtype=np.int16)
aligner = AlignmentFinder(s1, s2)
pairs = aligner.find_gobal_alignment()
print_sequences(pairs)
while i > 1 and j > 1 do
if D[i,j] - sim_mat[BToInt(s[j-1]),BToInt(t[i-1])] = D[i-1,j-1] then
t_aln := t[i-1].t_aln:
s_aln := s[j-1].s_aln:
i := i-1: j := j-1:
elif D[i,j] - gap_score = D[i,j-1] then
s_aln := s[j-1].s_aln:
t_aln := '_'.t_aln:
j := j-1:
elif D[i,j] - gap_score = D[i-1,j] then
s_aln := '_'.s_aln:
t_aln := t[i-1].t_aln:
i := i-1:
else
error('should not happen'):
fi:
od:
OR
protected void fillInCell(Cell currentCell, Cell cellAbove, Cell cellToLeft,
Cell cellAboveLeft) {
int rowSpaceScore = cellAbove.getScore() + space;
int colSpaceScore = cellToLeft.getScore() + space;
int matchOrMismatchScore = cellAboveLeft.getScore();
if (sequence2.charAt(currentCell.getRow() - 1) == sequence1
.charAt(currentCell.getCol() - 1)) {
matchOrMismatchScore += match;
} else {
matchOrMismatchScore += mismatch;
}
if (rowSpaceScore >= colSpaceScore) {
if (matchOrMismatchScore >= rowSpaceScore) {
currentCell.setScore(matchOrMismatchScore);
currentCell.setPrevCell(cellAboveLeft);
} else {
currentCell.setScore(rowSpaceScore);
currentCell.setPrevCell(cellAbove);
}
} else {
if (matchOrMismatchScore >= colSpaceScore) {
currentCell.setScore(matchOrMismatchScore);
currentCell.setPrevCell(cellAboveLeft);
} else {
currentCell.setScore(colSpaceScore);
currentCell.setPrevCell(cellToLeft);
}
}
}
protected Object getTraceback() {
StringBuffer align1Buf = new StringBuffer();
StringBuffer align2Buf = new StringBuffer();
Cell currentCell = getTracebackStartingCell();
while (traceBackIsNotDone(currentCell)) {
if (currentCell.getRow() - currentCell.getPrevCell().getRow() == 1) {
align2Buf.insert(0, sequence2.charAt(currentCell.getRow() - 1));
} else {
align2Buf.insert(0, '-');
}
if (currentCell.getCol() - currentCell.getPrevCell().getCol() == 1) {
align1Buf.insert(0, sequence1.charAt(currentCell.getCol() - 1));
} else {
align1Buf.insert(0, '-');
}
currentCell = currentCell.getPrevCell();
}
String[] alignments = new String[] { align1Buf.toString(),
align2Buf.toString() };
return alignments;
}
protected abstract boolean traceBackIsNotDone(Cell currentCell);
protected abstract Cell getTracebackStartingCell();
protected void fillInCell(Cell currentCell, Cell cellAbove, Cell cellToLeft,
Cell cellAboveLeft) {
int rowSpaceScore = cellAbove.getScore() + space;
int colSpaceScore = cellToLeft.getScore() + space;
int matchOrMismatchScore = cellAboveLeft.getScore();
if (sequence2.charAt(currentCell.getRow() - 1) == sequence1
.charAt(currentCell.getCol() - 1)) {
matchOrMismatchScore += match;
} else {
matchOrMismatchScore += mismatch;
}
if (rowSpaceScore >= colSpaceScore) {
if (matchOrMismatchScore >= rowSpaceScore) {
if (matchOrMismatchScore > 0) {
currentCell.setScore(matchOrMismatchScore);
currentCell.setPrevCell(cellAboveLeft);
}
} else {
if (rowSpaceScore > 0) {
currentCell.setScore(rowSpaceScore);
currentCell.setPrevCell(cellAbove);
}
}
} else {
if (matchOrMismatchScore >= colSpaceScore) {
if (matchOrMismatchScore > 0) {
currentCell.setScore(matchOrMismatchScore);
currentCell.setPrevCell(cellAboveLeft);
}
} else {
if (colSpaceScore > 0) {
currentCell.setScore(colSpaceScore);
currentCell.setPrevCell(cellToLeft);
}
}
}
if (currentCell.getScore() > highScoreCell.getScore()) {
highScoreCell = currentCell;
}
}