I need some help finding the flaw in my Needleman-Wunsch algorithm implementation. Right now, I'm focusing on one problem at a time, and the problem I face is when calculating my optimal alignment score it's off and I can't seem to pinpoint why. I have gone over and over the logic, done it by pen and paper and I have no idea what I'm doing wrong. I searched around and I can't find anyone who has a problem calculating their alignment with InDel extensions (which is my problem). I have considered the possibility that my test data is corrupt, but I don't think it's that. My implementation works with a match/mismatch of +3/-3, InDel of -8, and an InDel Extension of -1. I have tried to thoroughly document my code as I write so I can understand my train of thought. If anyone could please help me, it'd be much appreciated. Currently, I am getting -1 when I should be getting -3 (assuming my test data is correct which it should be because it was provided specifically for debug).
Sorry everyone, I forgot to mention, the way I debugged this was by replacing my scoring values with values from wikipedia's NWA page. I was able to reproduce their matrix with my code.
using System;
using System.IO;
namespace Program
{
class Driver
{
static int Main(string[] args)
{
//Implementation of Needleman-Wunsch algorithm, Match +3, Mismatch -3, InDel -8, InDel Extension -1
/*using (StreamReader reader = File.OpenText(args[0]))
while (!reader.EndOfStream)
{
string line = reader.ReadLine();
if (null == line)
continue;
//split line by Pipe |*/
string line = "GCATGCT | GATTACA"; //Answer should be -3
//string line = "GAAAAAAT | GAAT"; //Answer should be 1
//Split our 2 sequences by pipe
string[] arg = line.Split('|');
//Strip all white space from both sequences and assign our sequences to A and B
string A = arg[1].Trim();
string B = arg[0].Trim();
// We add +1 to the length of our strings to allow for the gap in the Needleman-Wunsch matrix
Cell[,] scoringMatrix = new Cell[A.Length + 1, B.Length + 1];
// We begin initiliazation of our N-W matrix starting from 0,0
int i = 0, j = 0;
while (i < scoringMatrix.GetLength(0))
{
while (j < scoringMatrix.GetLength(1))
{
if (i == 0 && j == 0) // This is our origin (0,0)
scoringMatrix[i, j] = new Cell();
else if (i == 0) // First row which is simply j * gap
scoringMatrix[i, j] = new Cell(i, j, null, scoringMatrix[i, j - 1], null, A, B);
else if (j == 0) // First column which is i * gap
scoringMatrix[i, j] = new Cell(i, j, scoringMatrix[i - 1, j], null, null, A, B);
else
scoringMatrix[i, j] = new Cell(i, j, scoringMatrix[i - 1, j], scoringMatrix[i, j - 1], scoringMatrix[i - 1, j - 1], A, B);
j++;
}
j = 0;
i++;
}
// Now we look at our scoringMatrix for the last cell of our last column and begin traceback
Console.WriteLine(scoringMatrix[A.Length, B.Length].C());
//}
return 0;
}
}
// This class represents each individual cell within the N-W matrix
class Cell
{
private int score;
private int row;
private int col;
char charA;
char charB;
private bool isGap;
private Source source;
public Cell()
{
// Default constructor for our matrix origin (0,0)
score = 0;
col = 0;
row = 0;
isGap = false;
source = Source.origin;
charA = '-';
charB = '-';
}
public Cell(int r, int c, Cell t, Cell l, Cell d, string A, string B)
{
if (r == 0)
{
// Special logic for our first row, D is our gap value which is either -8 for InDel OR -1 for InDel Extension
// We do not consider this row as gaps because they are givens
int D = c == 1 ? -8 : -1;
row = r;
col = c;
score = l.score + D;
isGap = false;
source = Source.left;
charA = '-';
charB = B[c - 1];
}
else if (c == 0)
{
// Special logic for our first column, D is our gap value which is either -8 for InDel OR -1 for InDel Extension
// As above, we do not consider this column as gaps because they are givens
int D = r == 1 ? -8 : -1;
row = r;
col = c;
score = t.score + D;
isGap = false;
source = Source.top;
charA = A[r - 1];
charB = '-';
}
else
{
// To determine our gap costs we look to our left and see if this is an extension or a new instance of gap
int D = l.isGap ? -1 : -8;
// We determine our top sum score using the cell above us score C() plus our predetermined gap value D
int top = t.C() + D;
// We determine our left sum score using the cell left of us score C() plus our predetermined gap value D
int left = l.C() + D;
// For debugging purposes we are saving which characters we are comparing
charA = A[r - 1];
charB = B[c - 1];
// We determine our diagonal sum score using the left diagonal of us score C() plus our match/mismatch value from S(A[i],B[j])
int diag = d.C() + S(charA, charB);
row = r;
col = c;
// The right decision is made by choosing the max of the 3 computed scores top, left, and diag
score = Math.Max(top, Math.Max(left, diag));
// We determine this cell to be a gap IF top or left is chosen (i.e. top OR left > diag)
isGap = determineGap(top, left, diag);
// We assign our source based on which of these three was chosen
source = findSource(top, left, diag);
}
}
public int S(char A, char B)
{
// We determine our match/mismatch value by checking if these 2 characters match, this value is either +3 or -3
return (A == B) ? 3 : -3;
}
public int C()
{
// Return our score
return score;
}
private bool determineGap(int top, int left, int diag)
{
// If the diagonal cell is greater than both left and top then we are definitely NOT going to have a gap
if (diag > top && diag > left)
return false;
else
return true;
}
private Source findSource(int top, int left, int diag)
{
// Here we need to determine where did we get our score from, we cover all the possible permutations
if (diag > top && diag > left)
return Source.diag;
else if (top > diag && top > left)
return Source.top;
else if (left > diag && left > top)
return Source.left;
else if (left == diag || top == diag)
return Source.diag;
else
return Source.origin;
}
public enum Source
{
// Which cell did our score come from
origin,
left,
top,
diag
}
public bool isOrigin()
{
// Is this our origin cell
return source == Source.origin ? true : false;
}
public string mySource()
{
// Convert the enum to a string
return source.ToString();
}
}
}