I implement the Longest Common Subsequence problem in C#. I need to detect ALL the common maximal subsequences between two strings.
To do this, I create a table using Needleman-Wunsch algorithm to store the LCS sequence for each step of the calculation.
Is there any chance to determine, how many maximal subsequences were found (using a table)?
Depending on this I want to choose a method how to collect each subsequence. The point is, for one subsequence recursion is not required, so it will give a better performance. And its crucial for my task.
Here is a code snippet, where the basic functions from the project are implemented:
private static int[][] GetMatrixLCS(string x, string y)
{
var lenX = x.Length;
var lenY = y.Length;
matrixLCS = new int[lenX + 1][];
for (var i = 0; i < matrixLCS.Length; i++)
{
matrixLCS[i] = new int[lenY + 1];
}
for (int i = 0; i <= lenX; i++)
{
for (int j = 0; j <= lenY; j++)
{
if (i == 0 || j == 0)
matrixLCS[i][j] = 0;
else
if (x[i - 1] == y[j - 1])
matrixLCS[i][j] = matrixLCS[i - 1][j - 1] + 1;
else
matrixLCS[i][j] = Math.Max(matrixLCS[i - 1][j], matrixLCS[i][j - 1]);
}
}
return matrixLCS;
}
static HashSet<string> FindAllLcs(string X, string Y, int lenX, int lenY)
{
var set = new HashSet<string>();
if (lenX == 0 || lenY == 0)
return emptySet;
if (X[lenX - 1] == Y[lenY - 1])
{
var tempResult = FindAllLcs(X, Y, lenX - 1, lenY - 1);
foreach (var temp in tempResult)
set.Add(temp + X[lenX - 1]);
return set;
}
if (matrixLCS[lenX - 1][lenY] >= matrixLCS[lenX][lenY - 1])
set = FindAllLcs(X, Y, lenX - 1, lenY);
if (matrixLCS[lenX][lenY - 1] >= matrixLCS[lenX - 1][lenY])
set.UnionWith(FindAllLcs(X, Y, lenX, lenY - 1));
return set;
}
And the example with two types of inputs and expected outputs:
public void SingleOutput()
{
var sequence = LCS.FindLCS("ABC", "AB");
Assert.AreEqual(1, sequence.Length);
Assert.AreEqual("AB", sequence[0]);
}
public void MultipleOutput()
{
var sequence = LCS.FindLCS("BCAB", "ABC");
Assert.AreEqual(2, sequence.Length);
Assert.AreEqual("AB", sequence [0]);
Assert.AreEqual("BC", sequence [1]);
}
Any help would be strongly appreciated.