4

I've written the following code in C# for obtaining the length of longest common subsequence of two texts given by use, but it doesn't work with large strings. Could you please help me. I'm really confused.

public Form1()
{
    InitializeComponent();
}

public int lcs(char[] s1, char[] s2, int s1size, int s2size)
{
    if (s1size == 0 || s2size == 0)
    {
        return 0;
    }
    else
    {
        if (s1[s1size - 1] == s2[s2size - 1])
        {
            return (lcs(s1, s2, s1size - 1, s2size - 1) + 1);
        }
        else
        {
            int x = lcs(s1, s2, s1size, s2size - 1);
            int y = lcs(s1, s2, s1size - 1, s2size);
            if (x > y)
            {
                return x;
            }
            else
                return y;
        }
    }
}

private void button1_Click(object sender, EventArgs e)
{
    string st1 = textBox2.Text.Trim(' ');
    string st2 = textBox3.Text.Trim(' ');

    char[] a = st1.ToCharArray();
    char[] b = st2.ToCharArray();

    int s1 = a.Length;
    int s2 = b.Length;

    textBox1.Text = lcs(a, b, s1, s2).ToString(); 
}
Kevin Brechbühl
  • 4,717
  • 3
  • 24
  • 47
ilia7
  • 49
  • 1
  • 9
  • How large your strings? And why `c` tag? – Soner Gönül Feb 15 '14 at 12:36
  • using `recursive functions` cause to happen this problem friend, is it possible to don't use recursive functions? – Hamidreza Feb 15 '14 at 12:50
  • The shortest way to find out would be find all iterations of the words and use the contains string function with the second string. That would give the answer and you can obtain the length from that. – Ram Mehta Feb 15 '14 at 12:57

4 Answers4

9

Here you are using the Recursion method. So it leads the program to occur performance problems as you mentioned.

Instead of recursion, use the dynamic programming approach.

Here is the C# Code.

public static void LCS(char[] str1, char[] str2)
    {
        int[,] l = new int[str1.Length, str2.Length];
        int lcs = -1;
        string substr = string.Empty;
        int end = -1;

        for (int i = 0; i < str1.Length; i++)
        {
            for (int j = 0; j < str2.Length; j++)
            {
                if (str1[i] == str2[j])
                {
                    if (i == 0 || j == 0)
                    {
                        l[i, j] = 1;
                    }
                    else
                        l[i, j] = l[i - 1, j - 1] + 1;
                    if (l[i, j] > lcs)
                    {
                        lcs = l[i, j];
                        end = i;
                    }

                }
                else
                    l[i, j] = 0;
            }
        }

        for (int i = end - lcs + 1; i <= end; i++)
        {
            substr += str1[i];
        }

        Console.WriteLine("Longest Common SubString Length = {0}, Longest Common Substring = {1}", lcs, substr);
    } 
Tharindu Kumara
  • 4,398
  • 2
  • 28
  • 46
  • The question is tagged `C#`. You should be providing `C#` code. – ajay Feb 15 '14 at 12:43
  • I guess this is a common algorithm - do you have the name (or chapter in Cormen et al. or any other reference worthy link)?. – mbx Sep 14 '17 at 13:17
  • This code does not work given example `LCS("OPT".ToCharArray(), "Optional Practical Training".ToCharArray())`. It prints `Longest Common SubString Length = 1, Longest Common Substring = O` – Brian Dec 14 '17 at 18:09
3

Here is a solution how to find the longest common substring in C#:

public static string GetLongestCommonSubstring(params string[] strings)
{
    var commonSubstrings = new HashSet<string>(strings[0].GetSubstrings());
    foreach (string str in strings.Skip(1))
    {
        commonSubstrings.IntersectWith(str.GetSubstrings());
        if (commonSubstrings.Count == 0)
            return string.Empty;
    }

    return commonSubstrings.OrderByDescending(s => s.Length).DefaultIfEmpty(string.Empty).First();
}

private static IEnumerable<string> GetSubstrings(this string str)
{
    for (int c = 0; c < str.Length - 1; c++)
    {
        for (int cc = 1; c + cc <= str.Length; cc++)
        {
            yield return str.Substring(c, cc);
        }
    }
}

I found it here: http://www.snippetsource.net/Snippet/75/longest-common-substring

Christian Moser
  • 1,871
  • 20
  • 10
2

Just for fun, here is one example using Queue<T>:

string LongestCommonSubstring(params string[] strings)
{
    return LongestCommonSubstring(strings[0], new Queue<string>(strings.Skip(1)));
}

string LongestCommonSubstring(string x, Queue<string> strings)
{
    if (!strings.TryDequeue(out var y))
    {
        return x;
    }

    var output = string.Empty;

    for (int i = 0; i < x.Length; i++)
    {
        for (int j = x.Length - i; j > -1; j--)
        {
            string common = x.Substring(i, j);
            if (y.IndexOf(common) > -1 && common.Length > output.Length) output = common;
        }
    }

    return LongestCommonSubstring(output, strings);
}

It's still using recursion though, but it's a nice example of Queue<T>.

Oskar
  • 1,996
  • 1
  • 22
  • 39
  • Nice, except for one thing. It cannot be used in .Net Framework (4.5) since there is no method TryDequeue. Replaceing `if (!strings.TryDequeue(out var y)) { return x; }` with ` string y; try { y = strings.Dequeue(); } catch { return x; } ` will also make it usable in .Net Framework ;-) – Chieleman Mar 05 '21 at 09:50
1

I refactored the C++ code from Ashutosh Singh at https://iq.opengenus.org/longest-common-substring-using-rolling-hash/ to create a rolling hash approach in C# - this will find the substring in O(N * log(N)^2) time and O(N) space

using System;
using System.Collections.Generic;
public class RollingHash
{
    private class RollingHashPowers
    {
        // _mod = prime modulus of polynomial hashing
        // any prime number over a billion should suffice
        internal const int _mod = (int)1e9 + 123;
        // _hashBase = base (point of hashing)
        // this should be a prime number larger than the number of characters used
        // in my use case I am only interested in ASCII (256) characters
        // for strings in languages using non-latin characters, this should be much larger
        internal const long _hashBase = 257;
        // _pow1 = powers of base modulo mod
        internal readonly List<int> _pow1 = new List<int> { 1 };
        // _pow2 = powers of base modulo 2^64
        internal readonly List<long> _pow2 = new List<long> { 1L };

        internal void EnsureLength(int length)
        {
            if (_pow1.Capacity < length)
            {
                _pow1.Capacity = _pow2.Capacity = length;
            }
            for (int currentIndx = _pow1.Count - 1; currentIndx < length; ++currentIndx)
            {
                _pow1.Add((int)(_pow1[currentIndx] * _hashBase % _mod));
                _pow2.Add(_pow2[currentIndx] * _hashBase);
            }
        }
    }

    private class RollingHashedString
    {
        readonly RollingHashPowers _pows;
        readonly int[] _pref1; // Hash on prefix modulo mod
        readonly long[] _pref2; // Hash on prefix modulo 2^64

        // Constructor from string:
        internal RollingHashedString(RollingHashPowers pows, string s, bool caseInsensitive = false)
        {
            _pows = pows;
            _pref1 = new int[s.Length + 1];
            _pref2 = new long[s.Length + 1];

            const long capAVal = 'A';
            const long capZVal = 'Z';
            const long aADif = 'a' - 'A';

            unsafe
            {
                fixed (char* c = s)
                {
                    // Fill arrays with polynomial hashes on prefix
                    for (int i = 0; i < s.Length; ++i)
                    {
                        long v = c[i];
                        if (caseInsensitive && capAVal <= v && v <= capZVal)
                        {
                            v += aADif;
                        }
                        _pref1[i + 1] = (int)((_pref1[i] + v * _pows._pow1[i]) % RollingHashPowers._mod);
                        _pref2[i + 1] = _pref2[i] + v * _pows._pow2[i];
                    }
                }
            }
        }

        // Rollingnomial hash of subsequence [pos, pos+len)
        // If mxPow != 0, value automatically multiply on base in needed power.
        // Finally base ^ mxPow
        internal Tuple<int, long> Apply(int pos, int len, int mxPow = 0)
        {
            int hash1 = _pref1[pos + len] - _pref1[pos];
            long hash2 = _pref2[pos + len] - _pref2[pos];
            if (hash1 < 0)
            {
                hash1 += RollingHashPowers._mod;
            }
            if (mxPow != 0)
            {
                hash1 = (int)((long)hash1 * _pows._pow1[mxPow - (pos + len - 1)] % RollingHashPowers._mod);
                hash2 *= _pows._pow2[mxPow - (pos + len - 1)];
            }
            return Tuple.Create(hash1, hash2);
        }
    }

    private readonly RollingHashPowers _rhp;
    public RollingHash(int longestLength = 0)
    {
        _rhp = new RollingHashPowers();
        if (longestLength > 0)
        {
            _rhp.EnsureLength(longestLength);
        }
    }

    public string FindCommonSubstring(string a, string b, bool caseInsensitive = false)
    {
        // Calculate max neede power of base:
        int mxPow = Math.Max(a.Length, b.Length);
        _rhp.EnsureLength(mxPow);
        // Create hashing objects from strings:
        RollingHashedString hash_a = new RollingHashedString(_rhp, a, caseInsensitive);
        RollingHashedString hash_b = new RollingHashedString(_rhp, b, caseInsensitive);

        // Binary search by length of same subsequence:
        int pos = -1;
        int low = 0;
        int minLen = Math.Min(a.Length, b.Length);
        int high = minLen + 1;
        var tupleCompare = Comparer<Tuple<int, long>>.Default;
        while (high - low > 1)
        {
            int mid = (low + high) / 2;
            List<Tuple<int, long>> hashes = new List<Tuple<int, long>>(a.Length - mid + 1);
            for (int i = 0; i + mid <= a.Length; ++i)
            {
                hashes.Add(hash_a.Apply(i, mid, mxPow));
            }
            hashes.Sort(tupleCompare);
            int p = -1;
            for (int i = 0; i + mid <= b.Length; ++i)
            {
                if (hashes.BinarySearch(hash_b.Apply(i, mid, mxPow), tupleCompare) >= 0)
                {
                    p = i;
                    break;
                }
            }
            if (p >= 0)
            {
                low = mid;
                pos = p;
            }
            else
            {
                high = mid;
            }
        }
        // Output answer:
        return pos >= 0
            ? b.Substring(pos, low)
            : string.Empty;
    }
}
Brent
  • 4,611
  • 4
  • 38
  • 55