0

Given the following lines

                "Alberry K2503 F40 D",
                "Alberry K2503 F40 S",
                "Demi Deco Denver BLK",
                "Demi Deco Denver BRN",
                "Demi Deco Tank",
                "Demi Deco Audi",
                "Samsung S 19 S10",
                "Samsung S 19 S12"

I need to get a list with the following

 Alberry K2503 F40
 Demi Deco Denver
 Demi Deco
 Samsung S 19

I tried to implement it in such way.

    class TrieNode
    {
        public string Word { get; set; }

        public TrieNode Parent { get; set; }

        public Dictionary<string, TrieNode> Children { get; set; } = new Dictionary<string, TrieNode>();

        public override string ToString()
        {
            return $"{Word}";
        }
    }

    class Program
    {
        static string JoinSentence(TrieNode node)
        {
            List<string> sentence = new List<string>();
            while (node != null)
            {
                sentence.Insert(0, node.Word);
                node = node.Parent;
            }

            return string.Join(" ", sentence);
        }

        static void GetSentences(TrieNode node, HashSet<string> sentences)
        {
            if (node.Children.Count > 0)
            {
                foreach (var nodeChild in node.Children)
                {
                    GetSentences(nodeChild.Value, sentences);
                }
            }
            else
            {
                if (node.Parent.Children.Count == 1)
                {
                    sentences.Add(JoinSentence(node));
                }
                else
                {
                    bool lastChildren = false;
                    foreach (var child in node.Parent.Children)
                    {
                        if (child.Value != node)
                        {
                            if (child.Value.Children.Count > 0)
                            {
                                lastChildren = true;
                                sentences.Add(JoinSentence(node));
                                break;
                            }
                        }
                    }

                    if (!lastChildren)
                    {
                        sentences.Add(JoinSentence(node.Parent));
                    }
                }
            }
        }

        static void Main(string[] args)
        {
            var root = new TrieNode();
            var sentences = new[]
            {
                "Alberry K2503 F40 D",
                "Alberry K2503 F40 S",
                "Demi Deco Denver BLK",
                "Demi Deco Denver BRN",
                "Demi Deco Tank",
                "Demi Deco Audi",
                "Samsung S 19 S10",
                "Samsung S 19 S12"
            };

            foreach (var sentence in sentences)
            {
                var words = sentence.Split(' ');
                TrieNode node = null;
                foreach (var word in words)
                {
                    if (node == null)
                    {
                        if (root.Children.ContainsKey(word))
                        {
                            node = root.Children[word];
                        }
                        else
                        {
                            node = new TrieNode {Word = word, Parent = root};
                            root.Children.Add(word, node);
                        }
                    }
                    else
                    {
                        if (node.Children.ContainsKey(word))
                        {
                            node = node.Children[word];
                        }
                        else
                        {
                            node.Children.Add(word, node = new TrieNode {Word = word, Parent = node});
                        }
                    }
                }
            }

            var sentencesCommon = new HashSet<string>();

            GetSentences(root, sentencesCommon);
            foreach (var sentence in sentencesCommon)
            {
                Debug.WriteLine(sentence);
            }
        }
    }

it seems to work, but is missing the result Demi Deco where Audi and Tank shall be omitted. I think I really messed with traversing the tree properly and getting unique sentences. Looks like I'm reinventing the wheel. Would someone recommend a better solution?

Thx

Eugen
  • 2,934
  • 2
  • 26
  • 47
  • What if list contains `"Alberry K2503 F40 D"` and `"Alberry F40 K2503 S"` that means after swapping of some words. – er-sho Sep 06 '19 at 06:36
  • 1
    Have you tried implementing the algorithm on [the Wikipedia page about this problem](https://en.wikipedia.org/wiki/Longest_common_substring_problem)? – Matthew Watson Sep 06 '19 at 07:40
  • the 2 new sentences have only 1 common starting word, "Alverry" and that would be the result. – Eugen Sep 06 '19 at 15:01

1 Answers1

0

You ask for the longest common substring, you can do that by matching each string against every other. However your expected output indicates you only want to match whole words, ie up to a space character. This works with your example test data:

public void Run()
{
    List<string> output = new List<string>();
    for (int i = 0; i < input.Count-1; ++i)
    {
        for (int j = i+1; j< input.Count; ++j)
        {
            string leftMatch = LeftMatch(input[i], input[j]);
            if (leftMatch.Length>0 && !output.Contains(leftMatch))
            {
                output.Add(leftMatch);
            }
        }
    }
    output.ForEach(x=>Console.WriteLine(x));
}
public string LeftMatch(string a, string b)
{
    string result = "";
    for ( int i=0; i<a.Length&& i<b.Length; ++i)
    {
        if (a[i] != b[i])
        {
            if (!result.Contains(" ")) return "";
            return result.Substring(0, result.LastIndexOf(" ", StringComparison.Ordinal));
        }
        result += a[i];
    }
    return result;
}
List<string> input =
new List<string>{
    "Alberry K2503 F40 D",
    "Alberry K2503 F40 S",
    "Demi Deco Denver BLK",
    "Demi Deco Denver BRN",
    "Demi Deco Tank",
    "Demi Deco Audi",
    "Samsung S 19 S10",
    "Samsung S 19 S12"
};

Which gives the result:

Alberry K2503 F40 
Demi Deco Denver
Demi Deco  
Samsung S 19
iakobski
  • 1,000
  • 7
  • 8