Inspired by Jon Bentley's "Algorithm Alley" column in Dr. Dobb's.
Build an index of every suffix of the input. Sorting the index places suffixes that share a prefix next to each other, which brings common substrings together. Walk the sorted index comparing adjacent suffixes, and you can easily find the longest common substring (or the most common one).
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
// Returns the length of the longest common prefix of two NUL-terminated
// character sequences, i.e. the number of leading characters that are
// identical in both before either one ends or they first differ.
std::size_t LengthInCommon(const char *left, const char *right) {
  std::size_t shared = 0;
  // Stop at the end of `left` or at the first mismatch. If `right` ends
  // first, its terminator differs from the live character in `left`, so the
  // mismatch test covers that case too.
  for (; left[shared] != '\0' && left[shared] == right[shared]; ++shared) {
  }
  return shared;
}
// Finds the longest substring that occurs in at least two DIFFERENT strings
// of `strings`.
//
// Every suffix of every input string is indexed and the index is sorted
// lexicographically, so suffixes sharing a prefix end up next to each other.
// Only adjacent suffixes that come from different input strings are compared;
// a substring that merely repeats inside a single string is not a match.
// Restricting the walk to adjacent pairs is sufficient: any two suffixes from
// different strings have an owner change somewhere between them in sorted
// order, and every adjacent pair in that span shares at least as long a prefix.
//
// Returns the matching substring, or an empty string when no two different
// strings share a character (including when `strings` has fewer than two
// elements). When several matches have the same maximal length, the one that
// comes first in lexicographic order is returned.
//
// The strings are compared as NUL-terminated C strings, so a string with an
// embedded '\0' is only considered up to that character.
std::string FindLongestMatchingSubstring(const std::vector<std::string> &strings) {
  // One index entry per suffix: where it starts and which input string owns it.
  struct Suffix {
    const char *text;
    std::size_t owner;
  };

  // Build the index. O(n), where n is the total number of characters.
  std::size_t total_length = 0;
  for (const auto &s : strings) {
    total_length += s.size();
  }
  std::vector<Suffix> index;
  index.reserve(total_length);
  for (std::size_t owner = 0; owner < strings.size(); ++owner) {
    const std::string &s = strings[owner];
    for (std::size_t offset = 0; offset < s.size(); ++offset) {
      index.push_back(Suffix{s.c_str() + offset, owner});
    }
  }

  // Sort the index by the text of each suffix. O(n log n) comparisons, each
  // of which costs up to the length of the prefix the two suffixes share.
  // Ties on text are broken by owner only to make the order deterministic.
  std::sort(index.begin(), index.end(), [](const Suffix &left, const Suffix &right) {
    const int order = std::strcmp(left.text, right.text);
    return order != 0 ? order < 0 : left.owner < right.owner;
  });

  // Walk the sorted index and keep the longest prefix shared by two adjacent
  // suffixes that belong to different input strings.
  // O(n * m) where m is the average matching length of two adjacent suffixes.
  std::size_t length_of_longest_match = 0;
  std::string match;
  for (std::size_t i = 1; i < index.size(); ++i) {
    const Suffix &left = index[i - 1];
    const Suffix &right = index[i];
    if (left.owner == right.owner) {
      continue;  // A repeat inside one string is not a match between strings.
    }
    const std::size_t length_of_match = LengthInCommon(left.text, right.text);
    // Strict comparison keeps the first (lexicographically smallest) match
    // among those of equal length.
    if (length_of_longest_match < length_of_match) {
      length_of_longest_match = length_of_match;
      match.assign(right.text, length_of_longest_match);
    }
  }
  return match;
}
// Demo driver: runs the search on a small fixed set of identifiers and
// writes the resulting substring to standard output, followed by a newline.
int main() {
  const std::vector<std::string> samples{
      "Country_Name",
      "Country_id",
      "RankCountry",
      "ThisWillNotMatch",
  };
  const std::string longest = FindLongestMatchingSubstring(samples);
  std::cout << longest << std::endl;
  return 0;
}
Prints:
Country_