I was trying to solve this problem from acm.timus.ru which basically wants me to output the number of different substrings of a given string (max length 5000).
The solutions I am about to present are desperately inefficient and doomed for Time Limit Exceeded verdict given the constraints. However, the only way in which these two solutions differ (at least as far as I can see/understand) is that one uses std::map<long long, bool>
, while the other uses std::set <long long>
(see the beginning of the last for loop. The rest is identical, you can check by any diff tool). The map solution results in "Time Limit Exceeded on Test 3", whereas the set solution results in "Time Limit Exceeded on Test 2", which means that Test 2 is such that the map solution works faster on it than the set solution. This is the case if I choose Microsoft Visual Studio 2010 compiler. If I choose GCC, then both solutions result in TLE on test 3.
I am not asking for how to solve the problem efficiently. What I am asking
is how can one explain that using std::map
can apparently be more efficient than using std::set
. I just fail to see the mechanics of this phenomenon and hope that someone could have any insights.
Code1 (uses map, TLE 3):
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
int main ()
{
string s;
cin >> s;
vector <long long> p;
p.push_back(1);
for (int i = 1; i < s.size(); i++)
p.push_back(31 * p[i - 1]);
vector <long long> hash_temp;
hash_temp.push_back((s[0] - 'a' + 1) * p[0]);
for (int i = 1; i < s.size(); i++)
hash_temp.push_back((s[i] - 'a' + 1) * p[i] + hash_temp[i - 1]);
int n = s.size();
int answer = 0;
for (int i = 1; i <= n; i++)
{
map <long long, bool> hash_ans;
for (int j = 0; j < n - i + 1; j++)
{
if (j == 0)
hash_ans[hash_temp[j + i - 1] * p[n - j - 1]] = true;
else
hash_ans[(hash_temp[j + i - 1] - hash_temp[j - 1]) * p[n - j - 1]] = true;
}
answer += hash_ans.size();
}
cout << answer;
}
Code2 (uses set, TLE 2):
#include <iostream>
#include <string>
#include <vector>
#include <set>
using namespace std;
int main ()
{
string s;
cin >> s;
vector <long long> p;
p.push_back(1);
for (int i = 1; i < s.size(); i++)
p.push_back(31 * p[i - 1]);
vector <long long> hash_temp;
hash_temp.push_back((s[0] - 'a' + 1) * p[0]);
for (int i = 1; i < s.size(); i++)
hash_temp.push_back((s[i] - 'a' + 1) * p[i] + hash_temp[i - 1]);
int n = s.size();
int answer = 0;
for (int i = 1; i <= n; i++)
{
set <long long> hash_ans;
for (int j = 0; j < n - i + 1; j++)
{
if (j == 0)
hash_ans.insert(hash_temp[j + i - 1] * p[n - j - 1]);
else
hash_ans.insert((hash_temp[j + i - 1] - hash_temp[j - 1]) * p[n - j - 1]);
}
answer += hash_ans.size();
}
cout << answer;
}