Is it possible to use `std::variant` as a zero-overhead and safe container for handling situations like the following when compiling with Visual Studio?
#include <variant>
extern bool bar(),foo();
/// Strict "less than" for variants holding an int*, float* or double*.
///
/// Variants holding different alternatives are ordered by alternative index.
/// Variants holding the same alternative are ordered by the pointed-to values;
/// the held pointers are dereferenced without a null check.
///
/// The index checks inside each case repeat what the switch already
/// established. They are kept on purpose: the point of this function is to see
/// whether the optimizer can prove that std::get never throws here. foo() and
/// bar() are defined elsewhere and only act as markers, so that the compiler
/// output shows whether the default case and the fall-out path were removed.
bool lessForMyVariants(const std::variant<int*, float*, double *> x, const std::variant<int*, float*, double *> y) {
    // Different alternatives: the index alone decides the order.
    if (x.index() != y.index()) {
        return x.index() < y.index();
    }

    // Both variants report the same index from here on.
    switch (x.index()) {
    case 0:
        if (x.index() == 0 && y.index() == 0)
            return *std::get<0>(x) < *std::get<0>(y);
        break;
    case 1:
        if (x.index() == 1 && y.index() == 1)
            return *std::get<1>(x) < *std::get<1>(y);
        break;
    case 2:
        if (x.index() == 2 && y.index() == 2)
            return *std::get<2>(x) < *std::get<2>(y);
        break;
    default:
        // Any index other than 0, 1 or 2.
        return foo();
    }

    // Only reached if one of the redundant checks above failed.
    return bar();
}
Since `x.index()` and `y.index()` are known to have the matching values inside each case, there should be neither the possibility of an exception (from a mismatched call of `std::get`) nor a call of `bar()`.
GCC can do that optimization - https://godbolt.org/z/6vsqPz5qe - but I cannot get Visual Studio to do the same.
Note:
- I know that the if-statements are redundant, but they are needed to trigger the optimization for gcc. Removing them would be nice.
- I'm not interested in unsafe access, in contrast to the question "Unsafe, `noexcept` and no-overhead way of accessing `std::variant`".
- Alternatives to std::variant are welcome, assuming they solve the issue.
- The calls of foo and bar are just there to check the compiler output.
I noticed that many thought that std::visit would be faster than a switch. Testing found the opposite:
// ConsoleApplication1.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#include <variant>
#include <vector>
#include <iostream>
#include <algorithm>
#include <chrono>
// Variant type used throughout the benchmark: a pointer to an int, a float or a double.
using V = std::variant<int*, float*, double*>;
// Marker for the "fell out of the switch" path of the comparators below.
// Never returns normally: it always throws the int 2.
bool bar()
{
    throw 2;
}
// Marker for the default case of the comparators below.
// Never returns normally: it always throws the int 3.
bool foo()
{
    throw 3;
}
/// Switch-based comparator taking both variants by value, with the redundant
/// per-case index checks kept in place.
///
/// Different alternatives are ordered by alternative index; equal alternatives
/// are ordered by the pointed-to values (dereferenced without a null check).
/// foo() is called for any index outside 0..2, bar() if a per-case check fails.
bool lessForMyVariantsSwitch(const std::variant<int*, float*, double*> x, const std::variant<int*, float*, double*> y) {
    // Mixed alternatives never reach the switch.
    if (x.index() != y.index()) {
        return x.index() < y.index();
    }

    switch (x.index()) {
    case 0:
        // Re-checks the index on both sides before each std::get.
        if (x.index() == 0 && y.index() == 0)
            return *std::get<0>(x) < *std::get<0>(y);
        break;
    case 1:
        if (x.index() == 1 && y.index() == 1)
            return *std::get<1>(x) < *std::get<1>(y);
        break;
    case 2:
        if (x.index() == 2 && y.index() == 2)
            return *std::get<2>(x) < *std::get<2>(y);
        break;
    default:
        return foo();
    }

    return bar();
}
/// Switch-based comparator taking both variants by const reference, without
/// the extra per-case index checks: each case calls std::get directly.
///
/// Different alternatives are ordered by alternative index; equal alternatives
/// are ordered by the pointed-to values (dereferenced without a null check).
/// foo() is called for any index outside 0..2.
bool lessForMyVariantsSwitchConstSimple(const std::variant<int*, float*, double*>&x, const std::variant<int*, float*, double*>&y) {
    // Mixed alternatives never reach the switch.
    if (x.index() != y.index()) {
        return x.index() < y.index();
    }

    // Every case returns, so no break statements are needed.
    switch (x.index()) {
    case 0:
        return *std::get<0>(x) < *std::get<0>(y);
    case 1:
        return *std::get<1>(x) < *std::get<1>(y);
    case 2:
        return *std::get<2>(x) < *std::get<2>(y);
    default:
        return foo();
    }

    // Not reachable through the switch above; kept as the same marker call the
    // other comparators end with.
    return bar();
}
/// Switch-based comparator taking both variants by const reference, with the
/// redundant per-case index checks kept in place.
///
/// Different alternatives are ordered by alternative index; equal alternatives
/// are ordered by the pointed-to values (dereferenced without a null check).
/// foo() is called for any index outside 0..2, bar() if a per-case check fails.
bool lessForMyVariantsSwitchConst(const std::variant<int*, float*, double*>&x, const std::variant<int*, float*, double*>&y) {
    // Mixed alternatives never reach the switch.
    if (x.index() != y.index()) {
        return x.index() < y.index();
    }

    switch (x.index()) {
    case 0:
        // Re-checks the index on both sides before each std::get.
        if (x.index() == 0 && y.index() == 0)
            return *std::get<0>(x) < *std::get<0>(y);
        break;
    case 1:
        if (x.index() == 1 && y.index() == 1)
            return *std::get<1>(x) < *std::get<1>(y);
        break;
    case 2:
        if (x.index() == 2 && y.index() == 2)
            return *std::get<2>(x) < *std::get<2>(y);
        break;
    default:
        return foo();
    }

    return bar();
}
// Builds one callable out of several: inherits from every callable it is given
// and pulls all of their call operators into a single overload set, so that
// one object can act as a std::visit visitor.
template <class... Fs>
struct overloaded : Fs... {
    using Fs::operator()...;
};

// Deduction guide so that `overloaded{f, g}` deduces the callable types
// (no longer required as of C++20).
template <class... Fs>
overloaded(Fs...) -> overloaded<Fs...>;
/// std::visit-based comparator taking both variants by value.
///
/// When both variants hold the same pointer type the pointed-to values are
/// compared (dereferenced without a null check); for every other pairing the
/// order is decided by the alternative index.
bool lessForMyVariantsVisit(const std::variant<int*, float*, double*> x, const std::variant<int*, float*, double*> y) {
    // Only viable when both arguments are pointers to the same type T.
    const auto sameAlternative = [] <typename T>(T * lhs, T * rhs) { return *lhs < *rhs; };
    // Catch-all for mixed pairings; ignores the visited values and reads the
    // indices of the two parameters instead.
    const auto mixedAlternatives = [&](auto, auto) { return x.index() < y.index(); };

    return std::visit(overloaded{ sameAlternative, mixedAlternatives }, x, y);
}
/// std::visit-based comparator taking both variants by const reference.
///
/// When both variants hold the same pointer type the pointed-to values are
/// compared (dereferenced without a null check); for every other pairing the
/// order is decided by the alternative index.
bool lessForMyVariantsVisitConst(const std::variant<int*, float*, double*>&x, const std::variant<int*, float*, double*>&y) {
    // Only viable when both arguments are pointers to the same type T.
    const auto sameAlternative = [] <typename T>(T * lhs, T * rhs) { return *lhs < *rhs; };
    // Catch-all for mixed pairings; ignores the visited values and reads the
    // indices of the two parameters instead.
    const auto mixedAlternatives = [&](auto, auto) { return x.index() < y.index(); };

    return std::visit(overloaded{ sameAlternative, mixedAlternatives }, x, y);
}
template <class P> size_t checkSort(std::vector<V> const& v, P p, const char* whichSort) {
size_t z=0;
std::vector<V> v2;
auto t1 = std::chrono::high_resolution_clock::now();
constexpr int maxNum = 1000000;
for (int j = 0; j < maxNum; ++j) {
v2 = v;
//std::ranges::sort(v2, p);
std::sort(v2.begin(), v2.end(), p);
z += v2[0].index();
}
auto t2 = std::chrono::high_resolution_clock::now();
std::cout << whichSort <<" took "
<< std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count()*1.0 / maxNum
<< " nanoseconds per\n";
return z;
}
int main()
{
int varr[4] = { 6,7,1,10 };
float farr[4] = { 5.0f, 1.2f, 4.5f, 2.2f };
double darr[4] = { 5.0, 1.2, 4.5, 2.2 };
std::vector<V> v;
for (int i = 0; i < 4; ++i) {
v.emplace_back(varr + i);
v.emplace_back(farr + i);
v.emplace_back(darr + i);
}
double z=0;
z+=checkSort(v, lessForMyVariantsSwitch, "lessForMyVariantsSwitch");
z += checkSort(v, lessForMyVariantsVisit, "lessForMyVariantsVisit");
z += checkSort(v, lessForMyVariantsSwitchConst, "lessForMyVariantsSwitchConst const&");
z += checkSort(v, lessForMyVariantsVisitConst, "lessForMyVariantsVisitConst const&");
z += checkSort(v, lessForMyVariantsSwitchConstSimple, "lessForMyVariantsSwitchConstSimple const&");
std::cout << "dummy: " << z;
return 0;
}
Unfortunately the switch-statement was faster with /O2 (Visual Studio 2022).
- lessForMyVariantsSwitch took 90.0676 nanoseconds per
- lessForMyVariantsVisit took 125.441 nanoseconds per
- lessForMyVariantsSwitchConst const& took 88.1962 nanoseconds per
- lessForMyVariantsVisitConst const& took 121.182 nanoseconds per
- lessForMyVariantsSwitchConstSimple const& took 96.642 nanoseconds per
The results were similar with g++ 11.3.0 and clang++ 14.0.0 under WSL-Ubuntu, with the minor difference that clang++ had lessForMyVariantsSwitchConstSimple as the fastest.