I found a strange bug in clang's optimizer. Here is the code:
#include <iostream>
/// Rotates `a` to the right by `k` bit positions.
///
/// The rotation amount is reduced modulo the bit width of `unsigned int`,
/// so `k == 0` and `k >= width` are well defined (a shift by the full
/// width or more would otherwise be undefined behaviour).
///
/// @param a  Value to rotate.
/// @param k  Number of bit positions to rotate right by.
/// @return   `a` rotated right by `k` modulo the bit width.
unsigned int rot2(unsigned int a, size_t k) {
    constexpr size_t kBits = sizeof(unsigned int) * 8;
    constexpr size_t kMask = kBits - 1;
    // Masking is only equivalent to "modulo width" for power-of-two widths.
    static_assert((kBits & kMask) == 0, "bit width must be a power of two");

    const size_t right = k & kMask;
    // (kBits - right) would be kBits when right == 0; the mask folds it to 0
    // so neither shift count ever reaches the width of the type.
    const size_t left = (kBits - right) & kMask;
    return (a >> right) | (a << left);
}
// Reads an unsigned integer from standard input, rotates it with rot2 by a
// constant amount of 5, and writes the result to standard output.
int main() {
    unsigned int input;
    std::cin >> input;

    // The rotation amount is a compile-time constant at this call site.
    const auto rotated = rot2(input, 5);
    std::cout << rotated;
}
And here is the assembly generated by clang 8.0.0 and 9.0.0 with the -O3
option (from godbolt):
rot2(unsigned int, unsigned long): # @rot2(unsigned int, unsigned long)
mov rcx, rsi
mov eax, edi
ror eax, cl
ret
main: # @main
push rax
lea rsi, [rsp + 4]
mov edi, offset std::cin
call std::basic_istream<char, std::char_traits<char> >& std::basic_istream<char, std::char_traits<char> >::_M_extract<unsigned int>(unsigned int&)
mov esi, dword ptr [rsp + 4]
mov rax, rsi
shr rax, 5
shl esi, 27
or rsi, rax
mov edi, offset std::cout
call std::basic_ostream<char, std::char_traits<char> >& std::basic_ostream<char, std::char_traits<char> >::_M_insert<unsigned long>(unsigned long)
xor eax, eax
pop rcx
ret
As you can see, the rot2 function is optimized correctly and a ror instruction is used.
But to my surprise, this instruction is not used in main(), where the call is inlined:
there the rotation is computed with separate shift and or instructions instead.
Any idea why this strange bug happens? gcc and msvc do not make this mistake.