I wrote a simple program that executes a bunch of NOP instructions in a loop, and to my surprise it executes about 10600000000 of them per second, or about 10Ghz, while my CPU is only 2.2GHz.
How is this possible? Is the CPU treating them as a single mega-NOP, or did I just discover what "instruction level parallelism" means?
What would be a better measure for instructions per second? Doing add instructions reaches only 414900000/s, a tenth of the bogomips reported by my CPU: 4390.03
C code:
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#define ten(a) a a a a a a a a a a
#define hundred(a) ten(a) ten(a) ten(a) ten(a) ten(a) ten(a) ten(a) \
ten(a) ten(a) ten(a)
#define ITER 10000000
int main(void) {
uint64_t i=0;
uint64_t t=time(NULL);
while(1) {
for(int j=0; j<ITER;j++) {
hundred(asm volatile ("nop");)
}
i+=ITER*100;
printf("%lu/%lu\n", i, time(NULL)-t);
}
return 0;
}
Compiled assembly:
.file "gbloopinc.c"
.section .rodata
.LC0:
.string "%lu/%lu\n"
.text
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movq $0, -16(%rbp)
movl $0, %edi
call time
movq %rax, -8(%rbp)
.L4:
movl $0, -20(%rbp)
jmp .L2
.L3:
#APP
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
# 15 "gbloopinc.c" 1
nop
# 0 "" 2
#NO_APP
addl $1, -20(%rbp)
.L2:
cmpl $9999999, -20(%rbp)
jle .L3
addq $1000000000, -16(%rbp)
movl $0, %edi
call time
subq -8(%rbp), %rax
movq %rax, %rdx
movq -16(%rbp), %rax
movq %rax, %rsi
movl $.LC0, %edi
movl $0, %eax
call printf
jmp .L4
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.2) 5.4.0 20160609"
.section .note.GNU-stack,"",@progbits