
I want to demonstrate that LLVM performs redundancy elimination.

I found the -gvn (global value numbering) option of opt and tested it on the following example:

int foo(int a, int b) {
    int c, d, e, f, g;

    c = a + b;
    d = a + b;
    e = a;
    f = e + b;
    g = c + d + e + f;

    return f;
}

using this procedure:

clang -S -emit-llvm eg.c
llvm-as eg.ll
opt -S -gvn eg.ll -o eg_opt.ll

However, I observed the same number of add operations as before.
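
A quick way to verify this, assuming a POSIX shell, is to count the matching lines in both files:

grep -c " add " eg.ll eg_opt.ll

Both listings below contain six add instructions.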

eg.ll

define i32 @foo(i32 %a, i32 %b) #0 {
entry:
  %a.addr = alloca i32, align 4
  %b.addr = alloca i32, align 4
  %c = alloca i32, align 4
  %d = alloca i32, align 4
  %e = alloca i32, align 4
  %f = alloca i32, align 4
  %g = alloca i32, align 4
  store i32 %a, i32* %a.addr, align 4
  store i32 %b, i32* %b.addr, align 4
  %0 = load i32, i32* %a.addr, align 4
  %1 = load i32, i32* %b.addr, align 4
  %add = add nsw i32 %0, %1
  store i32 %add, i32* %c, align 4
  %2 = load i32, i32* %a.addr, align 4
  %3 = load i32, i32* %b.addr, align 4
  %add1 = add nsw i32 %2, %3
  store i32 %add1, i32* %d, align 4
  %4 = load i32, i32* %a.addr, align 4
  store i32 %4, i32* %e, align 4
  %5 = load i32, i32* %e, align 4
  %6 = load i32, i32* %b.addr, align 4
  %add2 = add nsw i32 %5, %6
  store i32 %add2, i32* %f, align 4
  %7 = load i32, i32* %c, align 4
  %8 = load i32, i32* %d, align 4
  %add3 = add nsw i32 %7, %8
  %9 = load i32, i32* %e, align 4
  %add4 = add nsw i32 %add3, %9
  %10 = load i32, i32* %f, align 4
  %add5 = add nsw i32 %add4, %10
  store i32 %add5, i32* %g, align 4
  %11 = load i32, i32* %f, align 4
  ret i32 %11
}

eg_opt.ll

define i32 @foo(i32 %a, i32 %b) #0 {
entry:
  %a.addr = alloca i32, align 4
  %b.addr = alloca i32, align 4
  %c = alloca i32, align 4
  %d = alloca i32, align 4
  %e = alloca i32, align 4
  %f = alloca i32, align 4
  %g = alloca i32, align 4
  store i32 %a, i32* %a.addr, align 4
  store i32 %b, i32* %b.addr, align 4
  %0 = load i32, i32* %a.addr, align 4
  %add = add nsw i32 %0, %b
  store i32 %add, i32* %c, align 4
  %1 = load i32, i32* %a.addr, align 4
  %2 = load i32, i32* %b.addr, align 4
  %add1 = add nsw i32 %1, %2
  store i32 %add1, i32* %d, align 4
  %3 = load i32, i32* %a.addr, align 4
  store i32 %3, i32* %e, align 4
  %4 = load i32, i32* %b.addr, align 4
  %add2 = add nsw i32 %3, %4
  store i32 %add2, i32* %f, align 4
  %5 = load i32, i32* %c, align 4
  %6 = load i32, i32* %d, align 4
  %add3 = add nsw i32 %5, %6
  %7 = load i32, i32* %e, align 4
  %add4 = add nsw i32 %add3, %7
  %add5 = add nsw i32 %add4, %add2
  store i32 %add5, i32* %g, align 4
  %8 = load i32, i32* %f, align 4
  ret i32 %8
}

Did I miss anything?

JackWM
  • Did you try with -basicaa? Without alias analysis it won't touch those loads and stores. – Chirag Patel Feb 29 '16 at 06:33
  • Or, even better, use mem2reg to get SSA. It's not just more likely to be optimized, it's also much easier to follow. –  Feb 29 '16 at 09:47

1 Answer


I think the -instcombine pass is what you are looking for. Optimizing your code with -instcombine results in the following IR:

 define i32 @foo(i32 %a, i32 %b) #0 {
   %1 = add nsw i32 %a, %b
   ret i32 %1
 }

Instcombine tries to remove as many redundant instructions as possible from the IR.
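
For reference, a command along the following lines should reproduce this (this uses the legacy single-dash pass syntax of LLVM 3.x; recent releases spell it opt -passes=instcombine instead, and the output filename here is arbitrary):

 opt -S -instcombine eg.ll -o eg_instcombine.ll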

Edit: If you want to use -gvn, you first have to bring your IR into a "better" SSA form.

Using -mem2reg brings your IR into perfect SSA form:

 define i32 @foo(i32 %a, i32 %b) #0 {
   %1 = add nsw i32 %a, %b
   %2 = add nsw i32 %a, %b
   %3 = add nsw i32 %a, %b
   %4 = add nsw i32 %1, %2
   %5 = add nsw i32 %4, %a
   %6 = add nsw i32 %5, %3
   ret i32 %3
 }

Now running -gvn removes the redundant add instructions:

 define i32 @foo(i32 %a, i32 %b) #0 {
   %1 = add nsw i32 %a, %b
   %2 = add nsw i32 %1, %1
   %3 = add nsw i32 %2, %a
   %4 = add nsw i32 %3, %1
   ret i32 %1
 }
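
Both steps can be combined into a single invocation, e.g. (again legacy pass syntax, arbitrary output filename):

 opt -S -mem2reg -gvn eg.ll -o eg_mem2reg_gvn.ll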

Edit2:

Following lazyCoder's comment: supplying -gvn with an alias analysis also results in redundancy removal.

First, running -basicaa on its own leaves the IR essentially unchanged, since it is only an analysis pass (the values are merely renamed here):

define i32 @foo(i32 %a, i32 %b) #0 {
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %c = alloca i32, align 4
   %d = alloca i32, align 4
   %e = alloca i32, align 4
   %f = alloca i32, align 4
   %g = alloca i32, align 4
   store i32 %a, i32* %1, align 4
   store i32 %b, i32* %2, align 4
   %3 = load i32, i32* %1, align 4
   %4 = load i32, i32* %2, align 4
   %5 = add nsw i32 %3, %4
   store i32 %5, i32* %c, align 4
   %6 = load i32, i32* %1, align 4
   %7 = load i32, i32* %2, align 4
   %8 = add nsw i32 %6, %7
   store i32 %8, i32* %d, align 4
   %9 = load i32, i32* %1, align 4
   store i32 %9, i32* %e, align 4
   %10 = load i32, i32* %e, align 4
   %11 = load i32, i32* %2, align 4
   %12 = add nsw i32 %10, %11
   store i32 %12, i32* %f, align 4
   %13 = load i32, i32* %c, align 4
   %14 = load i32, i32* %d, align 4
   %15 = add nsw i32 %13, %14
   %16 = load i32, i32* %e, align 4
   %17 = add nsw i32 %15, %16
   %18 = load i32, i32* %f, align 4
   %19 = add nsw i32 %17, %18
   store i32 %19, i32* %g, align 4
   %20 = load i32, i32* %f, align 4
   ret i32 %20
}

Following it with -gvn results in:

define i32 @foo(i32 %a, i32 %b) #0 {
    %1 = alloca i32, align 4
    %2 = alloca i32, align 4
    %c = alloca i32, align 4
    %d = alloca i32, align 4
    %e = alloca i32, align 4
    %f = alloca i32, align 4
    %g = alloca i32, align 4
    store i32 %a, i32* %1, align 4
    store i32 %b, i32* %2, align 4
    %3 = add nsw i32 %a, %b
    store i32 %3, i32* %c, align 4
    store i32 %3, i32* %d, align 4
    store i32 %a, i32* %e, align 4
    store i32 %3, i32* %f, align 4
    %4 = add nsw i32 %3, %3
    %5 = add nsw i32 %4, %a
    %6 = add nsw i32 %5, %3
    store i32 %6, i32* %g, align 4
    ret i32 %3
}

Note that the stores are preserved, while the loads have been replaced by the known stored values.
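
To reproduce this variant, something like the following should work (legacy pass syntax again; in current LLVM releases, basic alias analysis is enabled by default and the spelling is -passes=gvn):

 opt -S -basicaa -gvn eg.ll -o eg_basicaa_gvn.ll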

Michael Haidl