1

I am trying to unroll a loop in y86 code but I been getting 2 different values when I try to run a test program. The reg. code is:

    xorq %rax,%rax      # count = 0;
    andq %rdx,%rdx      # len <= 0?
    jle Done        # if so, goto Done:

Loop:   
    mrmovq (%rdi), %r10 # read val from src...
    rmmovq %r10, (%rsi) # ...and store it to dst
    andq %r10, %r10     # val <= 0?
    jle Npos        # if so, goto Npos:
    #irmovq $1, %r10
    #addq %r10, %rax        
    iaddq $1, %rax      # count++
Npos:   
    irmovq $1, %r10
    subq %r10, %rdx     # len--
    #irmovq $8, %r10
    #addq %r10, %rdi        
    #addq %r10, %rsi        
    iaddq $8, %rdi      # src++
    iaddq $8, %rsi      # dst++
    andq %rdx,%rdx      # len > 0?
    jg Loop         # if so, goto Loop:
Done:
    ret

and the unrolled version I made is:

xorq %rax,%rax      # count = 0;
    andq %rdx,%rdx      # len <= 0?
    jle Done        # if so, goto Done:

Loop:   
    mrmovq (%rdi), %r10     # read val from src…
    mrmovq 8(%rdi), %r11    # <- from class get second value
    rmmovq %r10, (%rsi)     # ...and store it to dst
    rmmovq %r11, 8(%rsi)         # store second val to dst
    andq %r10, %r10     # val <= 0?
    jle Npos            # if so, goto Npos:
    iaddq $1, %rax

Npos:   
    andq %r11, %r11 # check if src[1] <= 0
    jle Npos2       # if it is, don’t increase count
    iaddq $1, %rax

Npos2: 
    irmvoq %2, %r10
    iaddq $16, %rdi     # increase stack or base pointer to get next 2 vals
    iaddq $16, %rsi     # increase stack or base pointer to store next 2 vals
    subq %r10, %rdx     # decrease length by 2
    jge Loop            # go back into loop if length >= 2

len_cleanup:
    iaddq $2, %rdx

cleanup:
    irmovq $1, %r10
    subq %r10, %rdx
    jl Done             # if length < 0, jmp to Done, no cleanup needed
    mrmovq (%rdi), %r10     # get next val
    rmmovq %r10, (%rsi)     # move val onto stack
    andq %r10, %r10     # check if val <= 0
    jle Done            # skip count if val < 0
    iaddq $1, %rax      # same as iaddq $1, %rax

Done: 
    ret

The result I should be getting is 2 but the one being returned from the unrolled one is returning 3. I know that there is an extra iaddq being executed but I am not sure where. I unrolled the loop twice so that I would check 2 values instead.

Nukodi
  • 335
  • 1
  • 9
  • 24

1 Answers1

1

I just fixed it. I was suppose to decrement the %rdx before beginning the loop to properly unroll the function.

When you don't know that iteration count is a multiple of the unroll factor, you need to unroll do{ }while(--i >= 0); into something like
--i; do { }while(i-=2 >= 0); to make sure you don't overshoot.

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
Nukodi
  • 335
  • 1
  • 9
  • 24