2

I am using visual studio 2005 to practice writing some assembly code. I wrote a simple function using .mmx.

Here is the complete file

        .586
    .mmx
    .model flat,c
    .code

Sum_ proc
    ; Sun_(unsigned short int*, number) 

    ;Pre Setup
    push ebp
    mov ebp, esp
    push esi
    push ebx

    ; Get the number of values to sum
    mov ecx, [ebp+12]; ecx = number
    xor eax, eax ; eax = 0

    ; Get data array to sum
    mov esi, [ebp+8]

    ; Set mm0 to 0
    movd mm0, eax   

@@:
    add eax, 4

    ; See if the system exceeds the number
    cmp eax, ecx
    jg @f

    ; process through mmx
    movq mm1, [esi + eax - 4]
    paddsw mm0, mm1
    jmp @b

@@:
    ; Clear some registers
    pxor mm1, mm1
    pxor mm2, mm2

    ; Unpack and sum
    punpckhwd mm1, mm0
    punpcklwd mm2, mm0
    paddd mm2, mm1

    ; Clear some registers
    pxor mm0, mm0
    pxor mm1, mm1

    punpckldq mm0, mm2
    punpckhdq mm1, mm2
    paddd mm0, mm1

    pshufw mm4, mm0, 00011011b ; Retreive the summed value
    movd edx, mm4

    ; Add the remaining values
    sub eax, 3
@@: cmp eax, ecx
    jge @f
    xor ebx, ebx
    mov bx, word ptr [esi + eax]
    add edx, ebx
    inc eax
    add edx, ebx
    jmp @b

@@: 
    mov eax, edx

    ; Return
    emms
    pop ebx
    pop esi
    pop ebp
    ret
Sum_ endp

    end

On the line: pshufw mm4, mm0, 00011011b ; Retreive the summed value I receive the following error :

Error 1 error A2085: instruction or register not accepted in current CPU mode

I know the command is valid. Can anyone tell my how to resolve this error.

Michael Petch
  • 46,082
  • 8
  • 107
  • 198
  • Make sure VS understands your binary constant. Can't find it in the documentation, maybe try `0x1b` instead. – Jester Mar 15 '16 at 18:30
  • I believe the binary constant is what I want. However, I tried this with both 0x1b and 0xE4 and I still get the same error. – Joseph Courtright Mar 15 '16 at 19:18
  • In NASM syntax, you can write `0b00011011`. IDK about MASM. Also, it's extremely rare that MMX is faster than SSE (using xmm registers) for anything. If you're tuning for a CPU as old as the compiler you're using (e.g. first-gen Core2 65nm (Merom/Conroe)), then MMX `pshufb` / `pshufd` is faster than the xmm versions, because it doesn't have a full 128b shuffle unit. For every newer CPU, xmm shuffles are full speed. In Skylake, some MMX instructions can't run on as many execution ports as their XMM equivalents. MMX is so obsolete that Intel is saving transistors at the cost of speed for it – Peter Cordes Mar 16 '16 at 03:37

0 Answers0