-2

I'm working on a simple JIT compiler for PowerPC, I followed the examples in https://github.com/spencertipping/jit-tutorial to get a hang of how to work with it.

The problem is that the identity function in the second example, "jitproto.c", can't really be ported to PowerPC as is: a version using only the "LWA" and "BLR" instructions just causes a segfault when executed.

In the end I used the machine code output of the SLJIT compiler (https://github.com/linux-on-ibm-z/sljit) to see what I'm doing wrong, and I see it generates 12 instruction words before what I thought would be the function.

So what are those instructions doing? Why can't I just start the function directly like in x86?

The code can be compiled with a C99 compiler on PPC64 (tested on a Power Mac and a POWER8 server).

#include <stdio.h>
#include <stdlib.h>
#include <endian.h>
#include <sys/mman.h>
typedef long(*fn0)(void);
typedef long(*fn1)(long);

// Instruction stream for the identity function, dumped from SLJIT.
// The mnemonics below are taken from the GDB disassembly of this buffer.
// compile_identity() copies words 0-13 and then overwrites words 14-15
// with the start address of the mapped code.
unsigned int code[] = 
{
0x7c0802a6,   // mflr  r0            prologue: copy the link register into r0
0xfbe1fff8 ,  // std   r31,-8(r1)    save r31 below the stack pointer
0xfbc1fff0 ,  // std   r30,-16(r1)   save r30 below the stack pointer
0xf8010010 ,  // std   r0,16(r1)     store the saved link register at r1+16

0x3be00000,   // li    r31,0
0x7c7e1b78 ,  // mr    r30,r3        copy the first argument (r3) into r30
0xf821ff81,   // stdu  r1,-128(r1)   push a 128-byte stack frame
0x38210080,   // addi  r1,r1,128     pop the stack frame again

0xe8010010 ,  // ld    r0,16(r1)     epilogue: reload the saved link register
0xebc1fff0,   // ld    r30,-16(r1)   restore r30
0xebe1fff8 ,  // ld    r31,-8(r1)    restore r31
0x7c0803a6 ,  // mtlr  r0            move r0 back into the link register

0x4e800020,   // blr                 return to the caller (r3 is left untouched)
0x00000000,   // padding
0x00000000,   // words 14-15: placeholder, replaced by the code start address
0x00000000};  //              in compile_identity()


/*
 * Copy the pre-assembled identity function from `code` into freshly mapped
 * read/write/execute memory and return a pointer to the start of the mapping.
 *
 * The last 8 bytes of the mapping (words 14-15) are overwritten with the
 * start address of the code, so that a caller can use the address of that
 * slot as the function pointer (main() does this on big-endian builds).
 *
 * Returns NULL if the mapping could not be created. The mapping is
 * sizeof code bytes long (16 words) and is released with munmap().
 */
fn1 compile_identity(void) {
  const size_t size = sizeof code;
  const size_t words = sizeof code / sizeof code[0];

  // allocate executable memory
  unsigned int *memory = mmap(NULL,             // address
                              size,             // size
                              PROT_READ | PROT_WRITE | PROT_EXEC,
                              MAP_PRIVATE | MAP_ANONYMOUS,
                              -1,               // fd (not used here)
                              0);               // offset (not used here)
  if (memory == MAP_FAILED) {
    // mmap reports failure with MAP_FAILED, not NULL; never write through it
    perror("mmap");
    return NULL;
  }

  // copy instructions (the trailing zero words are copied too; the last
  // two are replaced just below)
  for (size_t i = 0; i < words; ++i) {
    memory[i] = code[i];
  }

  // store the start address in the last 8-byte slot, else it only works
  // on ppc64le; index 7 must match the offset main() uses
  ((unsigned long long *)memory)[7] = (unsigned long long)memory;

  // Freshly written instructions must be made visible to instruction
  // fetch before they are executed; PowerPC does not guarantee this
  // automatically.
  __builtin___clear_cache((char *)memory, (char *)memory + size);

  return (fn1)memory;
}

int main() {
  void * test = compile_identity();
  //print stuff to check if its right
  printf("Pointer %p\n%p\n",test,((char*)test)[0]);
  for (int i = 0; i< 16; ++i){
    printf("INS %8x\n",((unsigned int*)test)[i]);
  }
  //load pointer containing function start address, for ppc64 BE and LE
#if __BYTE_ORDER == __BIG_ENDIAN
  fn1 f = (fn1*) ((unsigned long long*)test+7);
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  fn1 f = test;
#endif
  //test function
  printf("%d\n",f(4));
  //free exec memory
  munmap(test, 16*sizeof(int));
  return 0;
}

objdump output of the SLJIT raw code

asm.bin:     file format binary


Disassembly of section .data:

0000000000000000 <.data>:
   0:   7c 08 02 a6     lhzu    r16,2172(r2)
   4:   fb e1 ff f8     .long 0xf8ffe1fb
   8:   fb c1 ff f0     xxsel   vs39,vs31,vs56,vs39
   c:   fb a1 ff e8     .long 0xe8ffa1fb
  10:   fb 81 ff e0     lq      r6,-32272(r31)
  14:   f8 01 00 10     ps_msub f0,f0,f7,f0
  18:   3b e0 00 00     .long 0xe03b
  1c:   7c 7e 1b 78     .long 0x781b7e7c
  20:   7c 9d 23 78     .long 0x78239d7c
  24:   7c bc 2b 78     .long 0x782bbc7c
  28:   f8 21 ff 71     andi.   r31,r15,8696
  2c:   7f a3 eb 78     .long 0x78eba37f
  30:   38 21 00 90     stw     r0,8504(0)
  34:   e8 01 00 10     vmsumshm v0,v0,v0,v7
  38:   eb 81 ff e0     lq      r6,-32288(r31)
  3c:   eb a1 ff e8     .long 0xe8ffa1eb
  40:   eb c1 ff f0     psq_st  f7,491(r31),1,4
  44:   eb e1 ff f8     .long 0xf8ffe1eb
  48:   7c 08 03 a6     lhzu    r16,2172(r3)  #These two instructions should have been enough in x86
  4c:   4e 80 00 20     subfic  r0,r0,-32690  #

GDB disassembler output

   0x00003ffff7ff9000:  mflr    r0
   0x00003ffff7ff9004:  std     r31,-8(r1)
   0x00003ffff7ff9008:  std     r30,-16(r1)
   0x00003ffff7ff900c:  std     r0,16(r1)
   0x00003ffff7ff9010:  li      r31,0
   0x00003ffff7ff9014:  mr      r30,r3
   0x00003ffff7ff9018:  stdu    r1,-128(r1)
   0x00003ffff7ff901c:  addi    r1,r1,128
   0x00003ffff7ff9020:  ld      r0,16(r1)
   0x00003ffff7ff9024:  ld      r30,-16(r1)
   0x00003ffff7ff9028:  ld      r31,-8(r1)
   0x00003ffff7ff902c:  mtlr    r0
   0x00003ffff7ff9030:  blr

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
nioroso
  • 65
  • 8
  • 3
    Have you run it through a disassembler? If you have please post assembly code as a courtesy. If not how do you expect to understand it? When your code segfaults what did the debugger say for the reason? – Jester Mar 24 '19 at 23:16
  • There's no way to debug raw machine code with a debugger. This code works now; my question is why it needs all those additional instructions and what they do. The disassembler output doesn't make much sense to me; there are even vector instructions there. – nioroso Mar 25 '19 at 00:56
  • 2
    @nioroso_x3 what are you talking about what do you a disassembler and debugger does? Please disassemble this code and examine it before asking questions like this. Before asking us to do it for you. Doing it for you is not what stackoverflow is for. the entire purpose of a debugger is to debug machine code, its the only thing a debugger knows how to do. – old_timer Mar 25 '19 at 00:59
  • 1
    Ok, now I am confused, why does objdump and gdb decode different instructions? – nioroso Mar 25 '19 at 01:18
  • Are you sure you used the right settings for objdump? Is it maybe disassembling in LE mode while GDB is using the actual correct mode that the executable is running in? – Peter Cordes Mar 25 '19 at 19:30

1 Answer

0

The instructions are required for setting up the stack layout of PPC64 ABI. See here: http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#STACK

dark100
  • 87
  • 2
  • That was it! The instructions needed to get a function working were `mflr r0; std r0,16(r1)` as the prologue and `ld r0,16(r1); mtlr r0` as the epilogue. – nioroso Mar 25 '19 at 22:43