2

I am porting Mupen64plus to UWP for Windows 10 and Windows 10 Mobile. The Win32 and x64 platform configurations build fine, but the ARM configuration fails at link time when linking the .obj file assembled from an ARM assembly source file.

The project contains only one ARM assembly file, linkage_arm.asm, which is written in Microsoft ARM assembler (armasm) syntax.

I used this command line to assemble the file: "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_arm\armasm.exe" -o $(IntDir)linkage_arm.obj "%(FullPath)". The output file $(IntDir)linkage_arm.obj is generated successfully, although some warnings like the one below are reported.

1>C:\Users\Jason.Geng\Documents\Visual Studio 2015\Projects\Win64e\mupen64plus-core\src\r4300\new_dynarec\arm\linkage_arm.asm(149): warning A4509: This form of conditional instruction is deprecated
1>      ldrge  r12, [r4, r5, lsl #2]

When I built the solution, the errors were generated like below.

2>linkage_arm.obj : error LNK2013: BRANCH20(T) fixup overflow. Target "dyna_linker" is out of range
2>linkage_arm.obj : error LNK2013: BRANCH20(T) fixup overflow. Target "dyna_linker_ds" is out of range
2>linkage_arm.obj : error LNK2013: BRANCH20(T) fixup overflow. Target "tlb_exception" is out of range

Below is the source code of linkage_arm.asm

        ; Opens an exported procedure. Usage: "<label> GLOBAL_FUNCTION_BEGIN".
        ; Aligns the location counter, exports <label> as a function symbol
        ; and starts a PROC of that name (to be closed with FUNCTION_END).
        MACRO
$entry  GLOBAL_FUNCTION_BEGIN
        ALIGN
        EXPORT  $entry [FUNC]
$entry  PROC
        MEND

        ; Opens a procedure that is NOT exported. Usage:
        ; "<label> LOCAL_FUNCTION_BEGIN". Aligns the location counter and
        ; starts a PROC of that name (to be closed with FUNCTION_END).
        MACRO
$entry  LOCAL_FUNCTION_BEGIN
        ALIGN
$entry  PROC
        MEND

        ; Closes the PROC opened by GLOBAL_FUNCTION_BEGIN or
        ; LOCAL_FUNCTION_BEGIN. It emits no instruction, so a procedure
        ; without an explicit jump at its end falls through into the next one.
        MACRO
        FUNCTION_END
        ENDP
        MEND

        ; Exports <symbol> as a data symbol. Usage: "GLOBAL_VARIABLE <symbol>".
        ; Only the EXPORT is emitted; no alignment is applied here (the ALIGN
        ; that once preceded the EXPORT is disabled).
        MACRO
        GLOBAL_VARIABLE $sym
        ;ALIGN
        EXPORT  $sym [DATA]
        MEND

    EXTERN  jump_in
      ...
    EXTERN  invalidate_block

    GLOBAL_VARIABLE extra_memory
      ...
    GLOBAL_VARIABLE memory_map

    AREA    |.bbs|,DATA,ARM,READWRITE,ALIGN=12

extra_memory      SPACE 33554432+64+4+4+4+4+4+4+4+4+8+8+4+2+2+4+4+256+8+8+128+128+128+16+4+4+132+4+256+512+4194304
dynarec_local     EQU extra_memory      + 33554432
next_interupt     EQU dynarec_local     + 64
...
restore_candidate EQU mini_ht           + 256
memory_map        EQU restore_candidate + 512

|.jiptr_offset1|    EQU jump_in-(|.jiptr_pic1|+8)
|.jiptr_offset2|    EQU jump_in-(|.jiptr_pic2|+8)
...
|.htptr_offset1|    EQU hash_table-(|.htptr_pic1|+8)
|.htptr_offset2|    EQU hash_table-(|.htptr_pic2|+8)
|.htptr_offset3|    EQU hash_table-(|.htptr_pic3|+8)
|.dlptr_offset|     EQU dynarec_local+28;-(|.dlptr_pic|+8)
|.outptr_offset|    EQU out-(|.outptr_pic|+8)

    AREA    |.text|,CODE,ARM,READWRITE,ALIGN=2

; dyna_linker: resolve a branch out of recompiled code to its target block.
; In:   r0 = virtual target address
;       r1 = address of the branch instruction to patch
; Flow: 1) walk the jump_in list selected by the page index; a node matches
;          when [node] == r0 and [node+4] == 0. On a hit the branch at [r1]
;          is rewritten to point at [node+8] and control jumps there.
;       2) otherwise probe the two-entry hash_table bucket for r0.
;       3) otherwise walk the second list (labelled "jump_dirty"); on a hit
;          the entry is inserted into the hash bucket and jumped to.
;       4) otherwise call new_recompile_block(r0); a zero result retries from
;          the top, a non-zero result falls through into exec_pagefault
;          with r1 = r0 and r2 = 8.
; Every exit is a jump; the routine never returns through lr.
; The |.xxx_picN| labels mark the instruction whose pc value (label + 8 in
; ARM state) the matching |.xxx_offsetN| constant is relative to.
dyna_linker GLOBAL_FUNCTION_BEGIN
    ; r0 = virtual target address
    ; r1 = instruction to patch
    ldr    r4, =|.tlbptr_offset1|
|.tlbptr_pic1|
    add    r4, pc, r4                  ; r4 = table base (offset constant is defined outside this view)
    lsr    r5, r0, #12                 ; r5 = r0 >> 12 (page number)
    mov    r12, r0
    cmp    r0, #0xC0000000
    mov    r6, #4096
    ldrge  r12, [r4, r5, lsl #2]       ; signed r0 >= 0xC0000000: take the table entry instead of r0
    mov    r2, #0x80000
    ldr    r3, =|.jiptr_offset1|
|.jiptr_pic1|
    add    r3, pc, r3                  ; r3 = &jump_in
    tst    r12, r12
    sub    r6, r6, #1                  ; r6 = 4095
    moveq  r12, r0                     ; table entry was zero: fall back to r0
    ldr    r7, [r1]                    ; r7 = the instruction word being patched
    eor    r2, r2, r12, lsr #12        ; r2 = 0x80000 ^ (r12 >> 12)
    and    r6, r6, r12, lsr #12        ; r6 = (r12 >> 12) & 4095
    cmp    r2, #2048
    add    r12, r7, #2
    orrcs  r2, r6, #2048               ; index >= 2048 (unsigned): use 2048 | r6 instead
    ldr    r5, [r3, r2, lsl #2]        ; r5 = head of list jump_in[r2]
    lsl    r12, r12, #8                ; r12 = (r7 + 2) << 8, low 24 bits moved to the top
    ; jump_in lookup
|.A1|
    movs   r4, r5                      ; r4 = current node; zero ends the list
    beq    |.A3|
    ldr    r3, [r5]                    ; [node+0], compared with r0 below
    ldr    r5, [r4, #12]               ; [node+12] = next node
    teq    r3, r0
    bne    |.A1|
    ldr    r3, [r4, #4]                ; [node+4] must be zero for a match
    ldr    r4, [r4, #8]                ; [node+8] = address jumped to on a match
    tst    r3, r3
    bne    |.A1|
|.A2|
    mov    r5, r1                      ; r5 = address of the instruction to patch
    add    r1, r1, r12, asr #6         ; r1 += sign-extended (low 24 bits of r7 + 2) * 4
    teq    r1, r4
    moveq  pc, r4 ; Stale i-cache
    bl     add_link
    sub    r2, r4, r5                  ; byte distance from the patched instruction to the target
    and    r1, r7, #0xff000000         ; keep the top byte of the old instruction
    lsl    r2, r2, #6
    sub    r1, r1, #2
    add    r1, r1, r2, lsr #8          ; low 24 bits = (distance / 4) - 2
    str    r1, [r5]                    ; write the relinked instruction
    mov    pc, r4
|.A3|
    ; hash_table lookup
    cmp    r2, #2048                   ; these flags drive lslcc/lsrcc below
    ldr    r3, =|.jdptr_offset1|
|.jdptr_pic1|
    add    r3, pc, r3                  ; r3 = base of the second list table (offset defined outside this view)
    eor    r4, r0, r0, lsl #16
    lslcc  r2, r0, #9
    ldr    r6, =|.htptr_offset1|
|.htptr_pic1|
    add    r6, pc, r6                  ; r6 = &hash_table
    lsr    r4, r4, #12
    lsrcc  r2, r2, #21                 ; r2 < 2048: r2 = (r0 << 9) >> 21
    bic    r4, r4, #15                 ; r4 = 16-byte bucket offset from r0 ^ (r0 << 16)
    ldr    r5, [r3, r2, lsl #2]        ; r5 = head of the second list
    ldr    r7, [r6, r4]!               ; r6 = bucket address, r7 = first key
    teq    r7, r0
    ldreq  pc, [r6, #4]                ; first entry hit: jump to its code pointer
    ldr    r7, [r6, #8]                ; second key
    teq    r7, r0
    ldreq  pc, [r6, #12]               ; second entry hit
    ; jump_dirty lookup
|.A6|
    movs   r4, r5
    beq    |.A8|
    ldr    r3, [r5]
    ldr    r5, [r4, #12]
    teq    r3, r0
    bne    |.A6|
|.A7|
    ldr    r1, [r4, #8]                ; r1 = address jumped to for the matching node
    ; hash_table insert: the old first entry moves to the second slot
    ldr    r2, [r6]
    ldr    r3, [r6, #4]
    str    r0, [r6]
    str    r1, [r6, #4]
    str    r2, [r6, #8]
    str    r3, [r6, #12]
    mov    pc, r1
|.A8|
    mov    r4, r0                      ; keep r0/r1 in r4/r5 across the call
    mov    r5, r1
    bl     new_recompile_block
    tst    r0, r0
    mov    r0, r4
    mov    r1, r5
    beq    dyna_linker                 ; zero result: retry the whole lookup
    ; pagefault: fall through into exec_pagefault
    mov    r1, r0
    mov    r2, #8
    FUNCTION_END

; exec_pagefault: record a page fault in the g_cp0_regs fields and jump to
; the handler code returned by get_addr_ht(0x80000000).
; In:   r0 = instruction pointer (stored to EPC)
;       r1 = fault address      (stored to BadVAddr)
;       r2 = cause              (stored to Cause)
;       fp = base for the "#symbol-dynarec_local" offsets
; Reached by fall-through from dyna_linker and by branch from dyna_linker_ds.
; Never returns through lr.
;
; Fix: the mask must start from MVN #0xF000000F (= 0x0FFFFFF0). A previous
; edit replaced the operand with #0 and left the real constant in a trailing
; comment, which produced the mask 0xF07FFFFF and corrupted both the Context
; and EntryHi updates. 0xF000000F is 0xFF rotated right by 4, so it is a
; valid ARM-state immediate; it is not encodable in Thumb-2, so this file
; must be assembled as ARM code (AREA ... ARM / armasm -32).
exec_pagefault  LOCAL_FUNCTION_BEGIN
    ; r0 = instruction pointer
    ; r1 = fault address
    ; r2 = cause
    ldr    r3, [fp, #g_cp0_regs+48-dynarec_local] ; Status
    mvn    r6, #0xF000000F             ; r6 = 0x0FFFFFF0
    ldr    r4, [fp, #g_cp0_regs+16-dynarec_local] ; Context
    bic    r6, r6, #0x0F800000         ; r6 = 0x007FFFF0
    str    r0, [fp, #g_cp0_regs+56-dynarec_local] ; EPC
    orr    r3, r3, #2                  ; set Status bit 1
    str    r1, [fp, #g_cp0_regs+32-dynarec_local] ; BadVAddr
    bic    r4, r4, r6                  ; clear Context bits 22..4
    str    r3, [fp, #g_cp0_regs+48-dynarec_local] ; Status
    and    r5, r6, r1, lsr #9          ; (fault address >> 9) limited to bits 22..4
    str    r2, [fp, #g_cp0_regs+52-dynarec_local] ; Cause
    and    r1, r1, r6, lsl #9          ; fault address & 0xFFFFE000
    str    r1, [fp, #g_cp0_regs+40-dynarec_local] ; EntryHi
    orr    r4, r4, r5
    str    r4, [fp, #g_cp0_regs+16-dynarec_local] ; Context
    mov    r0, #0x80000000
    bl     get_addr_ht
    mov    pc, r0
    FUNCTION_END

    ; Special dynamic linker for the case where a page fault
    ; may occur in a branch delay slot.
    ;
    ; In:   r0 = virtual target address
    ;       r1 = address of the branch instruction to patch
    ; The lookup sequence (jump_in list, hash_table bucket, second list,
    ; new_recompile_block) mirrors dyna_linker. The differences are in the
    ; miss path: new_recompile_block is called with (r0 & ~7) | 1, and a
    ; failed recompile branches to exec_pagefault with
    ;   r1 = r0 & ~7, r0 = r1 - 4, r2 = 0x80000008.
    ; Every exit is a jump; the routine never returns through lr.
    ;
    ; Fix: the pagefault cause must be 0x80000008 (high bit = fault in a
    ; delay slot). A previous edit replaced the operand with #0 and left the
    ; real constant in a trailing comment, so Cause lost both the fault code
    ; and the delay-slot flag. 0x80000008 is 0x88 rotated right by 4, a valid
    ; ARM-state immediate (not encodable in Thumb-2).
dyna_linker_ds  GLOBAL_FUNCTION_BEGIN
    ; r0 = virtual target address
    ; r1 = instruction to patch
    ldr    r4, =|.tlbptr_offset2|
|.tlbptr_pic2|
    add    r4, pc, r4                  ; r4 = table base (offset constant is defined outside this view)
    lsr    r5, r0, #12                 ; r5 = r0 >> 12 (page number)
    mov    r12, r0
    cmp    r0, #0xC0000000
    mov    r6, #4096
    ldrge  r12, [r4, r5, lsl #2]       ; signed r0 >= 0xC0000000: take the table entry instead of r0
    mov    r2, #0x80000
    ldr    r3, =|.jiptr_offset2|
|.jiptr_pic2|
    add    r3, pc, r3                  ; r3 = &jump_in
    tst    r12, r12
    sub    r6, r6, #1                  ; r6 = 4095
    moveq  r12, r0                     ; table entry was zero: fall back to r0
    ldr    r7, [r1]                    ; r7 = the instruction word being patched
    eor    r2, r2, r12, lsr #12
    and    r6, r6, r12, lsr #12
    cmp    r2, #2048
    add    r12, r7, #2
    orrcs  r2, r6, #2048               ; index >= 2048 (unsigned): use 2048 | r6 instead
    ldr    r5, [r3, r2, lsl #2]        ; r5 = head of list jump_in[r2]
    lsl    r12, r12, #8
    ; jump_in lookup
|.B1|
    movs   r4, r5                      ; r4 = current node; zero ends the list
    beq    |.B3|
    ldr    r3, [r5]
    ldr    r5, [r4, #12]               ; next node
    teq    r3, r0
    bne    |.B1|
    ldr    r3, [r4, #4]                ; must be zero for a match
    ldr    r4, [r4, #8]                ; address jumped to on a match
    tst    r3, r3
    bne    |.B1|
|.B2|
    mov    r5, r1
    add    r1, r1, r12, asr #6         ; r1 += sign-extended (low 24 bits of r7 + 2) * 4
    teq    r1, r4
    moveq  pc, r4 ; Stale i-cache
    bl     add_link
    sub    r2, r4, r5
    and    r1, r7, #0xff000000         ; keep the top byte of the old instruction
    lsl    r2, r2, #6
    sub    r1, r1, #2
    add    r1, r1, r2, lsr #8          ; low 24 bits = (distance / 4) - 2
    str    r1, [r5]                    ; write the relinked instruction
    mov    pc, r4
|.B3|
    ; hash_table lookup
    cmp    r2, #2048                   ; these flags drive lslcc/lsrcc below
    ldr    r3, =|.jdptr_offset2|
|.jdptr_pic2|
    add    r3, pc, r3
    eor    r4, r0, r0, lsl #16
    lslcc  r2, r0, #9
    ldr    r6, =|.htptr_offset2|
|.htptr_pic2|
    add    r6, pc, r6                  ; r6 = &hash_table
    lsr    r4, r4, #12
    lsrcc  r2, r2, #21
    bic    r4, r4, #15                 ; 16-byte bucket offset
    ldr    r5, [r3, r2, lsl #2]        ; r5 = head of the second list
    ldr    r7, [r6, r4]!               ; r6 = bucket address, r7 = first key
    teq    r7, r0
    ldreq  pc, [r6, #4]
    ldr    r7, [r6, #8]
    teq    r7, r0
    ldreq  pc, [r6, #12]
    ; jump_dirty lookup
|.B6|
    movs   r4, r5
    beq    |.B8|
    ldr    r3, [r5]
    ldr    r5, [r4, #12]
    teq    r3, r0
    bne    |.B6|
|.B7|
    ldr    r1, [r4, #8]
    ; hash_table insert: the old first entry moves to the second slot
    ldr    r2, [r6]
    ldr    r3, [r6, #4]
    str    r0, [r6]
    str    r1, [r6, #4]
    str    r2, [r6, #8]
    str    r3, [r6, #12]
    mov    pc, r1
|.B8|
    mov    r4, r0                      ; keep r0/r1 in r4/r5 across the call
    bic    r0, r0, #7
    mov    r5, r1
    orr    r0, r0, #1                  ; argument = (r0 & ~7) | 1
    bl     new_recompile_block
    tst    r0, r0
    mov    r0, r4
    mov    r1, r5
    beq    dyna_linker_ds              ; zero result: retry the whole lookup
    ; pagefault
    bic    r1, r0, #7
    mov    r2, #0x80000008 ; High bit set indicates pagefault in delay slot
    sub    r0, r1, #4
    b      exec_pagefault
    FUNCTION_END

; jump_vaddr_r0: indirect-jump entry for a target address already in r0.
; Leaves r0 ^ (r0 << 16) in r2 and branches to jump_vaddr.
jump_vaddr_r0   GLOBAL_FUNCTION_BEGIN
    eor    r2, r0, r0, lsl #16
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r1: indirect-jump entry for a target address held in r1.
; Copies r1 to r0, leaves r1 ^ (r1 << 16) in r2, then branches to jump_vaddr.
jump_vaddr_r1   GLOBAL_FUNCTION_BEGIN
    mov    r0, r1
    eor    r2, r1, r1, lsl #16
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r2: indirect-jump entry for a target address held in r2.
; Copies r2 to r0 first (r2 is overwritten next), leaves the hash input
; addr ^ (addr << 16) in r2, then branches to jump_vaddr.
jump_vaddr_r2   GLOBAL_FUNCTION_BEGIN
    mov    r0, r2
    eor    r2, r0, r0, lsl #16         ; r0 still equals the original r2 here
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r3: indirect-jump entry for a target address held in r3.
; Copies r3 to r0, leaves r3 ^ (r3 << 16) in r2, then branches to jump_vaddr.
jump_vaddr_r3   GLOBAL_FUNCTION_BEGIN
    mov    r0, r3
    eor    r2, r3, r3, lsl #16
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r4: indirect-jump entry for a target address held in r4.
; Copies r4 to r0, leaves r4 ^ (r4 << 16) in r2, then branches to jump_vaddr.
jump_vaddr_r4   GLOBAL_FUNCTION_BEGIN
    mov    r0, r4
    eor    r2, r4, r4, lsl #16
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r5: indirect-jump entry for a target address held in r5.
; Copies r5 to r0, leaves r5 ^ (r5 << 16) in r2, then branches to jump_vaddr.
jump_vaddr_r5   GLOBAL_FUNCTION_BEGIN
    mov    r0, r5
    eor    r2, r5, r5, lsl #16
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r6: indirect-jump entry for a target address held in r6.
; Copies r6 to r0, leaves r6 ^ (r6 << 16) in r2, then branches to jump_vaddr.
jump_vaddr_r6   GLOBAL_FUNCTION_BEGIN
    mov    r0, r6
    eor    r2, r6, r6, lsl #16
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r8: indirect-jump entry for a target address held in r8.
; Copies r8 to r0, leaves r8 ^ (r8 << 16) in r2, then branches to jump_vaddr.
jump_vaddr_r8   GLOBAL_FUNCTION_BEGIN
    mov    r0, r8
    eor    r2, r8, r8, lsl #16
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r9: indirect-jump entry for a target address held in r9.
; Copies r9 to r0, leaves r9 ^ (r9 << 16) in r2, then branches to jump_vaddr.
jump_vaddr_r9   GLOBAL_FUNCTION_BEGIN
    mov    r0, r9
    eor    r2, r9, r9, lsl #16
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r10: indirect-jump entry for a target address held in r10.
; Copies r10 to r0, leaves r10 ^ (r10 << 16) in r2, then branches to
; jump_vaddr.
jump_vaddr_r10  GLOBAL_FUNCTION_BEGIN
    mov    r0, r10
    eor    r2, r10, r10, lsl #16
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r12: indirect-jump entry for a target address held in r12.
; Copies r12 to r0, leaves r12 ^ (r12 << 16) in r2, then branches to
; jump_vaddr.
jump_vaddr_r12  GLOBAL_FUNCTION_BEGIN
    mov    r0, r12
    eor    r2, r12, r12, lsl #16
    b      jump_vaddr
    FUNCTION_END

; jump_vaddr_r7: indirect-jump entry for a target address held in r7.
; Copies r7 to r0 and leaves r7 ^ (r7 << 16) in r2. There is no branch:
; execution falls through into jump_vaddr, which must directly follow.
jump_vaddr_r7   GLOBAL_FUNCTION_BEGIN
    mov    r0, r7
    eor    r2, r7, r7, lsl #16
    FUNCTION_END

; jump_vaddr: jump to the code registered for the virtual address in r0.
; In:   r0 = target virtual address
;       r2 = r0 ^ (r0 << 16), as prepared by the jump_vaddr_rN entries
;       r10 = value saved to / restored from cycle_count around get_addr
; Probes the two-entry hash_table bucket selected by (r2 >> 12) & ~15.
; On a miss it calls get_addr (r0 unchanged) and jumps to the address it
; returns. Never returns through lr. Entered by fall-through from
; jump_vaddr_r7 as well as by branch.
jump_vaddr  GLOBAL_FUNCTION_BEGIN
    ldr    r1, =|.htptr_offset3|
|.htptr_pic3|
    add    r1, pc, r1                  ; r1 = &hash_table (pc-relative)
    mvn    r3, #15                     ; r3 = ~15
    and    r2, r3, r2, lsr #12         ; r2 = 16-byte bucket offset
    ldr    r2, [r1, r2]!               ; r1 = bucket address, r2 = first key
    teq    r2, r0
    ldreq  pc, [r1, #4]                ; first entry hit
    ldr    r2, [r1, #8]                ; second key
    teq    r2, r0
    ldreq  pc, [r1, #12]               ; second entry hit
    str    r10, [fp, #cycle_count-dynarec_local]
    bl     get_addr
    ldr    r10, [fp, #cycle_count-dynarec_local]
    mov    pc, r0
    FUNCTION_END

; verify_code_ds: saves r8 into branch_target, then falls through into
; verify_code_vm (there is no branch or return here). verify_code reloads
; r8 from branch_target before it returns.
verify_code_ds  GLOBAL_FUNCTION_BEGIN
    str    r8, [fp, #branch_target-dynarec_local]
    FUNCTION_END

; verify_code_vm: verify_code for a source given as a virtual address.
; In:   r0 = instruction pointer (virtual address)
;       r1 = source (virtual address)
;       r2 = target
;       r3 = length
; If r1 is below 0xC0000000 (signed compare) it branches straight to
; verify_code. Otherwise r1 is translated through memory_map: the entry for
; the first page is shifted left by 2 and added to r1, and every further
; page up to the one containing r1 + r3 - 1 must yield the same shifted
; value. A negative first entry or a mismatching page goes to |.D5| in
; verify_code (get_addr path). On success execution falls through into
; verify_code with the translated r1.
verify_code_vm  GLOBAL_FUNCTION_BEGIN
    ; r0 = instruction pointer (virtual address)
    ; r1 = source (virtual address)
    ; r2 = target
    ; r3 = length
    cmp    r1, #0xC0000000
    blt    verify_code
    add    r12, fp, #memory_map-dynarec_local
    lsr    r4, r1, #12                 ; r4 = first page index
    add    r5, r1, r3
    sub    r5, #1                      ; r5 = address of the last byte
    ldr    r6, [r12, r4, lsl #2]       ; map entry of the first page
    lsr    r5, r5, #12                 ; r5 = last page index
    movs   r7, r6
    bmi    |.D5|                       ; negative entry: give up
    add    r1, r1, r6, lsl #2          ; translate the source address
    lsl    r6, r6, #2
|.D1|
    add    r4, r4, #1
    teq    r6, r7, lsl #2              ; this page must map like the first one
    bne    |.D5|
    ldr    r7, [r12, r4, lsl #2]       ; entry of the next page
    cmp    r4, r5
    bls    |.D1|
    FUNCTION_END

; verify_code: compare r3 bytes at r1 (source) with r3 bytes at r2 (target),
; one word at a time.
; In:   r0 = instruction pointer (passed on to get_addr on a mismatch)
;       r1 = source, r2 = target, r3 = length
; If all compared words are equal, r8 is reloaded from branch_target and
; the routine returns through lr. Otherwise it calls get_addr and jumps to
; the returned address. |.D5| is also entered from verify_code_vm.
; The loop handles two words per iteration; when bit 2 of the length is set
; one extra word pair is preloaded before the loop.
verify_code GLOBAL_FUNCTION_BEGIN
    ; r1 = source
    ; r2 = target
    ; r3 = length
    tst    r3, #4                      ; odd number of words?
    mov    r4, #0
    add    r3, r1, r3                  ; r3 = end of source
    mov    r5, #0
    ldrne  r4, [r1], #4                ; preload the odd word pair
    mov    r12, #0
    ldrne  r5, [r2], #4
    teq    r1, r3
    beq    |.D3|                       ; nothing left beyond the preloaded pair
|.D2|
    ldr    r7, [r1], #4
    eor    r9, r4, r5                  ; difference of the previous pair
    ldr    r8, [r2], #4
    orrs   r9, r9, r12                 ; combined with the pair before that
    bne    |.D4|                       ; mismatch (flags are NE at .D4)
    ldr    r4, [r1], #4
    eor    r12, r7, r8
    ldr    r5, [r2], #4
    cmp    r1, r3
    bcc    |.D2|
    teq    r7, r8
|.D3|
    teqeq  r4, r5                      ; final pair, only tested if still equal
|.D4|
    ldr    r8, [fp, #branch_target-dynarec_local]
    moveq  pc, lr                      ; everything matched: return
|.D5|
    bl     get_addr
    mov    pc, r0
    FUNCTION_END

; cc_interrupt: cycle-count expiry handler called from recompiled code.
; In:   r10 = cycle count relative to last_count, lr = return address,
;       fp  = base for the "#symbol-dynarec_local" offsets
; Steps: Count = last_count + r10; pending_exception = 0; one word of
;        restore_candidate (selected by Count bits 27..21) is checked and, if
;        non-zero, cleared and clean_blocks is called for each set bit;
;        gen_interupt is called; last_count/r10 are refreshed from
;        next_interupt.
; Exits: returns through the saved lr when neither stop nor
;        pending_exception is set; jumps to get_addr_ht(pcaddr) when
;        pending_exception is set; when stop is set it reloads
;        r4-r9, sl, fp, pc from fp+28 (the area new_dyna_start filled).
; r10 holds the return address across the calls (lr is clobbered by bl).
;
; Fix: clean_blocks was called with a conditional "blcs". A conditional BL
; to an external function cannot be fixed up by the linker when the callee
; is Thumb code (LNK2013 BRANCH24 overflow / LNK2008), whereas an
; unconditional BL can be converted to BLX. The call is now an
; unconditional BL skipped by "bcc"; behaviour is otherwise unchanged.
cc_interrupt    GLOBAL_FUNCTION_BEGIN
    ldr    r0, [fp, #last_count-dynarec_local]
    mov    r1, #0
    mov    r2, #0x1fc
    add    r10, r0, r10                ; r10 = absolute count
    str    r1, [fp, #pending_exception-dynarec_local]
    and    r2, r2, r10, lsr #19        ; word offset into restore_candidate
    add    r3, fp, #restore_candidate-dynarec_local
    str    r10, [fp, #g_cp0_regs+36-dynarec_local] ; Count
    ldr    r4, [r2, r3]
    mov    r10, lr                     ; keep the return address in r10
    tst    r4, r4
    bne    |.E4|
|.E1|
    bl     gen_interupt
    mov    lr, r10
    ldr    r10, [fp, #g_cp0_regs+36-dynarec_local] ; Count
    ldr    r0, [fp, #next_interupt-dynarec_local]
    ldr    r1, [fp, #pending_exception-dynarec_local]
    ldr    r2, [fp, #stop-dynarec_local]
    str    r0, [fp, #last_count-dynarec_local]
    sub    r10, r10, r0                ; r10 is relative to last_count again
    tst    r2, r2
    bne    |.E3|
    tst    r1, r1
    moveq  pc, lr                      ; nothing pending: resume the caller
|.E2|
    ldr    r0, [fp, #pcaddr-dynarec_local]
    bl     get_addr_ht
    mov    pc, r0
|.E3|
    add    r12, fp, #28
    ldmia  r12, {r4, r5, r6, r7, r8, r9, sl, fp, pc}
|.E4|
    ; Move 'dirty' blocks to the 'clean' list
    lsl    r5, r2, #3                  ; r5 = index of bit 0 of this word
    str    r1, [r2, r3]                ; clear the word (r1 == 0)
    mov    r6, #0
|.E5|
    lsrs   r4, r4, #1                  ; carry = next candidate bit
    add    r0, r5, r6                  ; does not touch the flags
    bcc    |.E5_next|
    bl     clean_blocks
|.E5_next|
    add    r6, r6, #1
    tst    r6, #31
    bne    |.E5|
    b      |.E1|
    FUNCTION_END

; do_interrupt: look up the code for pcaddr via get_addr_ht, refresh
; last_count from next_interupt, set r10 = Count - next_interupt + 2 and
; jump to the returned address. Never returns through lr.
do_interrupt    GLOBAL_FUNCTION_BEGIN
    ldr    r0, [fp, #pcaddr-dynarec_local]
    bl     get_addr_ht
    ldr    r1, [fp, #next_interupt-dynarec_local]
    ldr    r10, [fp, #g_cp0_regs+36-dynarec_local] ; Count
    str    r1, [fp, #last_count-dynarec_local]
    sub    r10, r10, r1
    add    r10, r10, #2
    mov    pc, r0
    FUNCTION_END

; fp_exception: raise an exception with Cause = 0x10000000 + 0x2c.
; In:   r0 = value stored to EPC
; Sets Status bit 1, writes EPC and Cause, then jumps to the address
; returned by get_addr_ht(0x80000180). Never returns through lr.
; |.E7| is a second entry point: fp_exception_ds branches to it with its
; own value already in r2.
fp_exception    GLOBAL_FUNCTION_BEGIN
    mov    r2, #0x10000000
|.E7|
    ldr    r1, [fp, #g_cp0_regs+48-dynarec_local] ; Status
    mov    r3, #0x80000000
    str    r0, [fp, #g_cp0_regs+56-dynarec_local] ; EPC
    orr    r1, #2
    add    r2, r2, #0x2c
    str    r1, [fp, #g_cp0_regs+48-dynarec_local] ; Status
    str    r2, [fp, #g_cp0_regs+52-dynarec_local] ; Cause
    add    r0, r3, #0x180              ; r0 = 0x80000180
    bl     get_addr_ht
    mov    pc, r0
    FUNCTION_END

; fp_exception_ds: delay-slot variant of fp_exception. Loads r2 with
; 0x90000000 (0x10000000 plus the high "delay slot" bit) and joins
; fp_exception at |.E7|.
fp_exception_ds GLOBAL_FUNCTION_BEGIN
    mov    r2, #0x90000000 ; Set high bit if delay slot
    b      |.E7|
    FUNCTION_END

; jump_syscall: raise an exception with Cause = 0x20.
; In:   r0 = value stored to EPC
; Sets Status bit 1, writes EPC and Cause, then jumps to the address
; returned by get_addr_ht(0x80000180). Never returns through lr.
jump_syscall    GLOBAL_FUNCTION_BEGIN
    ldr    r1, [fp, #g_cp0_regs+48-dynarec_local] ; Status
    mov    r3, #0x80000000
    str    r0, [fp, #g_cp0_regs+56-dynarec_local] ; EPC
    orr    r1, #2
    mov    r2, #0x20
    str    r1, [fp, #g_cp0_regs+48-dynarec_local] ; Status
    str    r2, [fp, #g_cp0_regs+52-dynarec_local] ; Cause
    add    r0, r3, #0x180              ; r0 = 0x80000180
    bl     get_addr_ht
    mov    pc, r0
    FUNCTION_END

; indirect_jump_indexed: r0 = word at r0 + r1 * 4, then falls through into
; indirect_jump (there is no branch or return here).
indirect_jump_indexed   GLOBAL_FUNCTION_BEGIN
    ldr    r0, [r0, r1, lsl #2]
    FUNCTION_END

; indirect_jump: store Count = r2 + last_count and jump to the address in
; r0. Also entered by fall-through from indirect_jump_indexed.
; In:   r0 = address to jump to, r2 = count relative to last_count
indirect_jump   GLOBAL_FUNCTION_BEGIN
    ldr    r12, [fp, #last_count-dynarec_local]
    add    r2, r2, r12                 ; r2 = absolute count
    str    r2, [fp, #g_cp0_regs+36-dynarec_local] ; Count
    mov    pc, r0
    FUNCTION_END

; jump_eret: return-from-exception helper.
; In:   r10 = cycle count relative to last_count
; Steps: clear Status bit 1; Count = last_count + r10; call check_interupt;
;        last_count = next_interupt; r10 = Count - next_interupt.
;        If r10 is not negative, EPC is parked in pcaddr and cc_interrupt is
;        called first (|.E11|).
;        Then a flag word is built in r1, one bit per 64-bit register slot
;        walking down from reg+248 to reg+8, plus one combined bit for hi/lo:
;        the bit is set when the upper word is the sign extension of the
;        lower word (carry of "subs r3, r3, #1" is clear only when r3 != 0
;        is false, i.e. r3 == 0).
;        Finally get_addr_32(r0 = EPC, r1 = flags) is called and control
;        jumps to the returned address. Never returns through lr.
; NOTE(review): the meaning get_addr_32 gives to each flag bit is not
; visible in this file - confirm the polarity against its definition.
jump_eret   GLOBAL_FUNCTION_BEGIN
    ldr    r1, [fp, #g_cp0_regs+48-dynarec_local] ; Status
    ldr    r0, [fp, #last_count-dynarec_local]
    bic    r1, r1, #2
    add    r10, r0, r10                ; r10 = absolute count
    str    r1, [fp, #g_cp0_regs+48-dynarec_local] ; Status
    str    r10, [fp, #g_cp0_regs+36-dynarec_local] ; Count
    bl     check_interupt
    ldr    r1, [fp, #next_interupt-dynarec_local]
    ldr    r0, [fp, #g_cp0_regs+56-dynarec_local] ; EPC
    str    r1, [fp, #last_count-dynarec_local]
    subs   r10, r10, r1
    bpl    |.E11|                      ; count reached next_interupt
|.E8|
    add    r6, fp, #reg+256-dynarec_local
    mov    r5, #248                    ; 31 iterations of 8 bytes
    mov    r1, #0
|.E9|
    ldr    r2, [r6, #-8]!              ; lower word of the next slot down
    ldr    r3, [r6, #4]                ; upper word
    eor    r3, r3, r2, asr #31         ; zero when upper == sign extension of lower
    subs   r3, r3, #1                  ; carry set unless r3 was zero
    adc    r1, r1, r1                  ; shift the carry into r1
    subs   r5, r5, #8
    bne    |.E9|
    ldr    r2, [fp, #hi-dynarec_local]
    ldr    r3, [fp, #hi+4-dynarec_local]
    eors   r3, r3, r2, asr #31
    ldr    r2, [fp, #lo-dynarec_local]
    ldreq  r3, [fp, #lo+4-dynarec_local] ; lo is only checked when hi passed
    eoreq  r3, r3, r2, asr #31
    subs   r3, r3, #1
    adc    r1, r1, r1
    bl     get_addr_32
    mov    pc, r0
|.E11|
    str    r0, [fp, #pcaddr-dynarec_local]
    bl     cc_interrupt
    ldr    r0, [fp, #pcaddr-dynarec_local]
    b      |.E8|
    FUNCTION_END

; new_dyna_start: entry point from C into the recompiled code.
; Saves r4-r9, sl, fp and lr at dynarec_local+28, points fp at
; dynarec_local, recompiles the block at 0xa4000040 with
; new_recompile_block, initialises last_count/r10 from next_interupt and
; Count, and jumps to the address that was stored in `out` before the
; recompile. The saved registers are reloaded by cc_interrupt
; ("ldmia r12, {..., pc}" at fp+28) when `stop` is set.
;
; Fix: the save-area address was formed as pc + |.dlptr_offset|, but
; |.dlptr_offset| is defined as the absolute value dynarec_local+28 (the
; pc-relative term is commented out because armasm cannot subtract labels
; from different sections, error A2009). Adding pc to an absolute address
; yields a bogus pointer, so the address is now loaded directly from the
; literal pool. The |.dlptr_pic| label is no longer needed.
new_dyna_start  GLOBAL_FUNCTION_BEGIN
    ldr    r12, =dynarec_local+28      ; r12 = register save area
    ldr    r1, =|.outptr_offset|
|.outptr_pic|
    add    r1, pc, r1                  ; r1 = &out (pc-relative)
    mov    r0, #0xa4000000
    stmia  r12, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
    sub    fp, r12, #28                ; fp = dynarec_local
    ldr    r4, [r1]                    ; r4 = out, read before recompiling
    add    r0, r0, #0x40               ; r0 = 0xa4000040
    bl     new_recompile_block
    ldr    r0, [fp, #next_interupt-dynarec_local]
    ldr    r10, [fp, #g_cp0_regs+36-dynarec_local] ; Count
    str    r0, [fp, #last_count-dynarec_local]
    sub    r10, r10, r0                ; r10 = Count - last_count
    mov    pc, r4
    FUNCTION_END

; invalidate_addr_r0: invalidate the block containing the address in r0.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r0 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r0  GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r0, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r1: invalidate the block containing the address in r1.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r1 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r1  GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r1, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r2: invalidate the block containing the address in r2.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r2 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r2  GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r2, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r3: invalidate the block containing the address in r3.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r3 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r3  GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r3, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r4: invalidate the block containing the address in r4.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r4 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r4  GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r4, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r5: invalidate the block containing the address in r5.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r5 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r5  GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r5, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r6: invalidate the block containing the address in r6.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r6 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r6  GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r6, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r7: invalidate the block containing the address in r7.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r7 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r7  GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r7, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r8: invalidate the block containing the address in r8.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r8 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r8  GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r8, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r9: invalidate the block containing the address in r9.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r9 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r9  GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r9, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r10: invalidate the block containing the address in r10.
; Saves r0-r3, r12 and lr at [fp], puts the page number (r10 >> 12) in r0
; and continues in invalidate_addr_call, which restores the saved registers.
invalidate_addr_r10 GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r10, #12
    b      invalidate_addr_call
    FUNCTION_END

; invalidate_addr_r12: invalidate the block containing the address in r12.
; Saves r0-r3, r12 and lr at [fp] and puts the page number (r12 >> 12) in
; r0. There is no branch: execution falls through into
; invalidate_addr_call, which must directly follow.
invalidate_addr_r12 GLOBAL_FUNCTION_BEGIN
    stmia  fp, {r0-r3, r12, lr}
    lsr    r0, r12, #12
    FUNCTION_END

; invalidate_addr_call: common tail of the invalidate_addr_rN entries.
; In:   r0 = page number; r0-r3, r12 and lr already saved at [fp]
; Calls invalidate_block, then reloads r0-r3 and r12 from [fp] and returns
; by loading the saved lr straight into pc.
invalidate_addr_call    LOCAL_FUNCTION_BEGIN
    bl     invalidate_block
    ldmia  fp, {r0-r3, r12, pc}
    FUNCTION_END

; write_rdram_new: store the 32-bit cpu_word at address + (ram_offset << 2),
; then continue at |.E12| inside do_invalidate with r2 = address.
; Returns through lr from there (or tail-jumps to invalidate_block).
write_rdram_new GLOBAL_FUNCTION_BEGIN
    ldr    r3, [fp, #ram_offset-dynarec_local]
    ldr    r2, [fp, #address-dynarec_local]
    ldr    r0, [fp, #cpu_word-dynarec_local]
    str    r0, [r2, r3, lsl #2]
    b      |.E12|
    FUNCTION_END

; write_rdramb_new: store the byte cpu_byte at (address ^ 3) +
; (ram_offset << 2), then continue at |.E12| inside do_invalidate.
; r2 holds address ^ 3 at that point; only r2 >> 12 is used there.
write_rdramb_new    GLOBAL_FUNCTION_BEGIN
    ldr    r3, [fp, #ram_offset-dynarec_local]
    ldr    r2, [fp, #address-dynarec_local]
    ldrb   r0, [fp, #cpu_byte-dynarec_local]
    eor    r2, r2, #3                  ; flip the byte lane within the word
    strb   r0, [r2, r3, lsl #2]
    b      |.E12|
    FUNCTION_END

; write_rdramh_new: store the halfword cpu_hword at (address ^ 2) +
; (ram_offset << 2), then continue at |.E12| inside do_invalidate.
; The shift is done in a separate instruction because strh is used with a
; plain register offset here.
write_rdramh_new    GLOBAL_FUNCTION_BEGIN
    ldr    r3, [fp, #ram_offset-dynarec_local]
    ldr    r2, [fp, #address-dynarec_local]
    ldrh   r0, [fp, #cpu_hword-dynarec_local]
    eor    r2, r2, #2                  ; flip the halfword lane within the word
    lsl    r3, r3, #2
    strh   r0, [r2, r3]
    b      |.E12|
    FUNCTION_END

; write_rdramd_new: store the 64-bit cpu_dword at address +
; (ram_offset << 2) with its two words swapped (word at cpu_dword goes to
; offset 4, word at cpu_dword+4 goes to offset 0), then continue at |.E12|
; inside do_invalidate with r2 = address.
; Two ldr instructions are used in place of the commented-out ldrd.
write_rdramd_new    GLOBAL_FUNCTION_BEGIN
    ldr    r3, [fp, #ram_offset-dynarec_local]
    ldr    r2, [fp, #address-dynarec_local]
    ;    ldrd    r0, [fp, #cpu_dword-dynarec_local]
    ldr    r0, [fp, #cpu_dword-dynarec_local]
    ldr    r1, [fp, #cpu_dword+4-dynarec_local]
    add    r3, r2, r3, lsl #2          ; r3 = destination address
    str    r0, [r3, #4]
    str    r1, [r3]
    b      |.E12|
    FUNCTION_END

; do_invalidate: invalidate the compiled block for `address` if needed.
; Entry do_invalidate: loads r2 from `address`.
; Entry |.E12|:        r2 already holds the address (used by the
;                      write_rdram*_new routines).
; Looks up the byte at invc_ptr[r2 >> 12]; when it is zero the routine
; tail-jumps to invalidate_block with r0 = page number and lr untouched,
; otherwise it returns through lr.
; Clobbers r0, r1, r2 and the flags.
;
; Fix: the tail call used "beq invalidate_block". A conditional B to an
; external function cannot be fixed up by the linker when the callee is
; Thumb code (LNK2013 BRANCH24 overflow / LNK2008), and a plain B cannot
; switch instruction sets. Loading the callee address into pc interworks
; and keeps lr intact. LTORG places the literal directly after the
; unconditional return so it is always within range.
do_invalidate   LOCAL_FUNCTION_BEGIN
    ldr    r2, [fp, #address-dynarec_local]
|.E12|
    ldr    r1, [fp, #invc_ptr-dynarec_local]
    lsr    r0, r2, #12                 ; r0 = page number
    ldrb   r2, [r1, r0]
    tst    r2, r2
    ldreq  pc, =invalidate_block       ; tail call, lr still the caller's
    mov    pc, lr
    LTORG
    FUNCTION_END

; read_nomem_new: 32-bit read through memory_map.
; Reads `address`, fetches memory_map[address >> 12]; a negative entry
; branches to tlb_exception with r1 = 8 and r2 = address (r3 is passed
; through from the caller). Otherwise the word at address + (entry << 2) is
; stored to readmem_dword and the routine returns through lr.
read_nomem_new  GLOBAL_FUNCTION_BEGIN
    ldr    r2, [fp, #address-dynarec_local]
    add    r12, fp, #memory_map-dynarec_local
    lsr    r0, r2, #12
    ldr    r12, [r12, r0, lsl #2]      ; r12 = map entry for the page
    mov    r1, #8                      ; cause passed to tlb_exception
    tst    r12, r12
    bmi    tlb_exception
    ldr    r0, [r2, r12, lsl #2]
    str    r0, [fp, #readmem_dword-dynarec_local]
    mov    pc, lr
    FUNCTION_END

; read_nomemb_new: byte read through memory_map.
; Same lookup as read_nomem_new (negative entry -> tlb_exception with
; r1 = 8); the byte is read from (address ^ 3) + (entry << 2),
; zero-extended and stored as a word to readmem_dword.
read_nomemb_new GLOBAL_FUNCTION_BEGIN
    ldr    r2, [fp, #address-dynarec_local]
    add    r12, fp, #memory_map-dynarec_local
    lsr    r0, r2, #12
    ldr    r12, [r12, r0, lsl #2]      ; r12 = map entry for the page
    mov    r1, #8                      ; cause passed to tlb_exception
    tst    r12, r12
    bmi    tlb_exception
    eor    r2, r2, #3                  ; flip the byte lane within the word
    ldrb   r0, [r2, r12, lsl #2]
    str    r0, [fp, #readmem_dword-dynarec_local]
    mov    pc, lr
    FUNCTION_END

; read_nomemh_new: halfword read through memory_map.
; Same lookup as read_nomem_new (negative entry -> tlb_exception with
; r1 = 8); the halfword is read from (address ^ 2) + (entry << 2),
; zero-extended and stored as a word to readmem_dword.
read_nomemh_new GLOBAL_FUNCTION_BEGIN
    ldr    r2, [fp, #address-dynarec_local]
    add    r12, fp, #memory_map-dynarec_local
    lsr    r0, r2, #12
    ldr    r12, [r12, r0, lsl #2]      ; r12 = map entry for the page
    mov    r1, #8                      ; cause passed to tlb_exception
    tst    r12, r12
    bmi    tlb_exception
    lsl    r12, r12, #2                ; ldrh takes a plain register offset
    eor    r2, r2, #2                  ; flip the halfword lane within the word
    ldrh   r0, [r2, r12]
    str    r0, [fp, #readmem_dword-dynarec_local]
    mov    pc, lr
    FUNCTION_END

; read_nomemd_new: 64-bit read through memory_map.
; Same lookup as read_nomem_new (negative entry -> tlb_exception with
; r1 = 8). The word at the translated address goes to readmem_dword+4 and
; the following word goes to readmem_dword (the two words are swapped).
; Two ldr instructions are used in place of the commented-out ldrd.
read_nomemd_new GLOBAL_FUNCTION_BEGIN
    ldr    r2, [fp, #address-dynarec_local]
    add    r12, fp, #memory_map-dynarec_local
    lsr    r0, r2, #12
    ldr    r12, [r12, r0, lsl #2]      ; r12 = map entry for the page
    mov    r1, #8                      ; cause passed to tlb_exception
    tst    r12, r12
    bmi    tlb_exception
    lsl    r12, r12, #2
    ;    ldrd    r0, [r2, r12]
    add    r3, r2, #4
    ldr    r0, [r2, r12]
    ldr    r1, [r3, r12]
    str    r0, [fp, #readmem_dword+4-dynarec_local]
    str    r1, [fp, #readmem_dword-dynarec_local]
    mov    pc, lr
    FUNCTION_END

; write_nomem_new: 32-bit write through memory_map.
; Saves r3 and lr at fp+24 / fp+28, calls do_invalidate for `address`,
; restores them, then fetches memory_map[address >> 12]. If bit 30
; (0x40000000) of the entry is set it branches to tlb_exception with
; r1 = 0xc and r2 = address; otherwise cpu_word is stored at
; address + (entry << 2) and the routine returns through lr.
; NOTE(review): fp+28 is also where new_dyna_start begins its register
; save area - confirm that reusing this slot for lr is intended.
write_nomem_new GLOBAL_FUNCTION_BEGIN
    str    r3, [fp, #24]
    str    lr, [fp, #28]
    bl     do_invalidate
    ldr    r2, [fp, #address-dynarec_local]
    add    r12, fp, #memory_map-dynarec_local
    ldr    lr, [fp, #28]
    lsr    r0, r2, #12
    ldr    r3, [fp, #24]
    ldr    r12, [r12, r0, lsl #2]      ; r12 = map entry for the page
    mov    r1, #0xc                    ; cause passed to tlb_exception
    tst    r12, #0x40000000
    bne    tlb_exception
    ldr    r0, [fp, #cpu_word-dynarec_local]
    str    r0, [r2, r12, lsl #2]
    mov    pc, lr
    FUNCTION_END

...

; tlb_exception: raise a TLB exception for a failed memory access.
; In:   r1 = cause
;       r2 = address
;       r3 = instr addr/flags (bits 1..0 are flags, the rest is the address
;            of the faulting instruction)
; Steps: sets Status bit 1; Cause = r1 | (r3 << 31); fetches the faulting
;        instruction through memory_map; EPC = instruction address +
;        (Cause asr 29); rewrites the register slot selected by instruction
;        bits 25..21; BadVAddr = that register value + sign-extended 16-bit
;        immediate; updates Context bits 22..4 from BadVAddr >> 9; then
;        jumps to the code returned by get_addr_ht(0x80000180) after
;        refreshing last_count and r10. Never returns through lr.
; NOTE(review): the opcode bitmask 0x06000022 consumed by "rors" decides
; whether r2 is reloaded from the register file; which opcodes it is meant
; to select is not documented in this file - confirm against the upstream
; recompiler.
;
; Fix: the Context mask must start from MVN #0xF000000F (= 0x0FFFFFF0). A
; previous edit replaced the operand with #0 and left the real constant in
; a trailing comment, which produced the mask 0xF07FFFFF and corrupted the
; Context update. 0xF000000F is 0xFF rotated right by 4, a valid ARM-state
; immediate (not encodable in Thumb-2).
tlb_exception   LOCAL_FUNCTION_BEGIN
    ; r1 = cause
    ; r2 = address
    ; r3 = instr addr/flags
    ldr    r4, [fp, #g_cp0_regs+48-dynarec_local] ; Status
    add    r5, fp, #memory_map-dynarec_local
    lsr    r6, r3, #12                 ; page index of the faulting instruction
    orr    r1, r1, r3, lsl #31         ; r3 bit 0 becomes Cause bit 31
    orr    r4, r4, #2                  ; set Status bit 1
    ldr    r7, [r5, r6, lsl #2]        ; map entry for the instruction's page
    bic    r8, r3, #3                  ; r8 = instruction address without flags
    str    r4, [fp, #g_cp0_regs+48-dynarec_local] ; Status
    mov    r6, #0x6000000
    str    r1, [fp, #g_cp0_regs+52-dynarec_local] ; Cause
    orr    r6, r6, #0x22               ; r6 = 0x06000022 (opcode bitmask)
    ldr    r0, [r8, r7, lsl #2]        ; r0 = faulting instruction word
    add    r4, r8, r1, asr #29         ; -4 is added when Cause bit 31 is set
    add    r5, fp, #reg-dynarec_local
    str    r4, [fp, #g_cp0_regs+56-dynarec_local] ; EPC
    mov    r7, #0xf8
    ldr    r8, [fp, #g_cp0_regs+16-dynarec_local] ; Context
    lsl    r1, r0, #16                 ; 16-bit immediate moved to the top half
    lsr    r4, r0, #26                 ; r4 = instruction bits 31..26
    and    r7, r7, r0, lsr #18         ; r7 = (bits 25..21) * 8, offset into reg
    mvn    r9, #0xF000000F             ; r9 = 0x0FFFFFF0
    sub    r2, r2, r1, asr #16         ; r2 = address - sign-extended immediate
    bic    r9, r9, #0x0F800000         ; r9 = 0x007FFFF0
    rors   r6, r6, r4                  ; carry = mask bit selected by r4
    mov    r0, #0x80000000
    ldrcs  r2, [r5, r7]                ; carry set: keep the stored register value
    bic    r8, r8, r9                  ; clear Context bits 22..4
    tst    r3, #2
    str    r2, [r5, r7]                ; write the lower word of the register slot
    add    r4, r2, r1, asr #16         ; r4 = register value + immediate
    add    r6, fp, #reg+4-dynarec_local
    asr    r3, r2, #31                 ; sign extension of r2
    str    r4, [fp, #g_cp0_regs+32-dynarec_local] ; BadVAddr
    add    r0, r0, #0x180              ; r0 = 0x80000180
    and    r4, r9, r4, lsr #9
    strne  r3, [r6, r7]                ; r3 bit 1 set: also write the upper word
    orr    r8, r8, r4
    str    r8, [fp, #g_cp0_regs+16-dynarec_local] ; Context
    bl     get_addr_ht
    ldr    r1, [fp, #next_interupt-dynarec_local]
    ldr    r10, [fp, #g_cp0_regs+36-dynarec_local] ; Count
    str    r1, [fp, #last_count-dynarec_local]
    sub    r10, r10, r1                ; r10 = Count - last_count
    mov    pc, r0
    FUNCTION_END

; breakpoint: empty exported routine that returns immediately; it exists
; only as a convenient place to set a debugger breakpoint.
breakpoint  GLOBAL_FUNCTION_BEGIN
    ; Set breakpoint here for debugging
    mov    pc, lr
    FUNCTION_END

; __clear_cache_bugfix: issues "svc 0" with r7 = 0x000f0002 and r2 = 0,
; leaving r0 and r1 as passed by the caller. r7 and lr are preserved on
; the stack and the routine returns by popping pc.
; NOTE(review): per the comment below this replaces __clear_cache on
; Android, i.e. it is a Linux-style system call (r0/r1 presumably the
; start/end of the range to flush - confirm). Verify whether it has any
; meaning on a Windows target before relying on it there.
__clear_cache_bugfix    GLOBAL_FUNCTION_BEGIN
    ;  The following bug-fix implements __clear_cache (missing in Android)
    push   {r7, lr}
    mov    r2, #0
    mov    r7, #0x2
    add    r7, r7, #0xf0000            ; r7 = 0x000f0002
    svc    0x00000000
    pop    {r7, pc}
    FUNCTION_END

    END

The link issue can be solved by setting AREA from ARM to THUMB, which directs the assembler to compile the file as Thumb code. However, there is another compile error. I uncommented (|.dlptr_pic|+8) in data AREA and got the error below when compiling the file.

ERROR A2009 '-' can not be applied; different sections or different base registers

This error points to the line which is modified.

|.dlptr_offset| EQU dynarec_local+28-(|.dlptr_pic|+8)

Jason Geng
  • 85
  • 1
  • 11
  • I'm not sure why, but the error indicates that one (or more?) of the Bcc instructions that jump to the given label is more than 1 MB away. That's more than Thumb 2 encoding of these instructions allows. You may be able to work around the problem by replacing the offending instruction with an unconditional branch (which has a 16 MB range), using the opposite conditional branch to jump over it. – Ross Ridge Dec 14 '15 at 04:49
  • Hmm... after looking at the Mupen64plus source code it appears that this code can never work. You'll need to rewrite the the recompiler to generate Thumb instructions, not ARM instructions. Windows on ARM only supports the Thumb 2 instruction set. https://msdn.microsoft.com/en-ca/library/dn736986.aspx#Anchor_3 – Ross Ridge Dec 14 '15 at 05:05
  • @RossRidge Do you mean the syntax is not Thumb so the code needs rewrite? But the file can be successfully compiled as Thumb 2 code by setting AREA from ARM to THUMB. – Jason Geng Dec 14 '15 at 21:36
  • Your fundamental problem here is that this code is meant to work with the ARM code generated by `assem_arm.c`. It dynamically recompiles MIPS instructions into ARM instructions. For this code to work under Windows you first need to write an `assem_thumb2.c` recompiler that generates Thumb 2 instructions. Only then would it make sense to try to create a `linkage_thumb2.asm` based on `linkage_arm.S`. You also probably also have to change a number of Linux/Android specific things so that it'll work on Windows. Alternatively you can disable the recompiler completely so this code isn't used. – Ross Ridge Dec 14 '15 at 22:16
  • @RossRidge I checked the difference between Thumb-2 UAL and ARM pre-UAL syntax in ARM Architecture Reference Manual. I think your answer is right. It helps me a lot. Thanks. – Jason Geng Dec 15 '15 at 21:58
  • Well, it's not a question of assembly syntax. The file `assem_arm.c` generates ARM machine code which is encoded differently than Thumb machine code. If you to try execute ARM machine code under Windows it will eventually crash on return from an interrupt or exception. The recompiled code isn't assembled. The file `assem_arm.c` converts MIPS machine code to ARM machine code, storing the result in memory which is later executed. Your problem is the same as if you're were trying to use the x86 "dynarec" code. The C code would compile correctly, but it would convert MIPS to x86 and crash. – Ross Ridge Dec 15 '15 at 22:31
  • @RossRidge Yes. Windows on ARM only supports Thumb-2. I have to rewrite the assem_arm.c so that it generates thumb-2 instructions instead of ARM instructions which is used currently. I understand your point but maybe I didn't make myself clear. – Jason Geng Dec 15 '15 at 22:45
  • @RossRidge According to this port https://github.com/hrydgard/ppsspp/pull/8315, it seems that ARM code can be run on W10M though having memory access failures. In the past months, I have read the ARM and Thumb2 manual, but found it's really hard to implement the dynarec. It's difficult to compute the PC increment amount dynamically, because thumb2 code has both 4 bytes length instructions and 2 byte ones making it unsure. – Jason Geng Apr 03 '16 at 22:06
  • @RossRidge I prefer to insist on the dynarec with ARM code. I rewrote linkage_arm.S with MS format and build successfully, but finally having stack overflow issue very similar to that in above link. I don't know how to solve this. Please help. – Jason Geng Apr 03 '16 at 22:06
  • Since instruction lengths on the x86 can vary much more than Thumb-2 instruction lengths you should be able to look at the x86 JIT to find how you can handle that problem. I can't help you with your stack overflow problem except to suggest that its caused because Windows doesn't support the ARM instruction set. I'd assume the PPSSPP memory access failures are also because of this. Note that ARM code can appear to work but will eventually crash because Windows forces Thumb-mode on interrupt returns. See: http://stackoverflow.com/questions/18344419/is-arm-not-thumb-supported-on-winphone8-at-all – Ross Ridge Apr 03 '16 at 23:02
  • @RossRidge Isn't x86 fixed 4 bytes instructions? – Jason Geng Apr 03 '16 at 23:25
  • @RossRidge In this post https://github.com/hrydgard/ppsspp/issues/7854, the person whose id is kika123 said the ARM mode works on the W10M. I don't know if he's right, but I know the android n64 emulator can be run on Astoria. – Jason Geng Apr 03 '16 at 23:30
  • x86 instructions can vary from 1 to 15 bytes long. If Microsoft have patched out their force Thumb-mode code for good then you should be OK with ARM code, but you'd be risking Microsoft changing their mind again. – Ross Ridge Apr 04 '16 at 00:07
  • @RossRidge The weird thing is if I use "beq invalidate_block", the linker reports fixup overflow, whereas I use "b invalidate_block", it links with no issue. I wonder the MS ARM assembler can't correctly compile an instruction with condition code. – Jason Geng Apr 08 '16 at 01:29
  • As I explained in my first comment, unconditional branches have a longer range than conditional branches. There's nothing wrong with the Microsoft assembler. – Ross Ridge Apr 08 '16 at 01:46
  • @RossRidge The source code is compiled as ARM code so the B/BL instruction should always have +32M/-32M bytes limit regardless of there is a condition code. It's only the condition code bits changing from 0001 to 1110. The offset bits shouldn't change at all. Sorry I forget to tell you I am trying the ARM mode. – Jason Geng Apr 08 '16 at 02:11
  • It seems that the linker can't correctly compute the offset if there's a condition code in the instruction. – Jason Geng Apr 08 '16 at 02:20
  • I doubt that. What's the exact error message you're getting? – Ross Ridge Apr 08 '16 at 02:59
  • @RossRidge : 1. Error LNK2013: BRANCH24 (A) fixup overflow. Target "invalidate_block" is out of range 2. Error LNK2008 Fixup target is not aligned "{thunk} invalidate_block". Actually there are two lines happening to this linker errors, which are "blcs clean_blocks" and "beq invalidate_block". Both lines get this same error. Once I remove the condition code, the error disappears and it builds successfully. I can send you the source file if you'd like. – Jason Geng Apr 08 '16 at 03:38
  • This is the command I use for building the file. "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_arm\armasm.exe" -o $(IntDir)linkage_arm.obj "%(FullPath)" -32. The -32 directs the assembler to assemble the source code as ARM instructions. – Jason Geng Apr 08 '16 at 03:44
  • The problem is that `invalidate_block` is Thumb code and you can't switch from ARM mode to Thumb mode with the B/BL instructions. The linker will fix unconditional BL instructions by changing them to BLX instructions, but it won't fix B instructions or conditional BL instructions. – Ross Ridge Apr 08 '16 at 06:48
  • @RossRidge The code can't switch between ARM and Thumb with B, but it should be able with BL. Besides, I also tried with BLX as I remember, which doesn't work either. – Jason Geng Apr 08 '16 at 07:34
  • No, the BL instruction cannot switch between ARM and Thumb modes, however as I said the linker automatically changes it to a BLX instruction if necessary. – Ross Ridge Apr 08 '16 at 07:54
  • Maybe I am wrong. I just read it today in a ARM Architecture Reference Manual randomly found on the web, which says BL and BLX can switch between ARM and Thumb mode as per the last bit of the branch target address. I just tried with blxcs clean_blocks, and unluckily got the same linker errors, which I don't understand the cause. – Jason Geng Apr 08 '16 at 08:07
  • I also tried to change "beq invalidate_block" to "bxeq invalidate_block", but the assembly reports an error like this "A2502 operand 1: Expected register", which prompts me that I should load invalidate_block's address into a register firstly and then replace invalidate_block in the instruction with that register. – Jason Geng Apr 08 '16 at 08:07
  • Yes, the BX instruction only allows a register operand, there's no `BX label` instruction. Instead you can use `blxeq invalidate_block` if you don't mind it clobbering LR, otherwise you can use `ldreq pc, =invalidate_block`. – Ross Ridge Apr 08 '16 at 09:14
  • I realized there are only BLX{cond} Rm and BLX label, so I should put the clean_blocks's address in a register firstly before blxcs to it. – Jason Geng Apr 08 '16 at 10:42
  • @RossRidge I think I should increment invalidate_block by 1 before ldreq pc, =invalidate_block to exchange the instruction set. – Jason Geng Apr 08 '16 at 20:50
  • The linker does that automatically if `invalidate_block` is in a Thumb section. – Ross Ridge Apr 08 '16 at 21:00
  • @RossRidge If use ldr r7, =clean_blocks and blxcs r7, will the linker automatically increment clean_blocks by 1? Or should I use ldr r7, =clean_blocks+1? – Jason Geng Apr 08 '16 at 21:17
  • You can't blxeq invalidate_block, since there is no BLX{cond} label. – Jason Geng Apr 08 '16 at 21:43
  • What you said about `BLXEQ label` is correct, there's no such instruction so it was a mistake for me to suggest it. I don't know what happens when you use LDR to load the address of a Thumb function into a register other than the PC register. – Ross Ridge Apr 08 '16 at 22:13
  • @RossRidge Thanks for your suggestion. I can successfully link the program now. So I want to close this question. Can you post an answer for me to select it? – Jason Geng Apr 09 '16 at 05:56
  • @RossRidge I am also facing another pointer issue while running the app. Can you help me with that?http://stackoverflow.com/questions/36513553/assembly-allocated-memory-pointer-automatically-changed-to-0xffffffff-in-an-uwp – Jason Geng Apr 09 '16 at 07:06
  • @RossRidge Have to bother you again. Can you help solve this?http://stackoverflow.com/questions/36527882/dynamically-generated-code-execute-in-wrong-address – Jason Geng Apr 10 '16 at 09:12

1 Answers1

0

linkage_arm.asm should be assembled as ARM code rather than Thumb-2 code, because the assembly in the file is written in the ARM form. I pass the option -32 to armasm.exe so that ARM machine code is generated; the linker can then link the object file without the fixup errors.

For this error

ERROR A2009    '-' can not be applied; different sections or different base registers

It seems that Microsoft's assembler cannot evaluate an expression that subtracts labels located in different sections. I worked around this by loading the address of dynarec_local+28 directly into r12, which has the same effect and assembles without error.

;|.dlptr_offset|        DCD dynarec_local+28-(|.dlptr_pic|+8)

    ;ldr    r12, |.dlptr_offset|
;|.dlptr_pic|
    ;add    r12, pc, r12
    ldr    r12, =dynarec_local+28

The code as posted also contains several syntax mistakes, which I corrected while resolving this question. Since they are not part of what was asked, I have not posted the details of those fixes. Anybody who is interested can check the comments between Ross and me for reference.

Last but not least, I learned a lot from the discussion with Ross, who is a veteran of Microsoft's ARM assembler and linker. Thanks again.

Jason Geng
  • 85
  • 1
  • 11