1

For a reverse engineering project I want to split an IDA Pro-produced asm file into multiple per-segment files, which must result in binary-identical executables.

Binary-equal means to me 100% equal: segment starts, ordering, sizes, opcodes, etc. The code comes from a reversed exe and only some of the symbols are detected by IDA (mostly just variable or function offsets), so I can't rely on the assembler's own ordering of segments, symbols, etc. It needs to be 100% exact, or else I introduce unfindable errors into the reversed code. It is OK if the assembler acts stupid, as long as the result is equal.

I'm using MASM and the old Microsoft linker:

ml.exe: Microsoft (R) Macro Assembler Version 14.16.27032.1, from VS2017 Community edition
link.exe: Microsoft (R) Segmented Executable Linker  Version 5.60.339 Dec  5 1994, latest 16bit version

MASM generates different code for multi.exe if I leave out the .model directive.

single.asm

; single.asm - the whole program in one source file. This is the reference
; build: multi.exe (built from seg000.asm..seg003.asm) is compared against it.
.model medium
.386

; seg000: holds the message that `print` outputs.
seg000 segment para public use16 
text db 'Hello World!',0ah,0dh,'$'   ; '$' terminates the string for int 21h / AH=09h
seg000 ends

; seg001: holds the program entry point.
seg001 segment para public use16 

; start - entry point (named on the END directive at the bottom of the file).
; Points DS at seg000, calls print, then terminates through int 21h / AH=4Ch.
start proc
  mov ax,seg seg000       ; AX = segment value of seg000
  mov ds,ax               ; DS -> seg000, so DS:DX can address `text` in print
  push ax                 ; push/pop pair leaves AX unchanged
  pop ax
  call print              ; print lives in seg002 and is declared far
  mov ax,4c00h            ; AH=4Ch (exit), AL=00h (exit code)
  int 21h
start endp

seg001 ends

; seg002: holds the print routine, in a different segment from its caller.
seg002 segment para public use16

; print - far procedure that writes the '$'-terminated string `text`.
; In:  DS = segment containing `text` (set up by start).
; Out: returns with RETF; DX and AH are overwritten.
print proc far
  mov dx,offset text      ; DX = offset of text within its segment
  mov ah,09h              ; int 21h function 09h: print string at DS:DX
  int 21h
  retf
print endp

seg002 ends

; seg003: stack segment, 256 uninitialized bytes.
seg003 segment para use16 stack
  db 256 dup (?)
seg003 ends

end start                 ; `start` is the program entry point

Multi-file asm version (based on help from user rkhb)

segments.inc

; segments.inc - empty declarations of all four segments with their full
; attributes (alignment, combine type, use16). Included by every seg00N.asm
; before that module reopens its own segment by name only.
; NOTE(review): presumably this makes each object module declare the segments
; in the same order as single.asm - confirm against the linker map.
seg000 segment para public use16 
seg000 ends

seg001 segment para public use16 
seg001 ends

seg002 segment para public use16
seg002 ends

; Stack segment: combine type is `stack`, not `public`.
seg003 segment para use16 stack
seg003 ends

seg000.asm

; seg000.asm - module that supplies the contents of seg000 (the message text).
.model medium
.386
include segments.inc      ; declares seg000..seg003 with their attributes

public text               ; exported; seg002.asm imports it with `extern text:BYTE`

; Reopen seg000 (attributes come from segments.inc) and place the string in it.
seg000 segment
  text db 'Hello World!',0ah,0dh,'$'   ; '$' terminates the string for int 21h / AH=09h
seg000 ends

end                       ; no entry point in this module

seg001.asm

; seg001.asm - module that supplies the contents of seg001 (program entry point).
.model medium
.386
include segments.inc      ; declares seg000..seg003 with their attributes

extern print:FAR          ; defined in seg002.asm as `print proc far`

; Reopen seg001 (attributes come from segments.inc).
seg001 segment

; start - entry point (named on the END directive at the bottom of the file).
; Points DS at seg000, calls print, then terminates through int 21h / AH=4Ch.
start proc
  mov ax, seg000          ; AX = segment value of seg000 (single.asm writes `seg seg000` here)
  mov ds,ax               ; DS -> seg000, so DS:DX can address `text` in print
  push ax                 ; push/pop pair leaves AX unchanged
  pop ax
  call print              ; external far procedure
  mov ax,4c00h            ; AH=4Ch (exit), AL=00h (exit code)
  int 21h
start endp

seg001 ends

end start                 ; `start` is the program entry point

seg002.asm

; seg002.asm - module that supplies the contents of seg002 (the print routine).
.model medium
.386
include segments.inc      ; declares seg000..seg003 with their attributes

extern text:BYTE          ; defined and made public in seg000.asm

; Reopen seg002 (attributes come from segments.inc).
seg002 segment

; print - far procedure that writes the '$'-terminated string `text`.
; In:  DS = segment containing `text` (the caller loads it).
; Out: returns with RETF; DX and AH are overwritten.
print proc far
   mov dx,offset text     ; DX = offset of the external symbol text
   mov ah,09h             ; int 21h function 09h: print string at DS:DX
   int 21h
   retf
print endp

seg002 ends

end                       ; no entry point in this module

seg003.asm

; seg003.asm - module that supplies the contents of seg003 (the stack).
.model medium
.386
include segments.inc      ; declares seg000..seg003 with their attributes

; Reopen seg003 (declared with combine type `stack` in segments.inc).
seg003 segment
  db 256 dup (?)          ; 256 uninitialized bytes of stack space
seg003 ends

end                       ; no entry point in this module

build_single.cmd

rem Assemble single.asm to an OMF object file (/c = assemble only, /omf = OMF output).
ml.exe /c /omf single.asm
rem Link single.obj with a map file (/MAP); the remaining linker fields are left empty.
link.exe /MAP single.obj,,,,,

build_multi.cmd

rem Assemble each per-segment module to its own OMF object file
rem (/c = assemble only, /omf = OMF output).
ml.exe /c /omf seg000.asm
ml.exe /c /omf seg001.asm
ml.exe /c /omf seg002.asm
ml.exe /c /omf seg003.asm
rem Link the four objects, listed in segment order, into multi.exe with map file multi.map.
rem NOTE(review): /L is presumably the abbreviation of /LINENUMBERS - confirm;
rem build_single.cmd does not pass it.
link.exe /MAP /L seg000.obj seg001.obj seg002.obj seg003.obj,multi.exe,multi.map,,,

Calling build_single and build_multi results in nearly equal executables, but multi.exe is 10 bytes larger (filled with 0 at the end).

hex-diff: left is single.exe

enter image description here

Comparing the IDA Pro results shows that only the segment alignment before the stack segment is different: in single.exe the alignment padding is part of the uninitialized area, whereas in multi.exe it is a zero-filled part of the image.

single.ida.asm

    ; IDA Pro disassembly of single.exe (built from single.asm).
    ; dseg holds the 'Hello World!' string, sub_10030 has the same
    ; instructions as `print` in single.asm.
    .286
    .model medium

; ===========================================================================

; Segment type: Pure data
dseg    segment para public 'DATA'
    assume cs:dseg
aHelloWorld db 'Hello World!',0Ah
    db 0Dh,'$',0
dseg    ends

; ===========================================================================

; Segment type: Pure code
seg001    segment byte public 'CODE'
    assume cs:seg001
    assume es:nothing, ss:seg003, ds:nothing

; =============== S U B R O U T I N E =======================================

; Attributes: noreturn

    public start
start   proc near
    mov ax, seg dseg
    mov ds, ax
    assume ds:dseg
    push  ax
    pop ax
    call  sub_10030
    mov ax, 4C00h
    int 21h   ; DOS - 2+ - QUIT WITH EXIT CODE (EXIT)
start   endp      ; AL = exit code

; ---------------------------------------------------------------------------
    align 10h
seg001    ends

; ===========================================================================

; Segment type: Pure code
seg002    segment byte public 'CODE'
    assume cs:seg002
    assume es:nothing, ss:nothing, ds:dseg

; =============== S U B R O U T I N E =======================================


sub_10030 proc far    ; CODE XREF: start+7P
    mov dx, 0
    mov ah, 9
    int 21h   ; DOS - PRINT STRING
          ; DS:DX -> string terminated by "$"
    retf
sub_10030 endp

; ---------------------------------------------------------------------------
    db 8 dup(?) ; !!!DIFFERENCE!!! 8 uninitialized bytes here; the multi.exe listing shows "align 10h" at this spot
seg002    ends

; ===========================================================================

; Segment type: Uninitialized
seg003    segment byte stack 'STACK'
    assume cs:seg003
    assume es:nothing, ss:nothing, ds:dseg
    db 100h dup(?)
seg003    ends


    end start

multi.ida.asm

    ; IDA Pro disassembly of multi.exe (linked from seg000.obj..seg003.obj).
    ; dseg holds the 'Hello World!' string, sub_10030 has the same
    ; instructions as `print` in seg002.asm.
    .286
    .model medium

; ===========================================================================

; Segment type: Pure data
dseg    segment para public 'DATA'
    assume cs:dseg
aHelloWorld db 'Hello World!',0Ah
    db 0Dh,'$',0
dseg    ends

; ===========================================================================

; Segment type: Pure code
seg001    segment byte public 'CODE'
    assume cs:seg001
    assume es:nothing, ss:seg003, ds:nothing

; =============== S U B R O U T I N E =======================================

; Attributes: noreturn

    public start
start   proc near
    mov ax, seg dseg
    mov ds, ax
    assume ds:dseg
    push  ax
    pop ax
    call  sub_10030
    mov ax, 4C00h
    int 21h   ; DOS - 2+ - QUIT WITH EXIT CODE (EXIT)
start   endp      ; AL = exit code

; ---------------------------------------------------------------------------
    align 10h
seg001    ends

; ===========================================================================

; Segment type: Pure code
seg002    segment byte public 'CODE'
    assume cs:seg002
    assume es:nothing, ss:nothing, ds:dseg

; =============== S U B R O U T I N E =======================================


sub_10030 proc far    ; CODE XREF: start+7P
    mov dx, 0
    mov ah, 9
    int 21h   ; DOS - PRINT STRING
          ; DS:DX -> string terminated by "$"
    retf
sub_10030 endp

; ---------------------------------------------------------------------------
    align 10h ; !!!DIFFERENCE!!! alignment padding here; the single.exe listing shows "db 8 dup(?)" at this spot
seg002    ends

; ===========================================================================

; Segment type: Uninitialized
seg003    segment byte stack 'STACK'
    assume cs:seg003
    assume es:nothing, ss:nothing, ds:dseg
    db 100h dup(?)
seg003    ends


    end start

the difference is:

db 8 dup(?)

and

align 10h

Any ideas how to get multi.exe binary-equal to single.exe?

the result seems to be linker related

link.exe: the described 8 byte difference
wlink.exe: the described 8 byte difference (added 'STACK' to stack segment or else ss:sp=0:0 in exe header)
optlink.exe: 100% equal
ulink.exe: bug with SP not correctly set (already reported)

IDA shows that there is an "align 10h" at the end of seg002 in both single.obj and (multi) seg002.obj, so the difference in the exe image seems to be introduced by the linker - is that right?

The images become identical if I force the stack to become part of the exe image with "db 256 dup (0)" in the seg003 stack segment (then it does not make sense for the linker to ignore the align in multi.exe), but that is not a real solution: the result is only 100% identical if I also change the original (single.asm).

llm
  • 557
  • 3
  • 15
  • Have you tried using "BYTE" segment alignment for the stack segment? – Ross Ridge Sep 12 '19 at 21:14
  • Tried it, but it makes no difference - using para is OK, though. The normal situation is that I get a file from IDA, and the result of the split version should be equal to it. Maybe I'll find the relevant linker flag, or a way to tell the assembler to always materialize segment alignment in the image. – llm Sep 13 '19 at 03:42

0 Answers0