0

I've been studying the delayed-load (delayimp) pipeline as a possible backend for the missing RPATH functionality on Windows, using the following example:

#include <stdio.h>

int __declspec(dllimport) foo(int arg);

int main(int argc, char* argv[])
{
    printf("foo() = %d\n", foo(foo(argc)));
    return 0;
}

Both GNU and LLVM implement delayed loading similarly with the "dlltool" (although LLVM's dlltool seems to have been merged into "lld-link"). Essentially, the task performed in LLVM's lld/COFF/DLL.cpp or Binutils' dlltool.c is two-fold:

  1. Generate a jump table stub for a delayed-load function (see example below)
  2. Generate a trampoline that invokes the __delayLoadHelper2 code (see example below)

Upon successful binding, __delayLoadHelper2 seems to write the resolved function address right into the executable code section:

extern "C"
FARPROC WINAPI
__delayLoadHelper2(
    PCImgDelayDescr     pidd,
    FARPROC *           ppfnIATEntry
    ) {
...
SetEntryHookBypass:
    *ppfnIATEntry = pfnRet; // access violation
...
}

To allow modification of the executable image, Microsoft has developed some fancy functions that temporarily add write permissions to the corresponding memory region.

Now the question is: the code to be modified is within the jump table stub that goes into the ".idata" section, and it fails to get write permissions:

        if ((Characteristics & IMAGE_SCN_MEM_WRITE) == 0) {

            //
            // This delay load helper module does not support merging the delay
            // load section to a read only section because memory management
            // would not guarantee that there is commit available - and thus a
            // low memory failure path where the delay load failure hook could
            // not be safely invoked (the delay load section would still be
            // read only) might be encountered.
            //
            // It is a build time configuration problem to produce such a
            // binary so abort here and now so that the problem can be
            // identified & fixed.
            //

/* Exception thrown at 0x000000013F3B3F3F in dlltool_test_executable.exe: 0xC0000005: Access violation reading */
            __fastfail(FAST_FAIL_DLOAD_PROTECTION_FAILURE);
        }

So, currently the hard binding does not work and gives a "write access violation". I'm wondering what kind of binary configuration I am missing here.

My test config: LLVM upstream from github, BinUtils upstream from git, MSVC2019, Windows 7.

$ cat trampoline.s
# Import trampoline
        .section        .text
        .global __tailMerge_C__Users_marcusmae_dlltool_build_import_test_lib
__tailMerge_C__Users_marcusmae_dlltool_build_import_test_lib:
        pushq %rcx
        pushq %rdx
        pushq %r8
        pushq %r9
        subq  $40, %rsp
        movq  %rax, %rdx
        leaq  __DELAY_IMPORT_DESCRIPTOR_C__Users_marcusmae_dlltool_build_import_test_lib(%rip), %rcx
        call __delayLoadHelper2
        addq  $40, %rsp
        popq %r9
        popq %r8
        popq %rdx
        popq %rcx
        jmp *%rax

# DELAY_IMPORT_DESCRIPTOR
.section        .text$2
.global __DELAY_IMPORT_DESCRIPTOR_C__Users_marcusmae_dlltool_build_import_test_lib
__DELAY_IMPORT_DESCRIPTOR_C__Users_marcusmae_dlltool_build_import_test_lib:
        .long 1 # grAttrs
        .rva    __C__Users_marcusmae_dlltool_build_import_test_lib_iname        # rvaDLLName
        .rva    __DLL_HANDLE_C__Users_marcusmae_dlltool_build_import_test_lib   # rvaHmod
        .rva    __IAT_C__Users_marcusmae_dlltool_build_import_test_lib  # rvaIAT
        .rva    __INT_C__Users_marcusmae_dlltool_build_import_test_lib  # rvaINT
        .long   0       # rvaBoundIAT
        .long   0       # rvaUnloadIAT
        .long   0       # dwTimeStamp

.section .data
__DLL_HANDLE_C__Users_marcusmae_dlltool_build_import_test_lib:
        .long   0       # Handle
        .long   0

#Stuff for compatibility
        .section        .idata$5
        .long   0
        .long   0
__IAT_C__Users_marcusmae_dlltool_build_import_test_lib:
        .section        .idata$4
        .long   0
        .long   0
        .section        .idata$4
__INT_C__Users_marcusmae_dlltool_build_import_test_lib:
        .section        .idata$2
$ objdump -d dorks00000.o

dorks00000.o:     file format pe-x86-64


Disassembly of section .text:

0000000000000000 <foo>:
   0:   ff 25 00 00 00 00       jmpq   *0x0(%rip)        # 6 <foo+0x6>
   6:   48 8d 05 00 00 00 00    lea    0x0(%rip),%rax        # d <foo+0xd>
   d:   e9 00 00 00 00          jmpq   12 <foo+0x12>
        ...
Dmitry Mikushin
  • 1,478
  • 15
  • 16

1 Answer

2

So you are generating the delay import structures using GNU dlltool, but linking against it with LLD or MS link.exe?

I think the difference here lies in the fact that GNU dlltool places the addresses that are updated at runtime within .idata, and GNU ld normally links .idata as writable, while LLD and MS link.exe normally have read-only .idata (and place the addresses that will be updated at runtime by the delay loading mechanism in .data instead).

LLD happens to have a bit of extra code to take read-write .idata sections from GNU import libraries and merge them into the rest of LLD's read-only .idata - which makes normal GNU import libraries work, but unfortunately breaks using it together with the GNU dlltool delayimport libraries.

So with LLD, just use LLD's built-in delay import mechanism, by passing e.g. -delayload:user32.dll when linking. This works when using MSVC style import libraries, but unfortunately not when using GNU style import libraries (import libraries generated by GNU dlltool or GNU ld).

mstorsjo
  • 12,983
  • 2
  • 39
  • 62
  • Exactly. I'm trying to combine MS link.exe with dlltool-generated import libraries, because I want to delay-load the libraries that were not initially compiled for delay-load. That is, I need to provide new import libraries. I hope to get this done without touching the linker logic, as the import library is naturally a self-contained entity. Could you please kindly point me to where exactly the LLVM linker prepares ".idata" sections to be writable? GNU dlltool simply makes `INIT_SEC_DATA (IDATA5, ".idata$5", SEC_HAS_CONTENTS, 2)`, which does not make it writable. – Dmitry Mikushin Aug 24 '20 at 11:50
  • LLD doesn't make the `.idata` sections writable, it places the corresponding data in `.data` instead. So for your case, you might want to change `.idata$5` into `.data$5`, and the same for `.idata$4` - that might work. – mstorsjo Aug 24 '20 at 12:14
  • But for making MS link.exe delayload a library that is created using GNU ld, the easiest way might be to have GNU ld make a def file when linking (`-Wl,--output-def,mylib.def`), then create an import lib out of this using MS lib.exe (`lib.exe -machine:x64 -def:mylib.def -out:mylib.lib`), then link against this lib with MS link.exe, but passing the `-delayload` option, e.g. `link.exe [other options] mylib.lib -delayload:mylib.dll`. MS link.exe doesn't need a separate import library for delay import cases, as long as it's a MSVC style import library. – mstorsjo Aug 24 '20 at 12:15
  • Changing .idata$4 and .idata$5 into .data$4 and .data$5 resulted in a new access violation here: ```// Calculate the index for the IAT entry in the import address table // N.B. The INT entries are ordered the same as the IAT entries so // the calculation can be done on the IAT side. // const unsigned iIAT = IndexFromPImgThunkData(PCImgThunkData(ppfnIATEntry), idd.pIAT); const unsigned iINT = iIAT; PCImgThunkData pitd = &(idd.pINT[iINT]); dli.dlp.fImportByName = !IMAGE_SNAP_BY_ORDINAL(pitd->u1.Ordinal);``` – Dmitry Mikushin Aug 24 '20 at 14:38
  • Then try changing the other sections to .data as well; `.text$2` and `.idata$2` - all except for the first `.text` section that actually contains executable code. – mstorsjo Aug 24 '20 at 18:41
  • Same error. After either of the two renamings, the value of `pidd->rvaIAT` changes to something not valid anymore (or unassigned) – Dmitry Mikushin Aug 25 '20 at 12:27
  • Yeah, so I looked into the `objdump -h` of the import library and the final executable. Turns out, the show stopper is the executable, where .idata comes `CONTENTS, ALLOC, LOAD, READONLY, DATA`. While our import library has .idata with `CONTENTS, ALLOC, LOAD, RELOC, DATA`. I guess the read-only attribute is added by the MSVC linker. – Dmitry Mikushin Aug 25 '20 at 15:52
  • Solved by adding: `set_target_properties(${PROJECT_NAME}_test_executable PROPERTIES LINK_FLAGS "/SECTION:.idata,RW")` Thanks, @mstorsjo ! – Dmitry Mikushin Aug 25 '20 at 18:25