When a thread is created inside a WOW64 process, it always begins executing in 64-bit mode, starting at LdrInitializeThunk inside the 64-bit ntdll.dll.
Inside it, when the process is a WOW64 process, the function Run64IfContextIs64 is called: the system tries to determine whether the thread must execute in the native or in the WOW64 context. This is done by checking the thread start address: if it lies within a native DLL (currently ntdll.dll, wow64.dll, wow64win.dll, wow64cpu.dll), the system runs the thread natively (in 64-bit mode). The call graph is (the illustration is not reproduced in this text):

In a debugger we can then see the following debug output:
<pid>:<tid> Found inside *.dll
<pid>:<tid> InitialPC <rip> is within a native dll. Running native context unchanged.
A special case is when InitialPC is inside the 64-bit kernel32.dll or the 64-bit user32.dll (look for DllsToRemap): inside Run64IfContextIs64, MapContextAddress64TO32 is called, and the system checks the address inside the native DLL. In some cases (say, for DebugBreak) the system tries to redirect the address to DebugBreak in the 32-bit kernel32.dll and execute it in WOW64 mode, but there is a bug here and it crashes. In most cases the system redirects the address from the 64-bit kernel32.dll (or user32.dll) to Wow64pBreakPoint (a non-exported function) inside wow64.dll. This function is executed natively (in 64-bit mode): it executes int 3 (a breakpoint) if a debugger is attached, and after that it simply terminates the thread. In the debug output we can see:
<pid>:<tid> InitialPC <rip> found in the space reserved for 64-bit kernel32.dll
This, however, is exactly your case: you pass the address of LoadLibraryA inside the 64-bit kernel32.dll, which is of course an error.
Otherwise (if the PC is not inside a 64-bit module), the system executes the thread in the WOW64 context, which is what we need here:

<pid>:<tid> ThunkStartupContent64TO32: Original InitialPC <rip>, StartupAddress <eip>, Arg1 <pv>
<pid>:<tid> ThunkStartupContext64TO32: Thunking RTL user thread start
After BTCpuSimulate, the thread goes to LdrInitializeThunk in the 32-bit ntdll.dll and executes as usual in the WOW64 context.
So, instead of the address of LoadLibraryA inside the 64-bit kernel32.dll, we need to get the address of LoadLibraryW inside the 32-bit kernel32.dll. However, this task is not simple if we try to use only the documented Win32 API. I use the ntdll API for this (some of it undocumented). The code, as is:
/*
 * Looks up an export by name in a PE image mapped into the current process
 * and returns its address rebased onto another load address.
 *
 * Base        - where the image is mapped (as an image) in this process.
 * BaseAddress - absolute address of the image's entry point at the load
 *               address of interest (the caller in this file passes
 *               SECTION_IMAGE_INFORMATION::TransferAddress). The entry-point
 *               RVA is subtracted from it to recover that load base.
 * Name        - exact, case-sensitive export name.
 *
 * Returns (load base + export RVA), or 0 when the image has no usable export
 * directory, the name is not found, the export is a forwarder, or the export
 * table entry is invalid.
 *
 * The function reads the mapped image directly; callers are expected to guard
 * the call against access violations on a malformed image.
 */
PVOID getRVA(PVOID Base, ULONG_PTR BaseAddress, PCSTR Name)
{
    PIMAGE_NT_HEADERS32 pinth = (PIMAGE_NT_HEADERS32)RtlImageNtHeader(Base);
    if (!pinth)
    {
        return 0;
    }

    // BaseAddress arrives as the entry-point address; turn it into the load base.
    BaseAddress -= pinth->OptionalHeader.AddressOfEntryPoint;

    DWORD Size;
    PIMAGE_EXPORT_DIRECTORY pied = (PIMAGE_EXPORT_DIRECTORY)
        RtlImageDirectoryEntryToData(Base, TRUE, IMAGE_DIRECTORY_ENTRY_EXPORT, &Size);
    if (!pied)
    {
        return 0;
    }

    DWORD exportRVA = RtlPointerToOffset(Base, pied);
    DWORD NumberOfFunctions = pied->NumberOfFunctions;
    DWORD NumberOfNames = pied->NumberOfNames;

    if (0 == NumberOfNames || NumberOfNames > NumberOfFunctions)
    {
        return 0;
    }

    PDWORD AddressOfFunctions = (PDWORD)RtlOffsetToPointer(Base, pied->AddressOfFunctions);
    PDWORD AddressOfNames = (PDWORD)RtlOffsetToPointer(Base, pied->AddressOfNames);
    PWORD AddressOfNameOrdinals = (PWORD)RtlOffsetToPointer(Base, pied->AddressOfNameOrdinals);

    // The export name table is sorted, so a binary search over [a, b) suffices.
    DWORD a = 0, b = NumberOfNames;
    do
    {
        // Overflow-safe midpoint.
        DWORD o = a + ((b - a) >> 1);
        int i = strcmp(RtlOffsetToPointer(Base, AddressOfNames[o]), Name);
        if (!i)
        {
            DWORD Ordinal = AddressOfNameOrdinals[o];
            if (Ordinal >= NumberOfFunctions)
            {
                // Corrupt table: the index would read past AddressOfFunctions.
                return 0;
            }

            DWORD Rva = AddressOfFunctions[Ordinal];
            if (!Rva)
            {
                // Empty slot; do not hand back the bare image base as a function.
                return 0;
            }

            if ((ULONG_PTR)Rva - (ULONG_PTR)exportRVA < Size)
            {
                // RVA points inside the export directory: forwarder string, not code.
                return 0;
            }

            return RtlOffsetToPointer(BaseAddress, Rva);
        }

        if (i < 0)
        {
            a = o + 1;
        }
        else
        {
            b = o;
        }
    } while (a < b);

    return 0;
}
/*
 * Returns the address of LoadLibraryW as computed by getRVA() from the
 * "\KnownDlls32\kernel32.dll" section, or 0 on any failure.
 *
 * The section is opened, its SECTION_IMAGE_INFORMATION is queried, and a
 * read-only view is mapped into the current process so that the export table
 * can be parsed. The view is unmapped and the section handle closed before
 * returning.
 */
PVOID GetWowLoadLibraryW()
{
    STATIC_OBJECT_ATTRIBUTES(oa, "\\KnownDlls32\\kernel32.dll");

    HANDLE hSection;
    NTSTATUS status = ZwOpenSection(&hSection, SECTION_QUERY|SECTION_MAP_READ, &oa);
    if (status < 0)
    {
        return 0;
    }

    PVOID wowAddress = 0;

    SECTION_IMAGE_INFORMATION sii;
    status = ZwQuerySection(hSection, SectionImageInformation, &sii, sizeof(sii), 0);
    if (status >= 0)
    {
        PVOID view = 0;
        SIZE_T viewSize = 0;
        status = ZwMapViewOfSection(hSection, NtCurrentProcess(), &view, 0, 0, 0,
                                    &viewSize, ViewUnmap, 0, PAGE_READONLY);
        if (status >= 0)
        {
            // Parsing reads the mapped image directly; any fault while doing
            // so is swallowed and simply leaves the result at 0.
            __try
            {
                wowAddress = getRVA(view, (ULONG_PTR)sii.TransferAddress, "LoadLibraryW");
            }
            __except (EXCEPTION_EXECUTE_HANDLER)
            {
            }

            ZwUnmapViewOfSection(NtCurrentProcess(), view);
        }
    }

    NtClose(hSection);
    return wowAddress;
}
After this, the task becomes trivial:
// Injects a DLL into the process <pid> by starting a remote thread at the
// address returned by GetWowLoadLibraryW(), with the library path (copied
// into the target process) as the thread argument.
if (PVOID wowLoadLibraryW = GetWowLoadLibraryW())
{
    //PCWSTR szLibPath = ...
    // Size in bytes of the path, including the terminating null character.
    SIZE_T s = (wcslen(szLibPath) + 1) * sizeof(WCHAR);
    if (HANDLE hProcess = OpenProcess(PROCESS_ALL_ACCESS, FALSE, <pid>))
    {
        if (PVOID pv = VirtualAllocEx(hProcess, 0, s, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE))
        {
            HANDLE hThread = 0;
            if (WriteProcessMemory(hProcess, pv, szLibPath, s, 0))
            {
                hThread = CreateRemoteThread(hProcess, 0, 0, (PTHREAD_START_ROUTINE)wowLoadLibraryW, pv, 0, 0);
            }

            if (hThread)
            {
                // The remote thread receives pv as its argument and runs
                // asynchronously, so the buffer is intentionally left
                // allocated here.
                CloseHandle(hThread);
            }
            else
            {
                // Nothing will ever consume the buffer: release it instead of
                // leaking memory in the target process.
                VirtualFreeEx(hProcess, pv, 0, MEM_RELEASE);
            }
        }
        CloseHandle(hProcess);
    }
}