From bccf933c0a43ce11fd5dcc34e5a945937bb01072 Mon Sep 17 00:00:00 2001 From: Jason Hood Date: Mon, 27 Jan 2014 14:03:53 +1000 Subject: [PATCH] Always find the base address of kernel32.dll. --- ANSI.c | 18 ++++- ansicon.c | 52 +++++++------- ansicon.h | 2 + injdll32.c | 208 ++++++++++++++++++++++++++--------------------------- injdll64.c | 161 +++++++++++++++++++++++++++++++++++++++-- 5 files changed, 298 insertions(+), 143 deletions(-) diff --git a/ANSI.c b/ANSI.c index 6c3c4f6..0d89f66 100644 --- a/ANSI.c +++ b/ANSI.c @@ -111,9 +111,10 @@ v.166, 20 & 21 September, 2013: fix 32-bit process trying to detect 64-bit process. - v1.67, 25 January, 2014: + v1.67, 25 to 27 January, 2014: don't hook ourself from LoadLibrary or LoadLibraryEx; - update the LoadLibraryEx flags that should not cause hooking. + update the LoadLibraryEx flags that should not cause hooking; + always find the base address of kernel32.dll. */ #include "ansicon.h" @@ -252,8 +253,9 @@ SHARED DWORD s_flag; #define GRM_INIT 1 #define GRM_EXIT 2 +SHARED DWORD LLW32r; #ifdef _WIN64 -SHARED DWORD LLW32; +SHARED DWORD LLW64r; #endif @@ -1842,6 +1844,16 @@ BOOL WINAPI DllMain( HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved ) hDllInstance = hInstance; // save Dll instance handle DEBUGSTR( 1, L"hDllInstance = %p", hDllInstance ); + if (LLW32r == 0) + { + if (!get_LLW32r()) + return FALSE; +#ifdef _WIN64 + if (!get_LLW64r()) + return FALSE; +#endif + } + // Get the entry points to the original functions. hKernel = GetModuleHandleA( APIKernel ); for (hook = Hooks; hook->name; ++hook) diff --git a/ansicon.c b/ansicon.c index ae28440..fd58f18 100644 --- a/ansicon.c +++ b/ansicon.c @@ -78,7 +78,7 @@ don't write the reset sequence if output is redirected. */ -#define PDATE L"25 January, 2014" +#define PDATE L"27 January, 2014" #include "ansicon.h" #include "version.h" @@ -111,10 +111,11 @@ BOOL GetParentProcessInfo( LPPROCESS_INFORMATION ppi, LPTSTR ); // The DLL shares this variable, so injection requires it here. +DWORD LLW32r; #ifdef _WIN64 -DWORD LLW32; -extern LPVOID base; +DWORD LLW64r; #endif +extern LPVOID kernel32_base; // Find the name of the DLL and inject it. @@ -137,11 +138,18 @@ BOOL Inject( LPPROCESS_INFORMATION ppi, BOOL* gui, LPCTSTR app ) #ifdef _WIN64 wsprintf( dll + len, L"ANSI%d.dll", type ); if (type == 32) + { + get_LLW32r(); InjectDLL32( ppi, dll ); + } else + { + get_LLW64r(); InjectDLL64( ppi, dll ); + } #else wcscpy( dll + len, L"ANSI32.dll" ); + get_LLW32r(); InjectDLL32( ppi, dll ); #endif return TRUE; @@ -276,37 +284,31 @@ int main( void ) } else if (GetParentProcessInfo( &pi, arg )) { + HANDLE hSnap; + MODULEENTRY32 me; + BOOL fOk; + pi.hProcess = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pi.dwProcessId); pi.hThread = OpenThread( THREAD_ALL_ACCESS, FALSE, pi.dwThreadId ); SuspendThread( pi.hThread ); -#ifdef _WIN64 - // Find the base address of kernel32.dll if the 64-bit version is - // injecting into a 32-bit parent. - if (IsWow64Process( pi.hProcess, &gui ) && gui) + // Find the base address of kernel32.dll. + hSnap = CreateToolhelp32Snapshot( TH32CS_SNAPMODULE | + TH32CS_SNAPMODULE32, + pi.dwProcessId ); + if (hSnap != INVALID_HANDLE_VALUE) { - HANDLE hSnap; - MODULEENTRY32 me; - BOOL fOk; - - hSnap = CreateToolhelp32Snapshot( TH32CS_SNAPMODULE | - TH32CS_SNAPMODULE32, - pi.dwProcessId ); - if (hSnap != INVALID_HANDLE_VALUE) + me.dwSize = sizeof(MODULEENTRY32); + for (fOk = Module32First( hSnap, &me ); fOk; + fOk = Module32Next( hSnap, &me )) { - me.dwSize = sizeof(MODULEENTRY32); - for (fOk = Module32First( hSnap, &me ); fOk; - fOk = Module32Next( hSnap, &me )) + if (_wcsicmp( me.szModule, L"kernel32.dll" ) == 0) { - if (_wcsicmp( me.szModule, L"kernel32.dll" ) == 0) - { - base = me.modBaseAddr; - break; - } + kernel32_base = me.modBaseAddr; + break; } - CloseHandle( hSnap ); } + CloseHandle( hSnap ); } -#endif if (!Inject( &pi, &gui, arg )) rc = 1; ResumeThread( pi.hThread ); diff --git a/ansicon.h b/ansicon.h index cba8c9d..5a9a5b5 100644 --- a/ansicon.h +++ b/ansicon.h @@ -47,6 +47,8 @@ typedef struct int ProcessType( LPPROCESS_INFORMATION, BOOL* ); void InjectDLL32( LPPROCESS_INFORMATION, LPCTSTR ); void InjectDLL64( LPPROCESS_INFORMATION, LPCTSTR ); +BOOL get_LLW32r( void ); +BOOL get_LLW64r( void ); extern TCHAR prog_path[MAX_PATH]; extern LPTSTR prog; diff --git a/injdll32.c b/injdll32.c index 06e98d4..038702c 100644 --- a/injdll32.c +++ b/injdll32.c @@ -31,13 +31,14 @@ TWow64SetThreadContext Wow64SetThreadContext; #define CONTEXT_CONTROL WOW64_CONTEXT_CONTROL #define GetThreadContext Wow64GetThreadContext #define SetThreadContext Wow64SetThreadContext +#endif + +extern DWORD LLW32r; +LPVOID kernel32_base; +PIMAGE_DOS_HEADER pDosHeader; #define MakeVA( cast, offset ) (cast)((DWORD_PTR)pDosHeader + (DWORD)(offset)) -extern DWORD LLW32; -LPVOID base; -static PIMAGE_DOS_HEADER pDosHeader; - int export_cmp( const void* a, const void* b ) { return strcmp( (LPCSTR)a, MakeVA( LPCSTR, *(const PDWORD)b ) ); @@ -45,12 +46,9 @@ int export_cmp( const void* a, const void* b ) /* - Get the relative address of the 32-bit LoadLibraryW function from 64-bit code. - This was originally done via executing a helper program (ANSI-LLW.exe), but I - never liked doing that, so now I do it the "hard" way - load the 32-bit - kernel32.dll directly and search the exports. + Get the relative address of LoadLibraryW direct from kernel32.dll. */ -BOOL get_LLW32( void ) +BOOL get_LLW32r( void ) { HMODULE kernel32; TCHAR buf[MAX_PATH]; @@ -61,7 +59,11 @@ BOOL get_LLW32( void ) PWORD ord_table; PDWORD pLLW; +#ifdef _WIN64 len = GetSystemWow64Directory( buf, MAX_PATH ); +#else + len = GetSystemDirectory( buf, MAX_PATH ); +#endif wcscpy( buf + len, L"\\kernel32.dll" ); kernel32 = LoadLibraryEx( buf, NULL, LOAD_LIBRARY_AS_IMAGE_RESOURCE ); if (kernel32 == NULL) @@ -89,24 +91,24 @@ BOOL get_LLW32( void ) FreeLibrary( kernel32 ); return FALSE; } - LLW32 = fun_table[ord_table[pLLW - name_table]]; + LLW32r = fun_table[ord_table[pLLW - name_table]]; FreeLibrary( kernel32 ); return TRUE; } -#else -DWORD LLW32; -#endif void InjectDLL32( LPPROCESS_INFORMATION ppi, LPCTSTR dll ) { CONTEXT context; DWORD ep; - DWORD len; + BOOL eip; LPVOID mem; DWORD mem32; DWORD pr; + DWORD LLW; + + DWORD len; #define CODESIZE 20 BYTE code[CODESIZE+TSIZE(MAX_PATH)]; union @@ -114,9 +116,23 @@ void InjectDLL32( LPPROCESS_INFORMATION ppi, LPCTSTR dll ) PBYTE pB; PDWORD pL; } ip; -#ifdef _WIN64 - BOOL entry = FALSE; -#endif + + struct unicode_string + { + USHORT Length; + USHORT MaximumLength; + DWORD Buffer; + }; + struct ldr_module // incomplete definition + { + DWORD next, prev; + DWORD baseAddress; + DWORD entryPoint; + DWORD sizeOfImage; + struct unicode_string fullDllName; + struct unicode_string baseDllName; + } ldr; + WCHAR basename[MAX_PATH]; #ifdef IMPORT_WOW64 if (Wow64GetThreadContext == 0) @@ -137,7 +153,6 @@ void InjectDLL32( LPPROCESS_INFORMATION ppi, LPCTSTR dll ) len = TSIZE(lstrlen( dll ) + 1); if (len > TSIZE(MAX_PATH)) return; - CopyMemory( code + CODESIZE, dll, len ); len += CODESIZE; @@ -149,91 +164,68 @@ void InjectDLL32( LPPROCESS_INFORMATION ppi, LPCTSTR dll ) ip.pB = code; - ep = context.Eip; - if (LLW32 == 0) + // Determine the base address of kernel32.dll. If injecting into the parent + // process, the base has already been determined. Otherwise, use the PEB to + // walk the loaded modules. + if (kernel32_base != 0) { -#ifndef _WIN64 - LLW32 = (DWORD)GetProcAddress( GetModuleHandle( L"kernel32.dll" ), - "LoadLibraryW" ); -#else - struct unicode_string - { - USHORT Length; - USHORT MaximumLength; - DWORD Buffer; - }; - struct ldr_module // incomplete definition - { - DWORD next, prev; - DWORD baseAddress; - DWORD entryPoint; - DWORD sizeOfImage; - struct unicode_string fullDllName; - struct unicode_string baseDllName; - } ldr; - WCHAR basename[MAX_PATH]; - - if (!get_LLW32()) - return; - // Determine the base address of the 32-bit kernel32.dll. If injecting - // into the parent process, base has already been determined. Otherwise, - // use the PEB to walk the loaded modules. - if (base == 0) - { - // When a process is created suspended, EAX has the entry point and EBX - // points to the PEB. - if (!ReadProcessMemory( ppi->hProcess, UIntToPtr( context.Ebx + 0x0C ), - ip.pL, 4, NULL )) - { - DEBUGSTR( 1, L"Failed to read Ldr from PEB." ); - return; - } - // In case we're a bit slow (which seems to be unlikely), set up an - // infinite loop as the entry point. - WriteProcessMemory( ppi->hProcess, mem, "\xEB\xFE", 2, NULL ); - FlushInstructionCache( ppi->hProcess, mem, 2 ); - ep = context.Eax; - context.Eax = mem32; - SetThreadContext( ppi->hThread, &context ); - VirtualProtectEx( ppi->hProcess, mem, len, PAGE_EXECUTE, &pr ); - // Now resume the thread, as the PEB hasn't even been created yet. - ResumeThread( ppi->hThread ); - while (*ip.pL == 0) - { - Sleep( 0 ); - ReadProcessMemory( ppi->hProcess, UIntToPtr( context.Ebx + 0x0C ), - ip.pL, 4, NULL ); - } - // Read PEB_LDR_DATA.InInitializationOrderModuleList.Flink. - ReadProcessMemory( ppi->hProcess, UIntToPtr( *ip.pL + 0x1c ), - &ip.pL[1], 4, NULL ); - // Sometimes we're so quick ntdll.dll is the only one present, so keep - // looping until kernel32.dll shows up. - for (;;) - { - ldr.next = ip.pL[1]; - do - { - ReadProcessMemory( ppi->hProcess, UIntToPtr( ldr.next ), - &ldr, sizeof(ldr), NULL ); - ReadProcessMemory( ppi->hProcess, UIntToPtr( ldr.baseDllName.Buffer ), - basename, ldr.baseDllName.MaximumLength, NULL ); - if (_wcsicmp( basename, L"kernel32.dll" ) == 0) - { - LLW32 += ldr.baseAddress; - goto gotit; - } - } while (ldr.next != *ip.pL + 0x1c); - } - gotit: - SuspendThread( ppi->hThread ); - VirtualProtectEx( ppi->hProcess, mem, len, pr, &pr ); - entry = TRUE; - } - else - LLW32 += PtrToUint( base ); -#endif + ep = context.Eip; + eip = TRUE; } + else + { + // When a process is created suspended, EAX has the entry point and EBX + // points to the PEB. + if (!ReadProcessMemory( ppi->hProcess, UIntToPtr( context.Ebx + 0x0C ), + ip.pL, 4, NULL )) + { + DEBUGSTR( 1, L"Failed to read Ldr from PEB." ); + return; + } + ep = context.Eax; + eip = FALSE; + // In case we're a bit slow (which seems to be unlikely), set up an + // infinite loop as the entry point. + WriteProcessMemory( ppi->hProcess, mem, "\xEB\xFE", 2, NULL ); + FlushInstructionCache( ppi->hProcess, mem, 2 ); + context.Eax = mem32; + SetThreadContext( ppi->hThread, &context ); + VirtualProtectEx( ppi->hProcess, mem, len, PAGE_EXECUTE, &pr ); + // Now resume the thread, as the PEB hasn't even been created yet. + ResumeThread( ppi->hThread ); + while (*ip.pL == 0) + { + Sleep( 0 ); + ReadProcessMemory( ppi->hProcess, UIntToPtr( context.Ebx + 0x0C ), + ip.pL, 4, NULL ); + } + // Read PEB_LDR_DATA.InInitializationOrderModuleList.Flink. + ReadProcessMemory( ppi->hProcess, UIntToPtr( *ip.pL + 0x1c ), + &ip.pL[1], 4, NULL ); + // Sometimes we're so quick ntdll.dll is the only one present, so keep + // looping until kernel32.dll shows up. + for (;;) + { + ldr.next = ip.pL[1]; + do + { + ReadProcessMemory( ppi->hProcess, UIntToPtr( ldr.next ), + &ldr, sizeof(ldr), NULL ); + ReadProcessMemory( ppi->hProcess, UIntToPtr( ldr.baseDllName.Buffer ), + basename, ldr.baseDllName.MaximumLength, NULL ); + if (_wcsicmp( basename, L"kernel32.dll" ) == 0) + { + kernel32_base = UIntToPtr( ldr.baseAddress ); + goto gotit; + } + } while (ldr.next != *ip.pL + 0x1c); + } + gotit: + SuspendThread( ppi->hThread ); + VirtualProtectEx( ppi->hProcess, mem, len, pr, &pr ); + } + LLW = PtrToUint( kernel32_base ) + LLW32r; + kernel32_base = 0; *ip.pB++ = 0x68; // push ep *ip.pL++ = ep; @@ -242,7 +234,7 @@ void InjectDLL32( LPPROCESS_INFORMATION ppi, LPCTSTR dll ) *ip.pB++ = 0x68; // push L"path\to\ANSI32.dll" *ip.pL++ = mem32 + CODESIZE; *ip.pB++ = 0xe8; // call LoadLibraryW - *ip.pL++ = LLW32 - (mem32 + (DWORD)(ip.pB+4 - code)); + *ip.pL++ = LLW - (mem32 + (DWORD)(ip.pB+4 - code)); *ip.pB++ = 0x61; // popa *ip.pB++ = 0x9d; // popf *ip.pB++ = 0xc3; // ret @@ -250,10 +242,10 @@ void InjectDLL32( LPPROCESS_INFORMATION ppi, LPCTSTR dll ) WriteProcessMemory( ppi->hProcess, mem, code, len, NULL ); FlushInstructionCache( ppi->hProcess, mem, len ); VirtualProtectEx( ppi->hProcess, mem, len, PAGE_EXECUTE, &pr ); -#ifdef _WIN64 - if (entry) - return; -#endif - context.Eip = mem32; - SetThreadContext( ppi->hThread, &context ); + + if (eip) + { + context.Eip = mem32; + SetThreadContext( ppi->hThread, &context ); + } } diff --git a/injdll64.c b/injdll64.c index c46bd46..933da5d 100644 --- a/injdll64.c +++ b/injdll64.c @@ -17,17 +17,97 @@ #include "ansicon.h" +extern DWORD LLW64r; +extern LPVOID kernel32_base; +extern PIMAGE_DOS_HEADER pDosHeader; + +#define MakeVA( cast, offset ) (cast)((DWORD_PTR)pDosHeader + (DWORD)(offset)) + +extern int export_cmp( const void* a, const void* b ); + + +/* + Get the relative address of LoadLibraryW direct from kernel32.dll. +*/ +BOOL get_LLW64r( void ) +{ + HMODULE kernel32; + TCHAR buf[MAX_PATH]; + UINT len; + PIMAGE_NT_HEADERS pNTHeader; + PIMAGE_EXPORT_DIRECTORY pExportDir; + PDWORD fun_table, name_table; + PWORD ord_table; + PDWORD pLLW; + + len = GetSystemDirectory( buf, MAX_PATH ); + wcscpy( buf + len, L"\\kernel32.dll" ); + kernel32 = LoadLibraryEx( buf, NULL, LOAD_LIBRARY_AS_IMAGE_RESOURCE ); + if (kernel32 == NULL) + { + DEBUGSTR( 1, L"Unable to load 64-bit kernel32.dll!" ); + return FALSE; + } + // The handle uses low bits as flags, so strip 'em off. + pDosHeader = (PIMAGE_DOS_HEADER)((DWORD_PTR)kernel32 & ~0xFFFF); + pNTHeader = MakeVA( PIMAGE_NT_HEADERS, pDosHeader->e_lfanew ); + pExportDir = MakeVA( PIMAGE_EXPORT_DIRECTORY, + pNTHeader->OptionalHeader. + DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT]. + VirtualAddress ); + + fun_table = MakeVA( PDWORD, pExportDir->AddressOfFunctions ); + name_table = MakeVA( PDWORD, pExportDir->AddressOfNames ); + ord_table = MakeVA( PWORD, pExportDir->AddressOfNameOrdinals ); + + pLLW = bsearch( "LoadLibraryW", name_table, pExportDir->NumberOfNames, + sizeof(DWORD), export_cmp ); + if (pLLW == NULL) + { + DEBUGSTR( 1, L"Could not find LoadLibraryW!" ); + FreeLibrary( kernel32 ); + return FALSE; + } + LLW64r = fun_table[ord_table[pLLW - name_table]]; + + FreeLibrary( kernel32 ); + return TRUE; +} + + void InjectDLL64( LPPROCESS_INFORMATION ppi, LPCTSTR dll ) { CONTEXT context; - DWORD len; + DWORD64 ep; + BOOL rip; LPVOID mem; + DWORD pr; DWORD64 LLW; + union { PBYTE pB; PDWORD64 pL; } ip; + + struct unicode_string + { + USHORT Length; + USHORT MaximumLength; + DWORD64 Buffer; + }; + struct ldr_module // incomplete definition + { + DWORD64 next, prev; + DWORD64 baseAddress; + DWORD64 entryPoint; + DWORD64 sizeOfImage; + struct unicode_string fullDllName; + struct unicode_string baseDllName; + } ldr; + WCHAR basename[MAX_PATH]; + + DWORD len; #define CODESIZE 92 static BYTE code[CODESIZE+TSIZE(MAX_PATH)] = { 0,0,0,0,0,0,0,0, // original rip @@ -78,19 +158,86 @@ void InjectDLL64( LPPROCESS_INFORMATION ppi, LPCTSTR dll ) CopyMemory( code + CODESIZE, dll, len ); len += CODESIZE; - context.ContextFlags = CONTEXT_CONTROL; + context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; GetThreadContext( ppi->hThread, &context ); mem = VirtualAllocEx( ppi->hProcess, NULL, len, MEM_COMMIT, - PAGE_EXECUTE_READWRITE ); - LLW = (DWORD64)LoadLibraryW; + PAGE_READWRITE ); ip.pB = code; - *ip.pL++ = context.Rip; + // Determine the base address of kernel32.dll. If injecting into the parent + // process, the base has already been determined. Otherwise, use the PEB to + // walk the loaded modules. + if (kernel32_base != 0) + { + ep = context.Rip; + rip = TRUE; + } + else + { + // When a process is created suspended, RCX has the entry point and RDX + // points to the PEB. + if (!ReadProcessMemory( ppi->hProcess, (LPVOID)(context.Rdx + 0x18), + ip.pL, 8, NULL )) + { + DEBUGSTR( 1, L"Failed to read Ldr from PEB." ); + return; + } + ep = context.Rcx; + rip = FALSE; + // In case we're a bit slow (which seems to be unlikely), set up an + // infinite loop as the entry point. + WriteProcessMemory( ppi->hProcess, (PBYTE)mem + 16, "\xEB\xFE", 2, NULL ); + FlushInstructionCache( ppi->hProcess, (PBYTE)mem + 16, 2 ); + context.Rcx = (DWORD64)mem + 16; + SetThreadContext( ppi->hThread, &context ); + VirtualProtectEx( ppi->hProcess, mem, len, PAGE_EXECUTE, &pr ); + // Now resume the thread, as the PEB hasn't even been created yet. + ResumeThread( ppi->hThread ); + while (*ip.pL == 0) + { + Sleep( 0 ); + ReadProcessMemory( ppi->hProcess, (LPVOID)(context.Rdx + 0x18), + ip.pL, 8, NULL ); + } + // Read PEB_LDR_DATA.InInitializationOrderModuleList.Flink. + ReadProcessMemory( ppi->hProcess, (LPVOID)(*ip.pL + 0x30), + &ip.pL[1], 8, NULL ); + // Sometimes we're so quick ntdll.dll is the only one present, so keep + // looping until kernel32.dll shows up. + for (;;) + { + ldr.next = ip.pL[1]; + do + { + ReadProcessMemory( ppi->hProcess, (LPVOID)ldr.next, + &ldr, sizeof(ldr), NULL ); + ReadProcessMemory( ppi->hProcess, (LPVOID)ldr.baseDllName.Buffer, + basename, ldr.baseDllName.MaximumLength, NULL ); + if (_wcsicmp( basename, L"kernel32.dll" ) == 0) + { + kernel32_base = (LPVOID)ldr.baseAddress; + goto gotit; + } + } while (ldr.next != *ip.pL + 0x30); + } + gotit: + SuspendThread( ppi->hThread ); + VirtualProtectEx( ppi->hProcess, mem, len, pr, &pr ); + } + LLW = (DWORD64)kernel32_base + LLW64r; + kernel32_base = 0; + + *ip.pL++ = ep; *ip.pL++ = LLW; WriteProcessMemory( ppi->hProcess, mem, code, len, NULL ); FlushInstructionCache( ppi->hProcess, mem, len ); - context.Rip = (DWORD64)mem + 16; - SetThreadContext( ppi->hThread, &context ); + VirtualProtectEx( ppi->hProcess, mem, len, PAGE_EXECUTE, &pr ); + + if (rip) + { + context.Rip = (DWORD64)mem + 16; + SetThreadContext( ppi->hThread, &context ); + } }