CreateRemoteThread() still exists and can be used to create a thread in a different process than the one you are running. Usually, remote threads are used in order to load a DLL inside the target process and ultimately to do some API hooking.
The problem we encounter with CreateRemoteThread() and Windows 7 is that that API does not work across sessions. What this means is that is your program runs as a normal application, it will not be able to create a thread inside a Windows service because they do not run inside the same session.
Running in separate sessions is something that Microsoft has done in order to increase the security of the OS. It's not a bad thing in itself but it can be easily defeated. One way is to use some queued APC which can go across sessions but it's not trivial and I won't be discussing this here.
The solution that is accessible to the more casual programmer is the use of an undocumented API: CreateThreadEx().
Before discussing that function, we need to know about some structures that Windows uses.
typedef struct _CLIENT_ID {
HANDLE UniqueProcess;
HANDLE UniqueThread;
} CLIENT_ID;
typedef CLIENT_ID *PCLIENT_ID;
typedef struct _PEB_LDR_DATA {
ULONG Length;
BOOL Initialized;
PVOID SsHandle;
LIST_ENTRY InLoadOrderModuleList;
LIST_ENTRY InMemoryOrderModuleList;
LIST_ENTRY InInitializationOrderModuleList;
} PEB_LDR_DATA, *PPEB_LDR_DATA;
typedef struct _RTL_USER_PROCESS_PARAMETERS {
ULONG MaximumLength;
ULONG Length;
ULONG Flags;
ULONG DebugFlags;
PVOID ConsoleHandle;
ULONG ConsoleFlags;
HANDLE StdInputHandle;
HANDLE StdOutputHandle;
HANDLE StdErrorHandle;
_UNICODE_STRING CurrentDirectoryPath;
HANDLE CurrentDirectoryHandle;
_UNICODE_STRING DllPath;
_UNICODE_STRING ImagePathName;
_UNICODE_STRING CommandLine;
PVOID Environment;
ULONG StartingPositionLeft;
ULONG StartingPositionTop;
ULONG Width;
ULONG Height;
ULONG CharWidth;
ULONG CharHeight;
ULONG ConsoleTextAttributes;
ULONG WindowFlags;
ULONG ShowWindowFlags;
_UNICODE_STRING WindowTitle;
_UNICODE_STRING DesktopName;
_UNICODE_STRING ShellInfo;
_UNICODE_STRING RuntimeData;
PVOID DLCurrentDirectory;
} RTL_USER_PROCESS_PARAMETERS, *PRTL_USER_PROCESS_PARAMETERS;
typedef struct _PEB {
BOOL InheritedAddressSpace;
BOOL ReadImageFileExecOptions;
BOOL BeingDebugged;
BOOL Spare;
HANDLE Mutant;
PVOID ImageBaseAddress;
PPEB_LDR_DATA LoaderData;
PRTL_USER_PROCESS_PARAMETERS ProcessParameters;
PVOID SubSystemData;
PVOID ProcessHeap;
PVOID FastPebLock;
PVOID FastPebLockRoutine;
PVOID FastPebUnlockRoutine;
ULONG EnvironmentUpdateCount;
PVOID KernelCallbackTable;
PVOID EventLogSection;
PVOID EventLog;
PVOID FreeList;
ULONG TlsExpansionCounter;
PVOID TlsBitmap;
ULONG TlsBitmapBits[0x2];
PVOID ReadOnlySharedMemoryBase;
PVOID ReadOnlySharedMemoryHeap;
PVOID ReadOnlyStaticServerData;
PVOID AnsiCodePageData;
PVOID OemCodePageData;
PVOID UnicodeCaseTableData;
ULONG NumberOfProcessors;
ULONG NtGlobalFlag;
BYTE Spare2[0x4];
LARGE_INTEGER CriticalSectionTimeout;
ULONG HeapSegmentReserve;
ULONG HeapSegmentCommit;
ULONG HeapDeCommitTotalFreeThreshold;
ULONG HeapDeCommitFreeBlockThreshold;
ULONG NumberOfHeaps;
ULONG MaximumNumberOfHeaps;
PVOID *ProcessHeaps;
PVOID GdiSharedHandleTable;
PVOID ProcessStarterHelper;
PVOID GdiDCAttributeList;
PVOID LoaderLock;
ULONG OSMajorVersion;
ULONG OSMinorVersion;
ULONG OSBuildNumber;
ULONG OSPlatformId;
ULONG ImageSubSystem;
ULONG ImageSubSystemMajorVersion;
ULONG ImageSubSystemMinorVersion;
ULONG GdiHandleBuffer[0x22];
ULONG PostProcessInitRoutine;
ULONG TlsExpansionBitmap;
BYTE TlsExpansionBitmapBits[0x80];
ULONG SessionId;
} PEB, *PPEB;
typedef struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME
{
PVOID Previous;
PVOID * ActivationContext;
ULONG Flags;
} RTL_ACTIVATION_CONTEXT_STACK_FRAME, *PRTL_ACTIVATION_CONTEXT_STACK_FRAME;
typedef struct _ACTIVATION_CONTEXT_STACK
{
PRTL_ACTIVATION_CONTEXT_STACK_FRAME ActiveFrame;
LIST_ENTRY FrameListCache;
ULONG Flags;
ULONG NextCookieSequenceNumber;
ULONG StackId;
} ACTIVATION_CONTEXT_STACK, *PACTIVATION_CONTEXT_STACK;
typedef struct _GDI_TEB_BATCH
{
ULONG Offset;
ULONG HDC;
ULONG Buffer[310];
} GDI_TEB_BATCH, *PGDI_TEB_BATCH;
typedef struct _TEB
{
NT_TIB NtTib;
PVOID EnvironmentPointer;
CLIENT_ID ClientId;
PVOID ActiveRpcHandle;
PVOID ThreadLocalStoragePointer;
PPEB ProcessEnvironmentBlock;
ULONG LastErrorValue;
ULONG CountOfOwnedCriticalSections;
PVOID CsrClientThread;
PVOID Win32ThreadInfo;
ULONG User32Reserved[26];
ULONG UserReserved[5];
PVOID WOW32Reserved;
ULONG CurrentLocale;
ULONG FpSoftwareStatusRegister;
VOID * SystemReserved1[54];
LONG ExceptionCode;
PACTIVATION_CONTEXT_STACK ActivationContextStackPointer;
UCHAR SpareBytes1[36];
ULONG TxFsContext;
GDI_TEB_BATCH GdiTebBatch;
CLIENT_ID RealClientId;
PVOID GdiCachedProcessHandle;
ULONG GdiClientPID;
ULONG GdiClientTID;
PVOID GdiThreadLocalInfo;
ULONG Win32ClientInfo[62];
VOID * glDispatchTable[233];
ULONG glReserved1[29];
PVOID glReserved2;
PVOID glSectionInfo;
PVOID glSection;
PVOID glTable;
PVOID glCurrentRC;
PVOID glContext;
ULONG LastStatusValue;
_UNICODE_STRING StaticUnicodeString;
WCHAR StaticUnicodeBuffer[261];
PVOID DeallocationStack;
VOID * TlsSlots[64];
LIST_ENTRY TlsLinks;
PVOID Vdm;
PVOID ReservedForNtRpc;
VOID * DbgSsReserved[2];
ULONG HardErrorMode;
VOID * Instrumentation[9];
GUID ActivityId;
PVOID SubProcessTag;
PVOID EtwLocalData;
PVOID EtwTraceData;
PVOID WinSockData;
ULONG GdiBatchCount;
UCHAR SpareBool0;
UCHAR SpareBool1;
UCHAR SpareBool2;
UCHAR IdealProcessor;
ULONG GuaranteedStackBytes;
PVOID ReservedForPerf;
PVOID ReservedForOle;
ULONG WaitingOnLoaderLock;
PVOID SavedPriorityState;
ULONG SoftPatchPtr1;
PVOID ThreadPoolData;
VOID * * TlsExpansionSlots;
ULONG ImpersonationLocale;
ULONG IsImpersonating;
PVOID NlsCache;
PVOID pShimData;
ULONG HeapVirtualAffinity;
PVOID CurrentTransactionHandle;
PVOID ActiveFrame;
PVOID FlsData;
PVOID PreferredLanguages;
PVOID UserPrefLanguages;
PVOID MergedPrefLanguages;
ULONG MuiImpersonation;
WORD CrossTebFlags;
ULONG SpareCrossTebBits: 16;
WORD SameTebFlags;
ULONG DbgSafeThunkCall: 1;
ULONG DbgInDebugPrint: 1;
ULONG DbgHasFiberData: 1;
ULONG DbgSkipThreadAttach: 1;
ULONG DbgWerInShipAssertCode: 1;
ULONG DbgRanProcessInit: 1;
ULONG DbgClonedThread: 1;
ULONG DbgSuppressDebugMsg: 1;
ULONG SpareSameTebBits: 8;
PVOID TxnScopeEnterCallback;
PVOID TxnScopeExitCallback;
PVOID TxnScopeContext;
ULONG LockCount;
ULONG ProcessRundown;
UINT64 LastSwitchTime;
UINT64 TotalSwitchOutTime;
LARGE_INTEGER WaitReasonBitMap;
} TEB, *PTEB;
While you don't need to know all of those, they are useful if you want to get some information about the process you are inserting a thread into.
Now I will discuss the NtCreateThreadEx API.
The prototype is as follows:
typedef DWORD (WINAPI *LPFUNLPFUN_NtCreateThreadEx)
(
OUT PHANDLE hThread,
IN ACCESS_MASK DesiredAccess,
IN LPVOID ObjectAttributes,
IN HANDLE ProcessHandle,
IN LPTHREAD_START_ROUTINE lpStartAddress,
IN LPVOID lpParameter,
IN BOOL CreateSuspended,
IN ULONG StackZeroBits,
IN ULONG SizeOfStackCommit,
IN ULONG SizeOfStackReserve,
OUT LPVOID lpBytesBuffer
);
As we can see, it looks more like a function you'd call inside a driver than in user mode. But, not to worry because you may call it from user mode.
The last parameter is an undocumented structure that looks like this:
typedef struct NtCreateThreadExBuffer
{
ULONG Size;
ULONG Unknown1;
ULONG Unknown2;
PCLIENT_ID ClientId;
ULONG Unknown4;
ULONG Unknown5;
ULONG Unknown6;
PTEB* TEB;
ULONG Unknown8;
} NTCREATETHREADEX_STRUCT, *LPNTCREATETHREADEX_STRUCT;
This explains why I listed more structures at the begining.
The ClientId and TEB pointers will be filled upon return of the call. The TEB is obviously the more important but most will simply ignore that data.
Now that we know what the API looks like and what structures it uses we can start describing how to use it.
First, let's get the pointer to the function like this:
// Get the Windows version before trying to get the address of
// NtCreateThreadEx.
//
dwMajorVersion = (DWORD)(LOBYTE(LOWORD(dwVersion)));
dwMinorVersion = (DWORD)(HIBYTE(LOWORD(dwVersion)));
if (dwMajorVersion == 6 && dwMinorVersion >= 1)
{
NtCreateThreadEx = (LPFUN_NtCreateThreadEx) GetProcAddress(modNtDll, "NtCreateThreadEx");
if( !NtCreateThreadEx )
{
// Failed to get funtion address from ntdll.dll
return FALSE;
}
else
printf("Located NtCreateThreadEx\r\n");
}
If everything works fine, by now we have the pointer we need.
Here is the actual code to create the thread we want:
//setup and initialize the buffer
NTCREATETHREADEX_STRUCT ntbuffer;
memset (&ntbuffer,0,sizeof(NTCREATETHREADEX_STRUCT));
CLIENT_ID cid = {0};
PTEB pTEB = NULL;
ntbuffer.Size = sizeof(NTCREATETHREADEX_STRUCT);
ntbuffer.Unknown1 = 0x10003;
ntbuffer.Unknown2 = 0x8;
ntbuffer.ClientId = &cid; // ClientId
ntbuffer.Unknown4 = 0;
ntbuffer.Unknown5 = 0x10004;
ntbuffer.Unknown6 = 4;
ntbuffer.TEB = &pTEB; // PEB
ntbuffer.Unknown8 = 0;
HANDLE hRemoteThread = NULL;
DWORD error = NtCreateThreadEx(&hRemoteThread,
0x1FFFFF,
NULL,
hProc,
(LPTHREAD_START_ROUTINE) pfnRoutine,
hRemoteMem,
FALSE,
NULL,
NULL,
NULL,
&ntbuffer);
Here's a succinct description of the parameters used in the call:
- hRemoteThread is a HANDLE that will be used for the newly created thread
- 0x1FFFFF is the default access mask (should use proper values)
- The object attributes can be NULL
- hProc is the handle of the process you want to inject your thread into
- pfnRoutine is the address of the thread
- hRemoteMem is the memory allocated for the parameters of the thread (LPVOID)
- everything until ntBuffer can be ignored
- ntBuffer is the pointer to the NTCREATETHREADEX_STRUCT