vgpu_unlock/vgpu_unlock
Jonathan Johansson c63cc3bb76 Suppress some "Assertion Failed".
Some ioctl calls result in "Assertion Failed" although everything seems to work
anyway. So we suppress these so that the error messages that do pop up are
actually significant.

Also add support for sending syslog messages from the vgpu_unlock python script.
This simplifies debugging a lot, and allows us to log the operation type values
for each ioctl call that fails.
2021-04-17 17:37:07 +02:00

388 lines
16 KiB
Python
Executable file

#!/bin/python3
#
# vGPU unlock script for consumer GPUs.
#
# Copyright 2021 Jonathan Johansson
# This file is part of the "vgpu_unlock" project, and is distributed under the MIT License.
# See the LICENSE file for more details.
#
# Contributions from Krutav Shah and the vGPU Unlocking community included :)
#
import errno
import frida
import os
import queue
import subprocess
import sys
import time
# JavaScript source of the Frida agent that instrument() loads into every
# target process via session.create_script().  The agent hooks ioctl() and,
# for successful REQ_QUERY_GPU requests that are not flagged "try again":
#   * replaces the PCI device ID (and, for TU102, the subsystem ID) read from
#     the GPU with the ID of a vGPU capable card using the same chip,
#   * overwrites the reported device type with DEV_TYPE_VGPU_CAPABLE,
#   * resets the status to STATUS_OK for two op_type values whose failure is
#     treated as harmless, and logs every other failing op_type via syslog.
# It is a raw string (r prefix), so Python performs no escape processing on
# the text before it is handed to Frida.
script_source = r"""
var syslog_func = new NativeFunction(Module.getExportByName(null, "syslog"),
"void",
["int", "pointer", "...", "pointer"]);
var syslog = function(message) {
var format_ptr = Memory.allocUtf8String("%s");
var message_ptr = Memory.allocUtf8String(message);
syslog_func(5, format_ptr, message_ptr);
};
// Value of the "request" argument used by nvidia-vgpud and nvidia-vgpu-mgr
// when calling ioctl to read the PCI device ID and type (and possibly
// other things) from the GPU.
var REQ_QUERY_GPU = ptr("0xC020462A");
// When issuing ioctl with REQ_QUERY_GPU then the "argp" argument is a
// pointer to a structure something like this:
//
// struct arg {
// uint32_t unknown_1; // Initialized prior to call.
// uint32_t unknown_2; // Initialized prior to call.
// uint32_t op_type; // Operation type, see comment below.
// uint32_t padding_1; // Always set to 0 prior to call.
// void* result; // Pointer initialized prior to call.
// // Pointee initialized to 0 prior to call.
// // Pointee is written by ioctl call.
// uint32_t unknown_4; // Set to 0x10 for READ_PCI_ID and set to 4 for
// READ_DEV_TYPE prior to call.
// uint32_t status; // Written by ioctl call. See comment below.
// }
// These are the observed values for the op_type member.
var OP_READ_DEV_TYPE = 0x800289; // *result type is uint64_t.
var OP_READ_PCI_ID = 0x20801801; // *result type is uint16_t[4], the second
// element (index 1) is the device ID, the
// forth element (index 3) is the subsystem
// ID.
// nvidia-vgpu-mgr expects this value for a vGPU capable GPU.
var DEV_TYPE_VGPU_CAPABLE = uint64(3);
// When ioctl returns success (retval >= 0) but sets the status value of
// the arg structure to 3 then nvidia-vgpud will sleep for a bit (first
// 0.1s then 1s then 10s) then issue the same ioctl call again until the
// status differs from 3. It will attempt this for up to 24h before giving
// up.
var STATUS_OK = 0;
var STATUS_TRY_AGAIN = 3;
Interceptor.attach(Module.getExportByName(null, "ioctl"), {
onEnter(args) {
this.request = args[1];
this.argp = args[2];
},
onLeave(retVal) {
if(!this.request.equals(REQ_QUERY_GPU)) {
// Not a call we care about.
return;
}
if(retVal.toInt32() < 0) {
// Call failed.
return;
}
// Lookup status value according to struct above.
var status = this.argp.add(0x1C).readU32();
if(status == STATUS_TRY_AGAIN) {
// Driver will try again.
return;
}
var op_type = this.argp.add(8).readU32();
if(op_type == OP_READ_PCI_ID) {
// Lookup address of the device and subsystem IDs.
var devid_ptr = this.argp.add(0x10).readPointer().add(2);
var subsysid_ptr = this.argp.add(0x10).readPointer().add(6);
// Now we replace the device ID with a spoofed value that needs to
// be determined such that the spoofed value represents a GPU with
// vGPU support that uses the same GPU chip as our actual GPU.
var actual_devid = devid_ptr.readU16();
var spoofed_devid = actual_devid;
var actual_subsysid = subsysid_ptr.readU16();
var spoofed_subsysid = actual_subsysid;
// GM107
if(actual_devid == 0x1390 || // GTX 845M
actual_devid == 0x1391 || // GTX 850M
actual_devid == 0x1392 || // GTX 860M
actual_devid == 0x139a || // GTX 950M
actual_devid == 0x139b || // GTX 960M
actual_devid == 0x139c || // GTX 940M
actual_devid == 0x139d || // GTX 750 Ti Maxwell
actual_devid == 0x179c || // GTX 940MX
actual_devid == 0x1380 || // GTX 750 Ti Maxwell
actual_devid == 0x1381 || // GTX 750 Maxwell
actual_devid == 0x1382 || // GTX 745 Maxwell
actual_devid == 0x13b0 || // Quadro M2000 Mobile
actual_devid == 0x13b1 || // Quadro M1000 Mobile
actual_devid == 0x13b2 || // Quadro M600 Mobile
actual_devid == 0x13b3 || // Quadro K2200 Mobile
actual_devid == 0x13b4 || // Quadro M620 Mobile
actual_devid == 0x13b6 || // Quadro M1200 Mobile
actual_devid == 0x13b9 || // NVS 810
actual_devid == 0x13ba || // Quadro K2200
actual_devid == 0x13bb || // Quadro K620
actual_devid == 0x13bc) { // Quadro K1200
spoofed_devid = 0x13bd; // Tesla M10
}
// GM204
if(actual_devid == 0x13c3 || // GTX 960 GM204 OEM Edition
actual_devid == 0x13d9 || // GTX 965M
actual_devid == 0x13d8 || // GTX 970M
actual_devid == 0x13c2 || // GTX 970
actual_devid == 0x13d7 || // GTX 980M
actual_devid == 0x13c0 || // GTX 980
actual_devid == 0x13c1 || // GM204 Unknown
actual_devid == 0x13f1 || // Quadro M4000
actual_devid == 0x13f0) { // Quadro M5000
spoofed_devid = 0x13f2; // Tesla M60
}
// GP102
if(actual_devid == 0x1b00 || // TITAN X (Pascal)
actual_devid == 0x1b02 || // TITAN Xp
actual_devid == 0x1b06 || // GTX 1080 Ti
actual_devid == 0x1b30) { // Quadro P6000
spoofed_devid = 0x1b38; // Tesla P40
}
// GP108 (Merged with Tesla P4, will move to M10 in future due to 2GB VRAM)
if(actual_devid == 0x1d01 || // GT 1030
actual_devid == 0x1d10 || // MX150 Mobile
actual_devid == 0x1d11 || // MX230 Mobile
actual_devid == 0x1d12 || // MX150 Mobile
actual_devid == 0x1d13 || // MX250 Mobile
actual_devid == 0x1d16 || // MX330 Mobile
// GP107 (Merged with Tesla P4, may move to M10 in future due to low VRAM)
actual_devid == 0x1cb1 || // Quadro P1000
actual_devid == 0x1c81 || // GTX 1050 2GB
actual_devid == 0x1c82 || // GTX 1050 Ti
actual_devid == 0x1c83 || // GTX 1050 3GB
actual_devid == 0x1c8c || // GTX 1050 Ti Mobile
actual_devid == 0x1c8d || // GTX 1050 Mobile
actual_devid == 0x1c8f || // GTX 1050 Ti Max-Q
actual_devid == 0x1c90 || // MX150 Mobile
actual_devid == 0x1c92 || // GTX 1050 Mobile
actual_devid == 0x1c94 || // MX350 Mobile
actual_devid == 0x1c96 || // MX350 Mobile
// GP106 (Merged with Tesla P4)
actual_devid == 0x1c03 || // GTX 1060 6GB
actual_devid == 0x1c04 || // GTX 1060 5GB
actual_devid == 0x1c02 || // GTX 1060 3GB
actual_devid == 0x1c07 || // P106-100 6GB
actual_devid == 0x1c09 || // P106-90 3GB
actual_devid == 0x1c22 || // GTX 1050 Mobile
actual_devid == 0x1c23 || // GTX 1060 Mobile Rev. 2
actual_devid == 0x1c20 || // GTX 1060 Mobile
actual_devid == 0x1c21 || // GTX 1050 Ti Mobile
actual_devid == 0x1c2d || // GP106M Generic
actual_devid == 0x1c60 || // GTX 1060 Mobile 6GB
actual_devid == 0x1c61 || // GTX 1050 Ti Mobile
actual_devid == 0x1c62 || // GTX 1050 Mobile
actual_devid == 0x1c70 || // GP106GL Generic
actual_devid == 0x1c30 || // Quadro P2000
actual_devid == 0x1c31 || // Quadro P2200
// GP104
actual_devid == 0x1b80 || // GTX 1080
actual_devid == 0x1b81 || // GTX 1070
actual_devid == 0x1b82 || // GTX 1070 Ti
actual_devid == 0x1b83 || // GTX 1060 6GB GP104 Refresh
actual_devid == 0x1b84 || // GTX 1060 3GB GP104 Refresh
actual_devid == 0x1b87 || // P104-100 Mining Card
actual_devid == 0x1ba0 || // GTX 1080 Mobile
actual_devid == 0x1ba1 || // GTX 1070 Mobile
actual_devid == 0x1bb0) { // Quadro P5000 (This will be moved to Tesla P6 in the future)
spoofed_devid = 0x1bb3; // Tesla P4
}
// GV100 (For the one person who owns a Titan Volta)
if(actual_devid == 0x1d81 || // TITAN V
actual_devid == 0x1dba) { // Quadro GV100 32GB
spoofed_devid = 0x1db4; // Tesla V100
}
// TU102
if(actual_devid == 0x1e02 || // TITAN RTX
actual_devid == 0x1e04 || // RTX 2080 Ti
actual_devid == 0x1e07) { // RTX 2080 Ti Rev. A
spoofed_devid = 0x1e30; // Quadro RTX 6000
spoofed_subsysid = 0x12ba;
}
// TU117 (Merged with Tesla T4)
if(actual_devid == 0x1ff9 || // Quadro T1000 Mobile
actual_devid == 0x1f99 || // TU1117 Mobile Unknown
actual_devid == 0x1fae || // TU1117GL Unknown
actual_devid == 0x1fb8 || // Quadro T2000 Mobile Max-Q
actual_devid == 0x1fb9 || // Quadro T1000 Mobile
actual_devid == 0x1fbf || // TU1117GL Unknown
actual_devid == 0x1f97 || // GeForce MX450
actual_devid == 0x1f98 || // GeForce MX450
actual_devid == 0x1f9c || // GeForce MX450
actual_devid == 0x1fbb || // Quadro T500 Mobile
actual_devid == 0x1fd9 || // GeForce GTX 1650 Mobile Refresh
actual_devid == 0x1f81 || // TU117 Unknown
actual_devid == 0x1f82 || // GeForce GTX 1650
actual_devid == 0x1f91 || // GTX 1650 Mobile Max-Q
actual_devid == 0x1f92 || // GTX 1650 Mobile
actual_devid == 0x1f94 || // GTX 1650 Mobile
actual_devid == 0x1f95 || // GTX 1650 Ti Mobile
actual_devid == 0x1f96 || // GTX 1650 Mobile Max-Q
// TU116 (Merged with Tesla T4)
actual_devid == 0x2182 || // GTX 1660 Ti
actual_devid == 0x2183 || // TU116 Unknown
actual_devid == 0x2184 || // GTX 1660
actual_devid == 0x2187 || // GTX 1650 SUPER
actual_devid == 0x2188 || // GTX 1650
actual_devid == 0x2191 || // GTX 1660 Ti Mobile
actual_devid == 0x2192 || // GTX 1650 Ti Mobile
actual_devid == 0x21ae || // TU116GL Unknown
actual_devid == 0x21bf || // TU116GL Unknown
actual_devid == 0x21c4 || // GTX 1660 Super
actual_devid == 0x21d1 || // GTX 1660 Ti Mobile
// TU106 (Merged with Tesla T4)
actual_devid == 0x1f02 || // RTX 2070 8GB
actual_devid == 0x1f04 || // TU106 Unknown
actual_devid == 0x1f06 || // RTX 2060 SUPER
actual_devid == 0x1f07 || // RTX 2070 Rev. A
actual_devid == 0x1f08 || // RTX 2060 6GB
actual_devid == 0x1f09 || // GTX 1660 Super
actual_devid == 0x1f0a || // GTX 1650
actual_devid == 0x1f10 || // RTX 2070 Mobile
actual_devid == 0x1f11 || // RTX 2060 Mobile
actual_devid == 0x1f12 || // RTX 2060 Mobile Max-Q
actual_devid == 0x1f14 || // RTX 2070 Mobile Max-Q
actual_devid == 0x1f15 || // RTX 2060 Mobile
actual_devid == 0x1f2e || // TU106M Mobile Unknown
actual_devid == 0x1f36 || // TU106GLM Mobile Unknown
actual_devid == 0x1f42 || // RTX 2060 SUPER
actual_devid == 0x1f47 || // RTX 2060 SUPER
actual_devid == 0x1f50 || // RTX 2070 Mobile
actual_devid == 0x1f51 || // RTX 2060 Mobile
// TU104
actual_devid == 0x1e81 || // RTX 2080 Super
actual_devid == 0x1e82 || // RTX 2080
actual_devid == 0x1e84 || // RTX 2070 Super
actual_devid == 0x1e87 || // RTX 2080 Rev. A
actual_devid == 0x1e89 || // RTX 2060
actual_devid == 0x1eb0 || // Quadro RTX 5000
actual_devid == 0x1eb1) { // Quadro RTX 4000
spoofed_devid = 0x1eb8; // Tesla T4
}
// GA102
if(actual_devid == 0x2204 || // RTX 3090
actual_devid == 0x2205 || // RTX 3080 Ti
actual_devid == 0x2206) { // RTX 3080
spoofed_devid = 0x2235; // RTX A40
}
devid_ptr.writeU16(spoofed_devid);
subsysid_ptr.writeU16(spoofed_subsysid);
}
if(op_type == OP_READ_DEV_TYPE) {
// Set device type to vGPU capable.
var dev_type_ptr = this.argp.add(0x10).readPointer();
dev_type_ptr.writeU64(DEV_TYPE_VGPU_CAPABLE);
}
if(status != STATUS_OK) {
// Things seems to work fine even if some operations that fail
// result in failed assertions. So here we change the status
// value for these cases to cleanup the logs for nvidia-vgpu-mgr.
if(op_type == 0xA0820104 ||
op_type == 0x90960103) {
this.argp.add(0x1C).writeU32(STATUS_OK);
} else {
syslog("op_type: 0x" + op_type.toString(16) + " failed.");
}
}
}
});
syslog("vgpu_unlock loaded.");
"""
# Handle to the local Frida device; the functions below use it to spawn,
# attach to and resume processes and to subscribe to "child-added" events.
device = frida.get_local_device()
# PIDs of child processes recorded by on_child_added(); main() drains this
# queue and waits for each recorded child to terminate.
child_processes = queue.Queue()
def instrument(pid):
    """Attach to a process, load the hook script into it and resume it.

    :param pid: Process identifier of the process to instrument.
    """
    frida_session = device.attach(pid)

    # nvidia-vgpud forks itself when initially launched, so child gating is
    # enabled to let the children be instrumented as well.
    frida_session.enable_child_gating()

    hook_script = frida_session.create_script(script_source)
    hook_script.load()

    # The process is resumed only after the script has been loaded.
    device.resume(pid)
def on_child_added(child):
    """Record and instrument a newly created child process.

    The child's PID is queued in ``child_processes`` and the child is then
    passed to instrument().

    :param child: The newly created child process; only its ``pid``
        attribute is used.
    """
    child_pid = child.pid
    child_processes.put(child_pid)
    instrument(child_pid)
def wait_exit(pid):
    """Block until the process with the given PID no longer exists.

    The process is probed every 0.1 seconds with ``os.kill(pid, 0)``.  The
    function returns once that probe fails with ESRCH; any other OSError is
    ignored and polling continues.

    :param pid: Process ID of the target process.
    """
    process_gone = False
    while not process_gone:
        time.sleep(.1)
        try:
            os.kill(pid, 0)
        except OSError as err:
            # Only "no such process" ends the wait.
            process_gone = err.errno == errno.ESRCH
def main():
    """Entrypoint: background the script, then spawn and instrument a command.

    Behaviour depends on ``sys.argv``:

    * ``<script> <command...>``: re-launches this script with ``-f`` inserted
      before the original arguments and exits immediately, so the caller sees
      something resembling a forking service.
    * ``<script> -f <command...>``: spawns ``<command...>`` through
      ``/bin/bash -c``, instruments it, and blocks until it and every child
      process recorded in ``child_processes`` has terminated.

    :raises SystemExit: With a usage message when no argument is given, and
        with a success status in the launching process after the re-launch.
    """
    # A bare invocation used to crash with IndexError on sys.argv[1]; report
    # the expected usage instead.
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else "vgpu_unlock"
        sys.exit("usage: {0} [-f] <command> [args...]".format(prog))

    # Behave at least a little bit like a forking service.
    if sys.argv[1] != "-f":
        subprocess.Popen([sys.argv[0], "-f"] + sys.argv[1:])
        # sys.exit() instead of the bare exit() helper: the latter is added
        # by the site module and is not guaranteed to be defined.
        sys.exit()

    device.on("child-added", on_child_added)

    # NOTE(review): the arguments are joined with plain spaces and re-parsed
    # by bash, so the quoting of individual arguments is not preserved.
    pid = device.spawn(["/bin/bash", "-c", ' '.join(sys.argv[2:])])
    instrument(pid)

    # Wait for everything to terminate before exiting.
    wait_exit(pid)
    while not child_processes.empty():
        wait_exit(child_processes.get_nowait())
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()