mirror of
https://github.com/DualCoder/vgpu_unlock.git
synced 2024-05-19 19:53:04 +12:00
1ba0d148b2
This makes the script more compact: conditional statements now select the vGPU-capable card to spoof according to the GPU generation, and the generation is determined from ranges of PCI device IDs. This should eliminate the need to add PCI IDs manually, and it opens up more profile options for users to pick from, including profiles whose VRAM size is a multiple of 3 GB. Note that this breaks compatibility with previous versions, as users may have to go back and select a new profile.
248 lines
8.4 KiB
Python
Executable file
248 lines
8.4 KiB
Python
Executable file
#!/bin/python3
|
|
#
|
|
# vGPU unlock script for consumer GPUs.
|
|
#
|
|
# Copyright 2021 Jonathan Johansson
|
|
# This file is part of the "vgpu_unlock" project, and is distributed under the MIT License.
|
|
# See the LICENSE file for more details.
|
|
#
|
|
# Contributions from Krutav Shah and the vGPU Unlocking community included :)
|
|
#
|
|
|
|
import errno
|
|
import frida
|
|
import os
|
|
import queue
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
# JavaScript agent source handed to Frida via session.create_script() in
# instrument(). It hooks ioctl() in the target process and rewrites the
# results of the REQ_QUERY_GPU request (PCI IDs, device type and selected
# status codes) after the real call has returned.
script_source = r"""
    var syslog_func = new NativeFunction(Module.getExportByName(null, "syslog"),
                                         "void",
                                         ["int", "pointer", "...", "pointer"]);

    var syslog = function(message) {
        var format_ptr = Memory.allocUtf8String("%s");
        var message_ptr = Memory.allocUtf8String(message);
        syslog_func(5, format_ptr, message_ptr);
    };


    // Value of the "request" argument used by nvidia-vgpud and nvidia-vgpu-mgr
    // when calling ioctl to read the PCI device ID and type (and possibly
    // other things) from the GPU.
    var REQ_QUERY_GPU = ptr("0xC020462A");

    // When issuing ioctl with REQ_QUERY_GPU then the "argp" argument is a
    // pointer to a structure something like this:
    //
    // struct arg {
    //     uint32_t unknown_1; // Initialized prior to call.
    //     uint32_t unknown_2; // Initialized prior to call.
    //     uint32_t op_type;   // Operation type, see comment below.
    //     uint32_t padding_1; // Always set to 0 prior to call.
    //     void*    result;    // Pointer initialized prior to call.
    //                         // Pointee initialized to 0 prior to call.
    //                         // Pointee is written by ioctl call.
    //     uint32_t unknown_4; // Set to 0x10 for READ_PCI_ID and set to 4 for
    //                         // READ_DEV_TYPE prior to call.
    //     uint32_t status;    // Written by ioctl call. See comment below.
    // }

    // These are the observed values for the op_type member.
    var OP_READ_DEV_TYPE = 0x800289; // *result type is uint64_t.
    var OP_READ_PCI_ID = 0x20801801; // *result type is uint16_t[4], the second
                                     // element (index 1) is the device ID, the
                                     // fourth element (index 3) is the subsystem
                                     // ID.

    // nvidia-vgpu-mgr expects this value for a vGPU capable GPU.
    var DEV_TYPE_VGPU_CAPABLE = uint64(3);

    // When ioctl returns success (retval >= 0) but sets the status value of
    // the arg structure to 3 then nvidia-vgpud will sleep for a bit (first
    // 0.1s then 1s then 10s) then issue the same ioctl call again until the
    // status differs from 3. It will attempt this for up to 24h before giving
    // up.
    var STATUS_OK = 0;
    var STATUS_TRY_AGAIN = 3;

    Interceptor.attach(Module.getExportByName(null, "ioctl"), {
        onEnter(args) {
            this.request = args[1];
            this.argp = args[2];
        },
        onLeave(retVal) {
            if(!this.request.equals(REQ_QUERY_GPU)) {
                // Not a call we care about.
                return;
            }

            if(retVal.toInt32() < 0) {
                // Call failed.
                return;
            }

            // Lookup status value according to struct above.
            var status = this.argp.add(0x1C).readU32();

            if(status == STATUS_TRY_AGAIN) {
                // Driver will try again.
                return;
            }

            var op_type = this.argp.add(8).readU32();

            if(op_type == OP_READ_PCI_ID) {
                // Lookup address of the device and subsystem IDs.
                var devid_ptr = this.argp.add(0x10).readPointer().add(2);
                var subsysid_ptr = this.argp.add(0x10).readPointer().add(6);

                // Now we replace the device ID with a spoofed value that needs to
                // be determined such that the spoofed value represents a GPU with
                // vGPU support that uses the same GPU chip as our actual GPU.
                var actual_devid = devid_ptr.readU16();
                var spoofed_devid = actual_devid;
                var actual_subsysid = subsysid_ptr.readU16();
                var spoofed_subsysid = actual_subsysid;

                // Maxwell
                if(0x1340 <= actual_devid && actual_devid <= 0x13bd ||
                   0x174d <= actual_devid && actual_devid <= 0x179c) {
                    spoofed_devid = 0x13bd; // Tesla M10
                    spoofed_subsysid = 0x1160;
                }

                // Maxwell 2.0
                if(0x13c0 <= actual_devid && actual_devid <= 0x1436 ||
                   0x1617 <= actual_devid && actual_devid <= 0x1667 ||
                   0x17c2 <= actual_devid && actual_devid <= 0x17fd) {
                    spoofed_devid = 0x13f2; // Tesla M60
                }

                // Pascal
                if(0x15f0 <= actual_devid && actual_devid <= 0x15f1 ||
                   0x1b00 <= actual_devid && actual_devid <= 0x1d56 ||
                   0x1725 <= actual_devid && actual_devid <= 0x172f) {
                    spoofed_devid = 0x1b38; // Tesla P40
                }

                // GV100 Volta
                if(actual_devid == 0x1d81 || // TITAN V
                   actual_devid == 0x1dba) { // Quadro GV100 32GB
                    spoofed_devid = 0x1db6; // Tesla V100 32GB PCIE
                }

                // Turing
                if(0x1e02 <= actual_devid && actual_devid <= 0x1ff9 ||
                   0x2182 <= actual_devid && actual_devid <= 0x21d1) {
                    spoofed_devid = 0x1e30; // Quadro RTX 6000
                    spoofed_subsysid = 0x12ba;
                }

                // Ampere
                if(0x2200 <= actual_devid && actual_devid <= 0x2600) {
                    spoofed_devid = 0x2230; // RTX A6000
                }

                devid_ptr.writeU16(spoofed_devid);
                subsysid_ptr.writeU16(spoofed_subsysid);
            }

            if(op_type == OP_READ_DEV_TYPE) {
                // Set device type to vGPU capable.
                var dev_type_ptr = this.argp.add(0x10).readPointer();
                dev_type_ptr.writeU64(DEV_TYPE_VGPU_CAPABLE);
            }

            if(status != STATUS_OK) {
                // Things seems to work fine even if some operations that fail
                // result in failed assertions. So here we change the status
                // value for these cases to cleanup the logs for nvidia-vgpu-mgr.
                if(op_type == 0xA0820104 ||
                   op_type == 0x90960103) {
                    this.argp.add(0x1C).writeU32(STATUS_OK);
                } else {
                    syslog("op_type: 0x" + op_type.toString(16) + " failed.");
                }
            }

            // Workaround for some Maxwell cards not supporting reading inforom.
            if(op_type == 0x2080014b && status == 0x56) {
                this.argp.add(0x1C).writeU32(0x57);
            }
        }
    });

    syslog("vgpu_unlock loaded.");
"""
|
|
|
|
# Handle to the local Frida device; the functions below use it to spawn,
# attach to and resume processes and to subscribe to "child-added" events.
device = frida.get_local_device()
# PIDs of child processes reported by on_child_added(); main() drains this
# queue and waits for each of them to exit.
child_processes = queue.Queue()
|
|
|
|
def instrument(pid):
    """Attach to a process, load the unlock script into it and resume it.

    :param pid: Process identifier
    """

    frida_session = device.attach(pid)
    # nvidia-vgpud forks itself when initially launched, so its children
    # have to be gated and instrumented as well.
    frida_session.enable_child_gating()
    frida_session.create_script(script_source).load()
    device.resume(pid)
|
|
|
|
|
|
def on_child_added(child):
    """Handle the creation of a new child process.

    The child's PID is queued in ``child_processes`` and the child is then
    instrumented.

    :param child: The newly created child process.
    """

    child_pid = child.pid
    child_processes.put(child_pid)
    instrument(child_pid)
|
|
|
|
|
|
def wait_exit(pid):
    """Wait for a process to terminate.

    The process is probed with signal 0 (which delivers nothing and only
    checks that the PID can be signalled) and re-probed every 0.1 seconds
    until the probe reports that no such process exists.

    :param pid: Process ID of the target process.
    """

    while True:
        # Probe before sleeping so an already-terminated process returns
        # immediately instead of costing one polling interval.
        try:
            os.kill(pid, 0)
        except ProcessLookupError:
            # ESRCH: the process is gone.
            return
        except OSError:
            # Any other error (e.g. EPERM) does not mean the process has
            # exited, so keep polling.
            pass

        time.sleep(.1)
|
|
|
|
|
|
def main():
    """Entrypoint.

    Usage: ``vgpu_unlock [-f] <command> [args...]``

    Without ``-f`` the script relaunches itself with ``-f`` in a new process
    and exits, so that it behaves a little like a forking service. With
    ``-f`` it spawns the command through ``/bin/bash -c`` under Frida,
    instruments it and any child processes, and waits for all of them to
    terminate.

    :raises SystemExit: With status 2 when no command was given; with the
        default status after relaunching itself.
    """

    args = sys.argv[1:]
    foreground = bool(args) and args[0] == "-f"
    command = args[1:] if foreground else args

    # Reject a missing command up front instead of failing with IndexError
    # or spawning an empty shell.
    if not command:
        print("usage: vgpu_unlock [-f] <command> [args...]", file=sys.stderr)
        sys.exit(2)

    # Behave at least a little bit like a forking service.
    if not foreground:
        subprocess.Popen([sys.argv[0], "-f"] + args)
        sys.exit()

    device.on("child-added", on_child_added)
    # NOTE(review): the arguments are joined with plain spaces and interpreted
    # by bash, so arguments containing whitespace or shell metacharacters are
    # re-parsed by the shell. Kept as-is for compatibility.
    pid = device.spawn(["/bin/bash", "-c", ' '.join(command)])
    instrument(pid)

    # Wait for everything to terminate before exiting.
    wait_exit(pid)

    while not child_processes.empty():
        wait_exit(child_processes.get_nowait())
|
|
|
|
|
|
# Run the entry point only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|