mirror of
https://github.com/aristocratos/btop.git
synced 2024-05-29 16:50:20 +12:00
Handle GPUs which cannot report certain stats in btop_collect.cpp and CPU panel
This commit is contained in:
parent
005de97e6d
commit
414d7eb94c
|
@ -507,17 +507,15 @@ namespace Runner {
|
||||||
//* Run collection and draw functions for all boxes
|
//* Run collection and draw functions for all boxes
|
||||||
try {
|
try {
|
||||||
//? GPU data collection
|
//? GPU data collection
|
||||||
vector<Gpu::gpu_info> gpus;
|
bool gpu_in_cpu_panel = Config::getS("cpu_graph_lower").rfind("gpu-", 0) == 0
|
||||||
bool gpu_in_cpu_panel =
|
or Config::getS("cpu_graph_upper").rfind("gpu-", 0) == 0;
|
||||||
Config::getS("cpu_graph_lower") == "default"
|
|
||||||
or Config::getS("cpu_graph_lower").rfind("gpu-", 0) == 0
|
|
||||||
or Config::getS("cpu_graph_upper").rfind("gpu-", 0) == 0;
|
|
||||||
|
|
||||||
vector<unsigned int> gpu_panels = {};
|
vector<unsigned int> gpu_panels = {};
|
||||||
for (auto& box : conf.boxes)
|
for (auto& box : conf.boxes)
|
||||||
if (box.rfind("gpu", 0) == 0)
|
if (box.rfind("gpu", 0) == 0)
|
||||||
gpu_panels.push_back(box.back()-'0');
|
gpu_panels.push_back(box.back()-'0');
|
||||||
|
|
||||||
|
vector<Gpu::gpu_info> gpus;
|
||||||
if (gpu_in_cpu_panel or not gpu_panels.empty()) {
|
if (gpu_in_cpu_panel or not gpu_panels.empty()) {
|
||||||
if (Global::debug) debug_timer("gpu", collect_begin);
|
if (Global::debug) debug_timer("gpu", collect_begin);
|
||||||
gpus = Gpu::collect(conf.no_update);
|
gpus = Gpu::collect(conf.no_update);
|
||||||
|
|
|
@ -498,6 +498,7 @@ namespace Cpu {
|
||||||
long unsigned int lavg_str_len = 0;
|
long unsigned int lavg_str_len = 0;
|
||||||
int graph_up_height, graph_low_height;
|
int graph_up_height, graph_low_height;
|
||||||
int graph_up_width, graph_low_width;
|
int graph_up_width, graph_low_width;
|
||||||
|
int gpu_meter_width;
|
||||||
bool shown = true, redraw = true, mid_line = false;
|
bool shown = true, redraw = true, mid_line = false;
|
||||||
string box;
|
string box;
|
||||||
vector<Draw::Graph> graphs_upper;
|
vector<Draw::Graph> graphs_upper;
|
||||||
|
@ -566,16 +567,25 @@ namespace Cpu {
|
||||||
auto& gpu = gpus[i]; auto& graph = graphs[i];
|
auto& gpu = gpus[i]; auto& graph = graphs[i];
|
||||||
|
|
||||||
//? GPU graphs/meters
|
//? GPU graphs/meters
|
||||||
gpu_temp_graphs[i] = Draw::Graph{ 5, 1, "temp", gpu.temp, graph_symbol, false, false, gpu.temp_max, -23 };
|
if (gpu.supported_functions.temp_info)
|
||||||
gpu_mem_graphs[i] = Draw::Graph{ 5, 1, "used", gpu.mem_used_percent, graph_symbol };
|
gpu_temp_graphs[i] = Draw::Graph{ 5, 1, "temp", gpu.temp, graph_symbol, false, false, gpu.temp_max, -23 };
|
||||||
gpu_meters[i] = Draw::Meter{ b_width - 12 - (int)floating_humanizer(gpu.mem_total, true).size() - (show_temps ? 24 : 12) - (int)to_string(i).size(), "cpu" };
|
if (gpu.supported_functions.mem_used and gpu.supported_functions.mem_total)
|
||||||
if (++i < gpus.size())
|
gpu_mem_graphs[i] = Draw::Graph{ 5, 1, "used", gpu.mem_used_percent, graph_symbol };
|
||||||
graph = Draw::Graph{graph_width, graph_height, "cpu", gpu.gpu_percent, graph_symbol, invert, true};
|
if (gpu.supported_functions.gpu_utilization) {
|
||||||
else {
|
gpu_meter_width = b_width - 12 - (int)floating_humanizer(gpu.mem_total, true).size() - (show_temps ? 24 : 12) - (int)to_string(i).size() + (gpus.size() == 1)*2 - (gpus.size() > 9 and i <= 9);
|
||||||
graph = Draw::Graph{
|
gpu_meters[i] = Draw::Meter{gpu_meter_width, "cpu" };
|
||||||
graph_width + graph_default_width%graph_width - (int)gpus.size() + 1,
|
}
|
||||||
graph_height, "cpu", gpu.gpu_percent, graph_symbol, invert, true
|
|
||||||
};
|
bool utilization_support = gpu.supported_functions.gpu_utilization;
|
||||||
|
if (++i < gpus.size()) {
|
||||||
|
if (utilization_support)
|
||||||
|
graph = Draw::Graph{graph_width, graph_height, "cpu", gpu.gpu_percent, graph_symbol, invert, true};
|
||||||
|
} else {
|
||||||
|
if (utilization_support)
|
||||||
|
graph = Draw::Graph{
|
||||||
|
graph_width + graph_default_width%graph_width - (int)gpus.size() + 1,
|
||||||
|
graph_height, "cpu", gpu.gpu_percent, graph_symbol, invert, true
|
||||||
|
};
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -587,9 +597,14 @@ namespace Cpu {
|
||||||
gpu_mem_graphs.resize(gpus.size());
|
gpu_mem_graphs.resize(gpus.size());
|
||||||
gpu_meters.resize(gpus.size());
|
gpu_meters.resize(gpus.size());
|
||||||
for (unsigned long i = 0; i < gpus.size(); ++i) {
|
for (unsigned long i = 0; i < gpus.size(); ++i) {
|
||||||
gpu_temp_graphs[i] = Draw::Graph{ 5, 1, "temp", gpus[i].temp, graph_symbol, false, false, gpus[i].temp_max, -23 };
|
if (gpus[i].supported_functions.temp_info)
|
||||||
gpu_mem_graphs[i] = Draw::Graph{ 5, 1, "used", gpus[i].mem_used_percent, graph_symbol };
|
gpu_temp_graphs[i] = Draw::Graph{ 5, 1, "temp", gpus[i].temp, graph_symbol, false, false, gpus[i].temp_max, -23 };
|
||||||
gpu_meters[i] = Draw::Meter{ b_width - 12 - (int)floating_humanizer(gpus[i].mem_total, true).size() - (show_temps ? 24 : 12) - (int)to_string(i).size(), "cpu" };
|
if (gpus[i].supported_functions.mem_used and gpus[i].supported_functions.mem_total)
|
||||||
|
gpu_mem_graphs[i] = Draw::Graph{ 5, 1, "used", gpus[i].mem_used_percent, graph_symbol };
|
||||||
|
if (gpus[i].supported_functions.gpu_utilization) {
|
||||||
|
gpu_meter_width = b_width - 12 - (int)floating_humanizer(gpus[i].mem_total, true).size() - (show_temps ? 24 : 12) - (int)to_string(i).size() + (gpus.size() == 1)*2 - (gpus.size() > 9 and i <= 9);
|
||||||
|
gpu_meters[i] = Draw::Meter{gpu_meter_width, "cpu" };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
graphs.resize(1);
|
graphs.resize(1);
|
||||||
|
@ -675,12 +690,15 @@ namespace Cpu {
|
||||||
auto draw_graphs = [&](vector<Draw::Graph>& graphs, const int graph_height, const int graph_width, const string& graph_field) {
|
auto draw_graphs = [&](vector<Draw::Graph>& graphs, const int graph_height, const int graph_width, const string& graph_field) {
|
||||||
if (graph_field == "gpu-totals")
|
if (graph_field == "gpu-totals")
|
||||||
for (unsigned long i = 0;;) {
|
for (unsigned long i = 0;;) {
|
||||||
out += graphs[i](gpus[i].gpu_percent, (data_same or redraw));
|
if (gpus[i].supported_functions.gpu_utilization) {
|
||||||
if (gpus.size() > 1) {
|
out += graphs[i](gpus[i].gpu_percent, (data_same or redraw));
|
||||||
auto i_str = to_string(i);
|
if (gpus.size() > 1) {
|
||||||
out += Mv::l(graph_width-1) + Mv::u(graph_height/2) + (graph_width > 5 ? "GPU " : "") + i_str
|
auto i_str = to_string(i);
|
||||||
+ Mv::d(graph_height/2) + Mv::r(graph_width - 1 - (graph_width > 5)*4 - i_str.size());
|
out += Mv::l(graph_width-1) + Mv::u(graph_height/2) + (graph_width > 5 ? "GPU " : "") + i_str
|
||||||
}
|
+ Mv::d(graph_height/2) + Mv::r(graph_width - 1 - (graph_width > 5)*4 - i_str.size());
|
||||||
|
}
|
||||||
|
} else out += Mv::d(graph_height/2) + Mv::r(graph_width/2 - 6) + "UNSUPPORTED" + Mv::r(graph_width/2 - 5);
|
||||||
|
|
||||||
if (++i < graphs.size())
|
if (++i < graphs.size())
|
||||||
out += Theme::c("div_line") + (Symbols::v_line + Mv::l(1) + Mv::u(1))*graph_height + Mv::r(1) + Mv::d(1);
|
out += Theme::c("div_line") + (Symbols::v_line + Mv::l(1) + Mv::u(1))*graph_height + Mv::r(1) + Mv::d(1);
|
||||||
else break;
|
else break;
|
||||||
|
@ -790,12 +808,21 @@ namespace Cpu {
|
||||||
for (unsigned long i = 0; i < gpus.size(); ++i) {
|
for (unsigned long i = 0; i < gpus.size(); ++i) {
|
||||||
if (not v_contains(Gpu::shown_panels, i)) {
|
if (not v_contains(Gpu::shown_panels, i)) {
|
||||||
out += Mv::to(b_y + b_height - 1 - gpus.size() + ++shown_panels_count - (Gpu::shown == 0), b_x + 1)
|
out += Mv::to(b_y + b_height - 1 - gpus.size() + ++shown_panels_count - (Gpu::shown == 0), b_x + 1)
|
||||||
+ Theme::c("main_fg") + Fx::b + "GPU " + to_string(i) + ' ' + gpu_meters[i](gpus[i].gpu_percent.back())
|
+ Theme::c("main_fg") + Fx::b + "GPU " + (gpus.size() > 1 ? rjust(to_string(i) + ' ', 1 + gpus.size() > 9) : "");
|
||||||
+ Theme::g("cpu").at(gpus[i].gpu_percent.back()) + rjust(to_string(gpus[i].gpu_percent.back()), 4) + Theme::c("main_fg") + '%';
|
if (gpus[i].supported_functions.gpu_utilization)
|
||||||
out += ' ' + Theme::c("inactive_fg") + graph_bg * 6 + Mv::l(6) + Theme::g("used").at(gpus[i].mem_used_percent.back())
|
out += gpu_meters[i](gpus[i].gpu_percent.back())
|
||||||
+ gpu_mem_graphs[i](gpus[i].mem_used_percent, data_same or redraw) + Theme::c("main_fg")
|
+ Theme::g("cpu").at(gpus[i].gpu_percent.back()) + rjust(to_string(gpus[i].gpu_percent.back()), 4) + Theme::c("main_fg") + '%';
|
||||||
+ rjust(floating_humanizer(gpus[i].mem_used, true), 5) + Theme::c("inactive_fg") + '/' + Theme::c("main_fg") + floating_humanizer(gpus[i].mem_total, true);
|
else out += Mv::r(gpu_meter_width);
|
||||||
if (show_temps) {
|
|
||||||
|
if (gpus[i].supported_functions.mem_used) {
|
||||||
|
out += ' ' + Theme::c("inactive_fg") + graph_bg * 6 + Mv::l(6) + Theme::g("used").at(gpus[i].mem_used_percent.back())
|
||||||
|
+ gpu_mem_graphs[i](gpus[i].mem_used_percent, data_same or redraw) + Theme::c("main_fg")
|
||||||
|
+ rjust(floating_humanizer(gpus[i].mem_used, true), 5);
|
||||||
|
if (gpus[i].supported_functions.mem_total)
|
||||||
|
out += Theme::c("inactive_fg") + '/' + Theme::c("main_fg") + floating_humanizer(gpus[i].mem_total, true);
|
||||||
|
else out += Mv::r(5);
|
||||||
|
} else out += Mv::r(17);
|
||||||
|
if (show_temps and gpus[i].supported_functions.temp_info) {
|
||||||
const auto [temp, unit] = celsius_to(gpus[i].temp.back(), temp_scale);
|
const auto [temp, unit] = celsius_to(gpus[i].temp.back(), temp_scale);
|
||||||
out += ' ' + Theme::c("inactive_fg") + graph_bg * 6 + Mv::l(6) + Theme::g("temp").at(clamp(gpus[i].temp.back() * 100 / gpus[i].temp_max, 0ll, 100ll))
|
out += ' ' + Theme::c("inactive_fg") + graph_bg * 6 + Mv::l(6) + Theme::g("temp").at(clamp(gpus[i].temp.back() * 100 / gpus[i].temp_max, 0ll, 100ll))
|
||||||
+ gpu_temp_graphs[i](gpus[i].temp, data_same or redraw)
|
+ gpu_temp_graphs[i](gpus[i].temp, data_same or redraw)
|
||||||
|
|
|
@ -104,15 +104,29 @@ namespace Gpu {
|
||||||
unsigned long long mem;
|
unsigned long long mem;
|
||||||
};*/
|
};*/
|
||||||
|
|
||||||
|
//* Container for supported Gpu::*::collect() functions
|
||||||
|
struct gpu_info_supported {
|
||||||
|
bool gpu_utilization = true,
|
||||||
|
mem_utilization = true,
|
||||||
|
gpu_clock = true,
|
||||||
|
mem_clock = true,
|
||||||
|
pwr_usage = true,
|
||||||
|
pwr_state = true,
|
||||||
|
temp_info = true,
|
||||||
|
mem_total = true,
|
||||||
|
mem_used = true,
|
||||||
|
pcie_txrx = true;
|
||||||
|
};
|
||||||
|
|
||||||
//* Per-device container for GPU info
|
//* Per-device container for GPU info
|
||||||
struct gpu_info {
|
struct gpu_info {
|
||||||
deque<long long> gpu_percent = {0};
|
deque<long long> gpu_percent = {};
|
||||||
unsigned int gpu_clock_speed = 0; // MHz
|
unsigned int gpu_clock_speed; // MHz
|
||||||
|
|
||||||
deque<long long> pwr_percent = {0};
|
deque<long long> pwr_percent = {};
|
||||||
long long pwr_usage = 0; // mW
|
long long pwr_usage; // mW
|
||||||
long long pwr_max_usage = 255000;
|
long long pwr_max_usage = 255000;
|
||||||
long long pwr_state = 32;
|
long long pwr_state;
|
||||||
|
|
||||||
deque<long long> temp = {0};
|
deque<long long> temp = {0};
|
||||||
long long temp_max = 110;
|
long long temp_max = 110;
|
||||||
|
@ -126,6 +140,8 @@ namespace Gpu {
|
||||||
long long pcie_tx = 0; // KB/s
|
long long pcie_tx = 0; // KB/s
|
||||||
long long pcie_rx = 0;
|
long long pcie_rx = 0;
|
||||||
|
|
||||||
|
gpu_info_supported supported_functions;
|
||||||
|
|
||||||
// vector<proc_info> graphics_processes = {}; // TODO
|
// vector<proc_info> graphics_processes = {}; // TODO
|
||||||
// vector<proc_info> compute_processes = {};
|
// vector<proc_info> compute_processes = {};
|
||||||
};
|
};
|
||||||
|
|
|
@ -105,6 +105,7 @@ namespace Gpu {
|
||||||
bool initialized = false;
|
bool initialized = false;
|
||||||
bool init();
|
bool init();
|
||||||
bool shutdown();
|
bool shutdown();
|
||||||
|
template <bool is_init> bool collect(gpu_info* gpus_slice);
|
||||||
#if defined(GPU_NVIDIA)
|
#if defined(GPU_NVIDIA)
|
||||||
vector<nvmlDevice_t> devices;
|
vector<nvmlDevice_t> devices;
|
||||||
#endif
|
#endif
|
||||||
|
@ -116,6 +117,7 @@ namespace Gpu {
|
||||||
bool initialized = false;
|
bool initialized = false;
|
||||||
bool init();
|
bool init();
|
||||||
bool shutdown();
|
bool shutdown();
|
||||||
|
template <bool is_init> bool collect(gpu_info* gpus_slice);
|
||||||
uint32_t device_count = 0;
|
uint32_t device_count = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -193,6 +195,7 @@ namespace Shared {
|
||||||
Mem::old_uptime = system_uptime();
|
Mem::old_uptime = system_uptime();
|
||||||
Mem::collect();
|
Mem::collect();
|
||||||
|
|
||||||
|
//? Init for CPU graphs
|
||||||
if (Config::strings.at("cpu_graph_upper") == "default" or not v_contains(Cpu::available_fields, Config::strings.at("cpu_graph_upper")))
|
if (Config::strings.at("cpu_graph_upper") == "default" or not v_contains(Cpu::available_fields, Config::strings.at("cpu_graph_upper")))
|
||||||
Config::strings.at("cpu_graph_upper") = "total";
|
Config::strings.at("cpu_graph_upper") = "total";
|
||||||
if (Config::strings.at("cpu_graph_lower") == "default" or not v_contains(Cpu::available_fields, Config::strings.at("cpu_graph_lower")))
|
if (Config::strings.at("cpu_graph_lower") == "default" or not v_contains(Cpu::available_fields, Config::strings.at("cpu_graph_lower")))
|
||||||
|
@ -857,18 +860,19 @@ namespace Gpu {
|
||||||
//? NVIDIA
|
//? NVIDIA
|
||||||
namespace Nvml {
|
namespace Nvml {
|
||||||
bool init() {
|
bool init() {
|
||||||
if (initialized) return false;
|
|
||||||
#if defined(GPU_NVIDIA)
|
#if defined(GPU_NVIDIA)
|
||||||
|
if (initialized) return false;
|
||||||
|
|
||||||
nvmlReturn_t result = nvmlInit();
|
nvmlReturn_t result = nvmlInit();
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::debug(std::string("Failed to initialize NVML, NVIDIA GPUs will not be detected: ") + nvmlErrorString(result));
|
Logger::warning(std::string("Failed to initialize NVML, NVIDIA GPUs will not be detected: ") + nvmlErrorString(result));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
//? Device count
|
//? Device count
|
||||||
result = nvmlDeviceGetCount(&device_count);
|
result = nvmlDeviceGetCount(&device_count);
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::debug(std::string("NVML: Failed to get device count: ") + nvmlErrorString(result));
|
Logger::warning(std::string("NVML: Failed to get device count: ") + nvmlErrorString(result));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -881,7 +885,8 @@ namespace Gpu {
|
||||||
//? Device Handle
|
//? Device Handle
|
||||||
result = nvmlDeviceGetHandleByIndex(i, devices.data() + i*sizeof(nvmlDevice_t));
|
result = nvmlDeviceGetHandleByIndex(i, devices.data() + i*sizeof(nvmlDevice_t));
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::debug(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result));
|
Logger::warning(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result));
|
||||||
|
gpus[i].supported_functions = {false, false, false, false, false, false, false, false};
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -889,7 +894,7 @@ namespace Gpu {
|
||||||
char name[NVML_DEVICE_NAME_BUFFER_SIZE];
|
char name[NVML_DEVICE_NAME_BUFFER_SIZE];
|
||||||
result = nvmlDeviceGetName(devices[i], name, NVML_DEVICE_NAME_BUFFER_SIZE);
|
result = nvmlDeviceGetName(devices[i], name, NVML_DEVICE_NAME_BUFFER_SIZE);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS)
|
||||||
Logger::debug(std::string("NVML: Failed to get device name: ") + nvmlErrorString(result));
|
Logger::warning(std::string("NVML: Failed to get device name: ") + nvmlErrorString(result));
|
||||||
else {
|
else {
|
||||||
gpu_names[i] = string(name);
|
gpu_names[i] = string(name);
|
||||||
for (const auto& brand : {"NVIDIA", "Nvidia", "AMD", "Amd", "Intel", "(R)", "(TM)"}) {
|
for (const auto& brand : {"NVIDIA", "Nvidia", "AMD", "Amd", "Intel", "(R)", "(TM)"}) {
|
||||||
|
@ -902,18 +907,21 @@ namespace Gpu {
|
||||||
unsigned int max_power;
|
unsigned int max_power;
|
||||||
result = nvmlDeviceGetPowerManagementLimit(devices[i], &max_power);
|
result = nvmlDeviceGetPowerManagementLimit(devices[i], &max_power);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS)
|
||||||
Logger::debug(std::string("NVML: Failed to get maximum GPU power draw, defaulting to 225W: ") + nvmlErrorString(result));
|
Logger::warning(std::string("NVML: Failed to get maximum GPU power draw, defaulting to 225W: ") + nvmlErrorString(result));
|
||||||
else gpus[i].pwr_max_usage = max_power;
|
else gpus[i].pwr_max_usage = max_power;
|
||||||
|
|
||||||
//? Get temp_max
|
//? Get temp_max
|
||||||
unsigned int temp_max;
|
unsigned int temp_max;
|
||||||
result = nvmlDeviceGetTemperatureThreshold(devices[i], NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, &temp_max);
|
result = nvmlDeviceGetTemperatureThreshold(devices[i], NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, &temp_max);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS)
|
||||||
Logger::debug(std::string("NVML: Failed to get maximum GPU temperature, defaulting to 110: ") + nvmlErrorString(result));
|
Logger::warning(std::string("NVML: Failed to get maximum GPU temperature, defaulting to 110°C: ") + nvmlErrorString(result));
|
||||||
else gpus[i].temp_max = (long long)temp_max;
|
else gpus[i].temp_max = (long long)temp_max;
|
||||||
}
|
}
|
||||||
initialized = true;
|
initialized = true;
|
||||||
|
|
||||||
|
//? Check supported functions
|
||||||
|
Nvml::collect<1>(gpus.data());
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} else {initialized = true; shutdown(); return false;}
|
} else {initialized = true; shutdown(); return false;}
|
||||||
#else
|
#else
|
||||||
|
@ -922,8 +930,8 @@ namespace Gpu {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool shutdown() {
|
bool shutdown() {
|
||||||
if (!initialized) return false;
|
|
||||||
#if defined(GPU_NVIDIA)
|
#if defined(GPU_NVIDIA)
|
||||||
|
if (!initialized) return false;
|
||||||
nvmlReturn_t result = nvmlShutdown();
|
nvmlReturn_t result = nvmlShutdown();
|
||||||
if (NVML_SUCCESS == result)
|
if (NVML_SUCCESS == result)
|
||||||
initialized = false;
|
initialized = false;
|
||||||
|
@ -935,102 +943,129 @@ namespace Gpu {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <bool is_init> // collect<1> is called in Nvml::init(), and populates gpus.supported_functions
|
||||||
bool collect(gpu_info* gpus_slice) { // raw pointer to vector data, size == device_count
|
bool collect(gpu_info* gpus_slice) { // raw pointer to vector data, size == device_count
|
||||||
if (!initialized) return false;
|
|
||||||
#if defined(GPU_NVIDIA)
|
#if defined(GPU_NVIDIA)
|
||||||
|
if (!initialized) return false;
|
||||||
nvmlReturn_t result;
|
nvmlReturn_t result;
|
||||||
// DebugTimer gpu_nvidia("Nvidia Total");
|
// DebugTimer gpu_nvidia("Nvidia Total");
|
||||||
for (unsigned int i = 0; i < device_count; ++i) {
|
for (unsigned int i = 0; i < device_count; ++i) {
|
||||||
//? GPU & memory utilization
|
//? GPU & memory utilization
|
||||||
// DebugTimer nvTimer("Nv utilization");
|
if (gpus_slice[i].supported_functions.gpu_utilization) {
|
||||||
nvmlUtilization_t utilization;
|
nvmlUtilization_t utilization;
|
||||||
result = nvmlDeviceGetUtilizationRates(devices[i], &utilization);
|
result = nvmlDeviceGetUtilizationRates(devices[i], &utilization);
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::debug(std::string("NVML: Failed to get GPU utilization: ") + nvmlErrorString(result));
|
Logger::warning(std::string("NVML: Failed to get GPU utilization: ") + nvmlErrorString(result));
|
||||||
} else {
|
if constexpr(is_init) gpus_slice[i].supported_functions.gpu_utilization = false;
|
||||||
gpus_slice[i].gpu_percent.push_back((long long)utilization.gpu);
|
if constexpr(is_init) gpus_slice[i].supported_functions.mem_utilization = false;
|
||||||
gpus_slice[i].mem_utilization_percent.push_back((long long)utilization.memory);
|
} else {
|
||||||
}
|
gpus_slice[i].gpu_percent.push_back((long long)utilization.gpu);
|
||||||
|
gpus_slice[i].mem_utilization_percent.push_back((long long)utilization.memory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// nvTimer.stop_rename_reset("Nv clock");
|
// nvTimer.stop_rename_reset("Nv clock");
|
||||||
//? Clock speeds
|
//? Clock speeds
|
||||||
unsigned int gpu_clock, mem_clock;
|
if (gpus_slice[i].supported_functions.gpu_clock) {
|
||||||
|
unsigned int gpu_clock;
|
||||||
|
result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_GRAPHICS, &gpu_clock);
|
||||||
|
if (result != NVML_SUCCESS) {
|
||||||
|
Logger::warning(std::string("NVML: Failed to get GPU clock speed: ") + nvmlErrorString(result));
|
||||||
|
if constexpr(is_init) gpus_slice[i].supported_functions.gpu_clock = false;
|
||||||
|
} else gpus_slice[i].gpu_clock_speed = (long long)gpu_clock;
|
||||||
|
}
|
||||||
|
|
||||||
result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_GRAPHICS, &gpu_clock);
|
if (gpus_slice[i].supported_functions.mem_clock) {
|
||||||
if (result != NVML_SUCCESS)
|
unsigned int mem_clock;
|
||||||
Logger::debug(std::string("NVML: Failed to get GPU clock speed: ") + nvmlErrorString(result));
|
result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_MEM, &mem_clock);
|
||||||
else gpus_slice[i].gpu_clock_speed = (long long)gpu_clock;
|
if (result != NVML_SUCCESS) {
|
||||||
|
Logger::warning(std::string("NVML: Failed to get VRAM clock speed: ") + nvmlErrorString(result));
|
||||||
result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_MEM, &mem_clock);
|
if constexpr(is_init) gpus_slice[i].supported_functions.mem_clock = false;
|
||||||
if (result != NVML_SUCCESS)
|
} else gpus_slice[i].mem_clock_speed = (long long)mem_clock;
|
||||||
Logger::debug(std::string("NVML: Failed to get VRAM clock speed: ") + nvmlErrorString(result));
|
}
|
||||||
else gpus_slice[i].mem_clock_speed = (long long)mem_clock;
|
|
||||||
|
|
||||||
// nvTimer.stop_rename_reset("Nv power");
|
// nvTimer.stop_rename_reset("Nv power");
|
||||||
//? Power usage & state
|
//? Power usage & state
|
||||||
unsigned int power;
|
if (gpus_slice[i].supported_functions.pwr_usage) {
|
||||||
result = nvmlDeviceGetPowerUsage(devices[i], &power);
|
unsigned int power;
|
||||||
if (result != NVML_SUCCESS) {
|
result = nvmlDeviceGetPowerUsage(devices[i], &power);
|
||||||
Logger::debug(std::string("NVML: Failed to get GPU power usage: ") + nvmlErrorString(result));
|
if (result != NVML_SUCCESS) {
|
||||||
} else {
|
Logger::warning(std::string("NVML: Failed to get GPU power usage: ") + nvmlErrorString(result));
|
||||||
gpus_slice[i].pwr_usage = (long long)power;
|
if constexpr(is_init) gpus_slice[i].supported_functions.pwr_usage = false;
|
||||||
gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
|
} else {
|
||||||
|
gpus_slice[i].pwr_usage = (long long)power;
|
||||||
|
gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
nvmlPstates_t pState;
|
if (gpus_slice[i].supported_functions.pwr_state) {
|
||||||
result = nvmlDeviceGetPowerState(devices[i], &pState);
|
nvmlPstates_t pState;
|
||||||
if (result != NVML_SUCCESS)
|
result = nvmlDeviceGetPowerState(devices[i], &pState);
|
||||||
Logger::debug(std::string("NVML: Failed to get GPU power state: ") + nvmlErrorString(result));
|
if (result != NVML_SUCCESS) {
|
||||||
else gpus_slice[i].pwr_state = static_cast<int>(pState);
|
Logger::warning(std::string("NVML: Failed to get GPU power state: ") + nvmlErrorString(result));
|
||||||
|
if constexpr(is_init) gpus_slice[i].supported_functions.pwr_state = false;
|
||||||
|
} else gpus_slice[i].pwr_state = static_cast<int>(pState);
|
||||||
|
}
|
||||||
|
|
||||||
// nvTimer.stop_rename_reset("Nv temp");
|
// nvTimer.stop_rename_reset("Nv temp");
|
||||||
//? GPU temperature
|
//? GPU temperature
|
||||||
if (Config::getB("check_temp")) {
|
if (gpus_slice[i].supported_functions.temp_info) {
|
||||||
unsigned int temp;
|
if (Config::getB("check_temp")) {
|
||||||
nvmlReturn_t result = nvmlDeviceGetTemperature(devices[i], NVML_TEMPERATURE_GPU, &temp);
|
unsigned int temp;
|
||||||
if (result != NVML_SUCCESS)
|
nvmlReturn_t result = nvmlDeviceGetTemperature(devices[i], NVML_TEMPERATURE_GPU, &temp);
|
||||||
Logger::debug(std::string("NVML: Failed to get GPU temperature: ") + nvmlErrorString(result));
|
if (result != NVML_SUCCESS) {
|
||||||
else gpus_slice[i].temp.push_back((long long)temp);
|
Logger::warning(std::string("NVML: Failed to get GPU temperature: ") + nvmlErrorString(result));
|
||||||
|
if constexpr(is_init) gpus_slice[i].supported_functions.temp_info = false;
|
||||||
|
} else gpus_slice[i].temp.push_back((long long)temp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// nvTimer.stop_rename_reset("Nv mem");
|
// nvTimer.stop_rename_reset("Nv mem");
|
||||||
//? Memory info
|
//? Memory info
|
||||||
nvmlMemory_t memory;
|
if (gpus_slice[i].supported_functions.mem_total) {
|
||||||
result = nvmlDeviceGetMemoryInfo(devices[i], &memory);
|
nvmlMemory_t memory;
|
||||||
if (result != NVML_SUCCESS) {
|
result = nvmlDeviceGetMemoryInfo(devices[i], &memory);
|
||||||
Logger::debug(std::string("NVML: Failed to get VRAM info: ") + nvmlErrorString(result));
|
if (result != NVML_SUCCESS) {
|
||||||
} else {
|
Logger::warning(std::string("NVML: Failed to get VRAM info: ") + nvmlErrorString(result));
|
||||||
gpus_slice[i].mem_total = memory.total;
|
if constexpr(is_init) gpus_slice[i].supported_functions.mem_total = false;
|
||||||
gpus_slice[i].mem_used = memory.used;
|
if constexpr(is_init) gpus_slice[i].supported_functions.mem_used = false;
|
||||||
//gpu.mem_free = memory.free;
|
} else {
|
||||||
|
gpus_slice[i].mem_total = memory.total;
|
||||||
|
gpus_slice[i].mem_used = memory.used;
|
||||||
|
//gpu.mem_free = memory.free;
|
||||||
|
|
||||||
auto used_percent = (long long)round((double)memory.used * 100.0 / (double)memory.total);
|
auto used_percent = (long long)round((double)memory.used * 100.0 / (double)memory.total);
|
||||||
gpus_slice[i].mem_used_percent.push_back(used_percent);
|
gpus_slice[i].mem_used_percent.push_back(used_percent);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//nvTimer.stop_rename_reset("Nv pcie");
|
//nvTimer.stop_rename_reset("Nv pcie");
|
||||||
//? PCIe link speeds
|
//? PCIe link speeds
|
||||||
unsigned int tx,rx;
|
if (gpus_slice[i].supported_functions.pcie_txrx) {
|
||||||
result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_TX_BYTES, &tx);
|
unsigned int tx,rx;
|
||||||
if (result != NVML_SUCCESS)
|
result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_TX_BYTES, &tx);
|
||||||
Logger::error(std::string("NVML: Failed to get PCIe TX throughput: ") + nvmlErrorString(result));
|
if (result != NVML_SUCCESS) {
|
||||||
else gpus_slice[i].pcie_tx = (long long)tx;
|
Logger::warning(std::string("NVML: Failed to get PCIe TX throughput: ") + nvmlErrorString(result));
|
||||||
|
if constexpr(is_init) gpus_slice[i].supported_functions.pcie_txrx = false;
|
||||||
|
} else gpus_slice[i].pcie_tx = (long long)tx;
|
||||||
|
|
||||||
result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_RX_BYTES, &rx);
|
result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_RX_BYTES, &rx);
|
||||||
if (result != NVML_SUCCESS)
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::error(std::string("NVML: Failed to get PCIe RX throughput: ") + nvmlErrorString(result));
|
Logger::warning(std::string("NVML: Failed to get PCIe RX throughput: ") + nvmlErrorString(result));
|
||||||
else gpus_slice[i].pcie_rx = (long long)rx;
|
if constexpr(is_init) gpus_slice[i].supported_functions.pcie_txrx = false;
|
||||||
|
} else gpus_slice[i].pcie_rx = (long long)rx;
|
||||||
|
}
|
||||||
|
|
||||||
//? TODO: Processes using GPU
|
//? TODO: Processes using GPU
|
||||||
/*unsigned int proc_info_len;
|
/*unsigned int proc_info_len;
|
||||||
nvmlProcessInfo_t* proc_info = 0;
|
nvmlProcessInfo_t* proc_info = 0;
|
||||||
result = nvmlDeviceGetComputeRunningProcesses_v3(device, &proc_info_len, proc_info);
|
result = nvmlDeviceGetComputeRunningProcesses_v3(device, &proc_info_len, proc_info);
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
Logger::error(std::string("NVML: Failed to get compute processes: ") + nvmlErrorString(result));
|
Logger::warning(std::string("NVML: Failed to get compute processes: ") + nvmlErrorString(result));
|
||||||
} else {
|
} else {
|
||||||
for (unsigned int i = 0; i < proc_info_len; ++i)
|
for (unsigned int i = 0; i < proc_info_len; ++i)
|
||||||
gpus_slice[i].graphics_processes.push_back({proc_info[i].pid, proc_info[i].usedGpuMemory});
|
gpus_slice[i].graphics_processes.push_back({proc_info[i].pid, proc_info[i].usedGpuMemory});
|
||||||
}*/
|
}*/
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -1044,8 +1079,8 @@ namespace Gpu {
|
||||||
//? AMD
|
//? AMD
|
||||||
namespace Rsmi {
|
namespace Rsmi {
|
||||||
bool init() {
|
bool init() {
|
||||||
if (initialized) return false;
|
|
||||||
#if defined(GPU_AMD)
|
#if defined(GPU_AMD)
|
||||||
|
if (initialized) return false;
|
||||||
rsmi_status_t result;
|
rsmi_status_t result;
|
||||||
|
|
||||||
result = rsmi_init(0);
|
result = rsmi_init(0);
|
||||||
|
@ -1057,7 +1092,7 @@ namespace Gpu {
|
||||||
//? Device count
|
//? Device count
|
||||||
result = rsmi_num_monitor_devices(&device_count);
|
result = rsmi_num_monitor_devices(&device_count);
|
||||||
if (result != RSMI_STATUS_SUCCESS) {
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
Logger::debug("ROCm SMI: Failed to fetch number of devices");
|
Logger::warning("ROCm SMI: Failed to fetch number of devices");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1072,25 +1107,28 @@ namespace Gpu {
|
||||||
char name[NVML_DEVICE_NAME_BUFFER_SIZE]; // ROCm SMI does not provide a constant for this as far as I can tell, this should be good enough
|
char name[NVML_DEVICE_NAME_BUFFER_SIZE]; // ROCm SMI does not provide a constant for this as far as I can tell, this should be good enough
|
||||||
result = rsmi_dev_name_get(i, name, NVML_DEVICE_NAME_BUFFER_SIZE);
|
result = rsmi_dev_name_get(i, name, NVML_DEVICE_NAME_BUFFER_SIZE);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::debug("ROCm SMI: Failed to get device name");
|
Logger::warning("ROCm SMI: Failed to get device name");
|
||||||
else gpu_names[offset] = string(name);
|
else gpu_names[offset] = string(name);
|
||||||
|
|
||||||
//? Power usage
|
//? Power usage
|
||||||
uint64_t max_power;
|
uint64_t max_power;
|
||||||
result = rsmi_dev_power_cap_get(i, 0, &max_power);
|
result = rsmi_dev_power_cap_get(i, 0, &max_power);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::debug("ROCm SMI: Failed to get maximum GPU power draw, defaulting to 225W");
|
Logger::warning("ROCm SMI: Failed to get maximum GPU power draw, defaulting to 225W");
|
||||||
else gpus[offset].pwr_max_usage = (long long)(max_power/1000); // RSMI reports power in microWatts
|
else gpus[offset].pwr_max_usage = (long long)(max_power/1000); // RSMI reports power in microWatts
|
||||||
|
|
||||||
//? Get temp_max
|
//? Get temp_max
|
||||||
int64_t temp_max;
|
int64_t temp_max;
|
||||||
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temp_max);
|
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temp_max);
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
if (result != RSMI_STATUS_SUCCESS)
|
||||||
Logger::debug("ROCm SMI: Failed to get maximum GPU temperature, defaulting to 110");
|
Logger::warning("ROCm SMI: Failed to get maximum GPU temperature, defaulting to 110");
|
||||||
else gpus[offset].temp_max = (long long)temp_max;
|
else gpus[offset].temp_max = (long long)temp_max;
|
||||||
}
|
}
|
||||||
initialized = true;
|
initialized = true;
|
||||||
|
|
||||||
|
//? Check supported functions
|
||||||
|
Rsmi::collect<1>(gpus.data() + Nvml::device_count);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} else {initialized = true; shutdown(); return false;}
|
} else {initialized = true; shutdown(); return false;}
|
||||||
#else
|
#else
|
||||||
|
@ -1099,8 +1137,8 @@ namespace Gpu {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool shutdown() {
|
bool shutdown() {
|
||||||
if (!initialized) return false;
|
|
||||||
#if defined(GPU_AMD)
|
#if defined(GPU_AMD)
|
||||||
|
if (!initialized) return false;
|
||||||
if (rsmi_shut_down() == RSMI_STATUS_SUCCESS)
|
if (rsmi_shut_down() == RSMI_STATUS_SUCCESS)
|
||||||
initialized = false;
|
initialized = false;
|
||||||
else Logger::warning("Failed to shutdown ROCm SMI");
|
else Logger::warning("Failed to shutdown ROCm SMI");
|
||||||
|
@ -1111,84 +1149,110 @@ namespace Gpu {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <bool is_init>
|
||||||
bool collect(gpu_info* gpus_slice) { // raw pointer to vector data, size == device_count, offset by Nvml::device_count elements
|
bool collect(gpu_info* gpus_slice) { // raw pointer to vector data, size == device_count, offset by Nvml::device_count elements
|
||||||
if (!initialized) return false;
|
|
||||||
#if defined(GPU_AMD)
|
#if defined(GPU_AMD)
|
||||||
|
if (!initialized) return false;
|
||||||
rsmi_status_t result;
|
rsmi_status_t result;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < device_count; ++i) {
|
for (uint32_t i = 0; i < device_count; ++i) {
|
||||||
//? GPU utilization
|
//? GPU utilization
|
||||||
uint32_t utilization;
|
if (gpus_slice[i].supported_functions.gpu_utilization) {
|
||||||
result = rsmi_dev_busy_percent_get(i, &utilization);
|
uint32_t utilization;
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
result = rsmi_dev_busy_percent_get(i, &utilization);
|
||||||
Logger::debug("ROCm SMI: Failed to get GPU utilization");
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
else
|
Logger::warning("ROCm SMI: Failed to get GPU utilization");
|
||||||
gpus_slice[i].gpu_percent.push_back((long long)utilization);
|
if constexpr(is_init) gpus_slice[i].supported_functions.gpu_utilization = false;
|
||||||
|
} else gpus_slice[i].gpu_percent.push_back((long long)utilization);
|
||||||
|
}
|
||||||
|
|
||||||
//? Memory utilization
|
//? Memory utilization
|
||||||
result = rsmi_dev_memory_busy_percent_get(i, &utilization);
|
if (gpus_slice[i].supported_functions.mem_utilization) {
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
uint32_t utilization;
|
||||||
Logger::debug("ROCm SMI: Failed to get VRAM utilization");
|
result = rsmi_dev_memory_busy_percent_get(i, &utilization);
|
||||||
else
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
gpus_slice[i].mem_utilization_percent.push_back((long long)utilization);
|
Logger::warning("ROCm SMI: Failed to get VRAM utilization");
|
||||||
|
if constexpr(is_init) gpus_slice[i].supported_functions.mem_utilization = false;
|
||||||
|
} else gpus_slice[i].mem_utilization_percent.push_back((long long)utilization);
|
||||||
|
}
|
||||||
|
|
||||||
//? Clock speeds
|
//? Clock speeds
|
||||||
rsmi_frequencies_t frequencies;
|
if (gpus_slice[i].supported_functions.gpu_clock) {
|
||||||
result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &frequencies);
|
rsmi_frequencies_t frequencies;
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &frequencies);
|
||||||
Logger::debug("ROCm SMI: Failed to get GPU clock speed: ");
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
else gpus_slice[i].gpu_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz
|
Logger::warning("ROCm SMI: Failed to get GPU clock speed: ");
|
||||||
|
if constexpr(is_init) gpus_slice[i].supported_functions.gpu_clock = false;
|
||||||
|
} else gpus_slice[i].gpu_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz
|
||||||
|
}
|
||||||
|
|
||||||
result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &frequencies);
|
if (gpus_slice[i].supported_functions.mem_clock) {
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
rsmi_frequencies_t frequencies;
|
||||||
Logger::debug("ROCm SMI: Failed to get VRAM clock speed: ");
|
result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &frequencies);
|
||||||
else gpus_slice[i].mem_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
|
Logger::warning("ROCm SMI: Failed to get VRAM clock speed: ");
|
||||||
|
if constexpr(is_init) gpus_slice[i].supported_functions.mem_clock = false;
|
||||||
|
} else gpus_slice[i].mem_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz
|
||||||
|
}
|
||||||
|
|
||||||
//? Power usage & state
|
//? Power usage & state
|
||||||
uint64_t power;
|
if (gpus_slice[i].supported_functions.pwr_usage) {
|
||||||
result = rsmi_dev_power_ave_get(i, 0, &power);
|
uint64_t power;
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
result = rsmi_dev_power_ave_get(i, 0, &power);
|
||||||
Logger::debug("ROCm SMI: Failed to get GPU power usage");
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
else
|
Logger::warning("ROCm SMI: Failed to get GPU power usage");
|
||||||
gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
|
if constexpr(is_init) gpus_slice[i].supported_functions.pwr_usage = false;
|
||||||
gpus_slice[i].pwr_state = 32; // NVML_PSTATE_UNKNOWN; won't display in GUI
|
} else gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
|
||||||
|
|
||||||
|
if constexpr(is_init) gpus_slice[i].supported_functions.pwr_state = false;
|
||||||
|
}
|
||||||
|
|
||||||
//? GPU temperature
|
//? GPU temperature
|
||||||
if (Config::getB("check_temp")) {
|
if (gpus_slice[i].supported_functions.temp_info) {
|
||||||
int64_t temp;
|
if (Config::getB("check_temp") or is_init) {
|
||||||
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temp);
|
int64_t temp;
|
||||||
if (result != RSMI_STATUS_SUCCESS)
|
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temp);
|
||||||
Logger::debug("ROCm SMI: Failed to get GPU temperature");
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
else gpus_slice[i].temp.push_back((long long)temp/1000);
|
Logger::warning("ROCm SMI: Failed to get GPU temperature");
|
||||||
|
if constexpr(is_init) gpus_slice[i].supported_functions.temp_info = false;
|
||||||
|
} else gpus_slice[i].temp.push_back((long long)temp/1000);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//? Memory info
|
//? Memory info
|
||||||
uint64_t total, used;
|
if (gpus_slice[i].supported_functions.mem_total) {
|
||||||
result = rsmi_dev_memory_total_get(i, RSMI_MEM_TYPE_VRAM, &total);
|
uint64_t total;
|
||||||
if (result != RSMI_STATUS_SUCCESS) {
|
result = rsmi_dev_memory_total_get(i, RSMI_MEM_TYPE_VRAM, &total);
|
||||||
Logger::debug("ROCm SMI: Failed to get total VRAM");
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
} else {
|
Logger::warning("ROCm SMI: Failed to get total VRAM");
|
||||||
gpus_slice[i].mem_total = total;
|
if constexpr(is_init) gpus_slice[i].supported_functions.mem_total = false;
|
||||||
|
} else gpus_slice[i].mem_total = total;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gpus_slice[i].supported_functions.mem_used) {
|
||||||
|
uint64_t used;
|
||||||
result = rsmi_dev_memory_usage_get(i, RSMI_MEM_TYPE_VRAM, &used);
|
result = rsmi_dev_memory_usage_get(i, RSMI_MEM_TYPE_VRAM, &used);
|
||||||
if (result != RSMI_STATUS_SUCCESS) {
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
Logger::debug("ROCm SMI: Failed to get VRAM usage");
|
Logger::warning("ROCm SMI: Failed to get VRAM usage");
|
||||||
|
if constexpr(is_init) gpus_slice[i].supported_functions.mem_used = false;
|
||||||
} else {
|
} else {
|
||||||
gpus_slice[i].mem_used = used;
|
gpus_slice[i].mem_used = used;
|
||||||
|
if (gpus_slice[i].supported_functions.mem_total)
|
||||||
auto used_percent = (long long)round((double)used * 100.0 / (double)total);
|
gpus_slice[i].mem_used_percent.push_back((long long)round((double)used * 100.0 / (double)gpus_slice[i].mem_total));
|
||||||
gpus_slice[i].mem_used_percent.push_back(used_percent);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//? PCIe link speeds
|
//? PCIe link speeds
|
||||||
uint64_t tx, rx;
|
if (gpus_slice[i].supported_functions.pcie_txrx) {
|
||||||
result = rsmi_dev_pci_throughput_get(i, &tx, &rx, 0);
|
uint64_t tx, rx;
|
||||||
if (result != RSMI_STATUS_SUCCESS) {
|
result = rsmi_dev_pci_throughput_get(i, &tx, &rx, 0);
|
||||||
Logger::debug("ROCm SMI: Failed to get PCIe throughput");
|
if (result != RSMI_STATUS_SUCCESS) {
|
||||||
} else {
|
Logger::warning("ROCm SMI: Failed to get PCIe throughput");
|
||||||
gpus_slice[i].pcie_tx = (long long)tx;
|
if constexpr(is_init) gpus_slice[i].supported_functions.pcie_txrx = false;
|
||||||
gpus_slice[i].pcie_rx = (long long)rx;
|
} else {
|
||||||
|
gpus_slice[i].pcie_tx = (long long)tx;
|
||||||
|
gpus_slice[i].pcie_rx = (long long)rx;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1209,13 +1273,14 @@ namespace Gpu {
|
||||||
// DebugTimer gpu_timer("GPU Total");
|
// DebugTimer gpu_timer("GPU Total");
|
||||||
|
|
||||||
//* Collect data
|
//* Collect data
|
||||||
Nvml::collect(gpus.data()); // raw pointer to vector data, size == Nvml::device_count
|
Nvml::collect<0>(gpus.data()); // raw pointer to vector data, size == Nvml::device_count
|
||||||
Rsmi::collect(gpus.data() + Nvml::device_count); // size = Rsmi::device_count
|
Rsmi::collect<0>(gpus.data() + Nvml::device_count); // size = Rsmi::device_count
|
||||||
|
|
||||||
//* Calculate average usage
|
//* Calculate average usage
|
||||||
long long avg = 0;
|
long long avg = 0;
|
||||||
for (auto& gpu : gpus) {
|
for (auto& gpu : gpus) {
|
||||||
avg += gpu.gpu_percent.back();
|
if (gpu.supported_functions.gpu_utilization)
|
||||||
|
avg += gpu.gpu_percent.back();
|
||||||
|
|
||||||
//* Trim vectors if there are more values than needed for graphs
|
//* Trim vectors if there are more values than needed for graphs
|
||||||
if (width != 0) {
|
if (width != 0) {
|
||||||
|
|
Loading…
Reference in a new issue