Add more GPU graph types to the CPU panel

This commit is contained in:
romner-set 2023-05-30 18:24:50 +02:00
parent 842c761a73
commit 547f17dda3
4 changed files with 219 additions and 157 deletions

View file

@ -557,59 +557,76 @@ namespace Cpu {
const int graph_default_width = x + width - b_width - 3;
auto init_graphs = [&](vector<Draw::Graph>& graphs, const int graph_height, int& graph_width, const string& graph_field, bool invert) {
if (graph_field == "gpu-totals") {
graphs.resize(gpus.size());
gpu_temp_graphs.resize(gpus.size());
gpu_mem_graphs.resize(gpus.size());
gpu_meters.resize(gpus.size());
graph_width = graph_default_width/(int)gpus.size() - (int)gpus.size() + 1 + graph_default_width%gpus.size();
for (unsigned long i = 0;;) {
auto& gpu = gpus[i]; auto& graph = graphs[i];
if (graph_field.rfind("gpu", 0) == 0) {
if (graph_field.find("totals") != string::npos) {
graphs.resize(gpus.size());
gpu_temp_graphs.resize(gpus.size());
gpu_mem_graphs.resize(gpus.size());
gpu_meters.resize(gpus.size());
graph_width = graph_default_width/(int)gpus.size() - (int)gpus.size() + 1 + graph_default_width%gpus.size();
for (unsigned long i = 0;;) {
auto& gpu = gpus[i]; auto& graph = graphs[i];
//? GPU graphs/meters
if (gpu.supported_functions.temp_info)
gpu_temp_graphs[i] = Draw::Graph{ 5, 1, "temp", gpu.temp, graph_symbol, false, false, gpu.temp_max, -23 };
if (gpu.supported_functions.mem_used and gpu.supported_functions.mem_total)
gpu_mem_graphs[i] = Draw::Graph{ 5, 1, "used", gpu.mem_used_percent, graph_symbol };
if (gpu.supported_functions.gpu_utilization) {
gpu_meter_width = b_width - 12 - (int)floating_humanizer(gpu.mem_total, true).size() - (show_temps ? 24 : 12) - (int)to_string(i).size() + (gpus.size() == 1)*2 - (gpus.size() > 9 and i <= 9);
gpu_meters[i] = Draw::Meter{gpu_meter_width, "cpu" };
}
//? GPU graphs/meters
if (gpu.supported_functions.temp_info)
gpu_temp_graphs[i] = Draw::Graph{ 5, 1, "temp", gpu.temp, graph_symbol, false, false, gpu.temp_max, -23 };
if (gpu.supported_functions.mem_used and gpu.supported_functions.mem_total)
gpu_mem_graphs[i] = Draw::Graph{ 5, 1, "used", gpu.gpu_percent.at("gpu-vram-totals"), graph_symbol };
if (gpu.supported_functions.gpu_utilization) {
gpu_meter_width = b_width - 12 - (int)floating_humanizer(gpu.mem_total, true).size() - (show_temps ? 24 : 12) - (int)to_string(i).size() + (gpus.size() == 1)*2 - (gpus.size() > 9 and i <= 9);
gpu_meters[i] = Draw::Meter{gpu_meter_width, "cpu" };
}
bool utilization_support = gpu.supported_functions.gpu_utilization;
if (++i < gpus.size()) {
if (utilization_support)
graph = Draw::Graph{graph_width, graph_height, "cpu", gpu.gpu_percent, graph_symbol, invert, true};
} else {
if (utilization_support)
graph = Draw::Graph{
graph_width + graph_default_width%graph_width - (int)gpus.size() + 1,
graph_height, "cpu", gpu.gpu_percent, graph_symbol, invert, true
};
break;
bool utilization_support = gpu.supported_functions.gpu_utilization;
if (++i < gpus.size()) {
if (utilization_support)
graph = Draw::Graph{graph_width, graph_height, "cpu", gpu.gpu_percent.at(graph_field), graph_symbol, invert, true};
} else {
if (utilization_support)
graph = Draw::Graph{
graph_width + graph_default_width%graph_width - (int)gpus.size() + 1,
graph_height, "cpu", gpu.gpu_percent.at(graph_field), graph_symbol, invert, true
};
break;
}
}
}
} else if (graph_field == "gpu-average") {
graphs.resize(1);
graph_width = graph_default_width;
graphs[0] = Draw::Graph{ graph_width, graph_height, "cpu", Gpu::average_gpu_percent, graph_symbol, invert, true };
gpu_temp_graphs.resize(gpus.size());
gpu_mem_graphs.resize(gpus.size());
gpu_meters.resize(gpus.size());
for (unsigned long i = 0; i < gpus.size(); ++i) {
if (gpus[i].supported_functions.temp_info)
gpu_temp_graphs[i] = Draw::Graph{ 5, 1, "temp", gpus[i].temp, graph_symbol, false, false, gpus[i].temp_max, -23 };
if (gpus[i].supported_functions.mem_used and gpus[i].supported_functions.mem_total)
gpu_mem_graphs[i] = Draw::Graph{ 5, 1, "used", gpus[i].mem_used_percent, graph_symbol };
if (gpus[i].supported_functions.gpu_utilization) {
gpu_meter_width = b_width - 12 - (int)floating_humanizer(gpus[i].mem_total, true).size() - (show_temps ? 24 : 12) - (int)to_string(i).size() + (gpus.size() == 1)*2 - (gpus.size() > 9 and i <= 9);
gpu_meters[i] = Draw::Meter{gpu_meter_width, "cpu" };
} else {
graphs.resize(1);
graph_width = graph_default_width;
graphs[0] = Draw::Graph{ graph_width, graph_height, "cpu", Gpu::shared_gpu_percent.at(graph_field), graph_symbol, invert, true };
gpu_temp_graphs.resize(gpus.size());
gpu_mem_graphs.resize(gpus.size());
gpu_meters.resize(gpus.size());
for (unsigned long i = 0; i < gpus.size(); ++i) {
if (gpus[i].supported_functions.temp_info)
gpu_temp_graphs[i] = Draw::Graph{ 5, 1, "temp", gpus[i].temp, graph_symbol, false, false, gpus[i].temp_max, -23 };
if (gpus[i].supported_functions.mem_used and gpus[i].supported_functions.mem_total)
gpu_mem_graphs[i] = Draw::Graph{ 5, 1, "used", gpus[i].gpu_percent.at("gpu-vram-totals"), graph_symbol };
if (gpus[i].supported_functions.gpu_utilization) {
gpu_meter_width = b_width - 12 - (int)floating_humanizer(gpus[i].mem_total, true).size() - (show_temps ? 24 : 12) - (int)to_string(i).size() + (gpus.size() == 1)*2 - (gpus.size() > 9 and i <= 9);
gpu_meters[i] = Draw::Meter{gpu_meter_width, "cpu" };
}
}
}
} else {
graphs.resize(1);
graph_width = graph_default_width;
graphs[0] = Draw::Graph{ graph_width, graph_height, "cpu", cpu.cpu_percent.at(graph_field), graph_symbol, invert, true };
if ((ulong)Gpu::shown < gpus.size()) {
gpu_temp_graphs.resize(gpus.size());
gpu_mem_graphs.resize(gpus.size());
gpu_meters.resize(gpus.size());
for (unsigned long i = 0; i < gpus.size(); ++i) {
if (gpus[i].supported_functions.temp_info)
gpu_temp_graphs[i] = Draw::Graph{ 5, 1, "temp", gpus[i].temp, graph_symbol, false, false, gpus[i].temp_max, -23 };
if (gpus[i].supported_functions.mem_used and gpus[i].supported_functions.mem_total)
gpu_mem_graphs[i] = Draw::Graph{ 5, 1, "used", gpus[i].gpu_percent.at("gpu-vram-totals"), graph_symbol };
if (gpus[i].supported_functions.gpu_utilization) {
gpu_meter_width = b_width - 12 - (int)floating_humanizer(gpus[i].mem_total, true).size() - (show_temps ? 24 : 12) - (int)to_string(i).size() + (gpus.size() == 1)*2 - (gpus.size() > 9 and i <= 9);
gpu_meters[i] = Draw::Meter{gpu_meter_width, "cpu" };
}
}
}
}
};
@ -688,22 +705,22 @@ namespace Cpu {
//? Cpu/Gpu graphs
out += Fx::ub + Mv::to(y + 1, x + 1);
auto draw_graphs = [&](vector<Draw::Graph>& graphs, const int graph_height, const int graph_width, const string& graph_field) {
if (graph_field == "gpu-totals")
for (unsigned long i = 0;;) {
if (gpus[i].supported_functions.gpu_utilization) {
out += graphs[i](gpus[i].gpu_percent, (data_same or redraw));
if (graph_field.rfind("gpu", 0) == 0)
if (graph_field.find("totals") != string::npos)
for (unsigned long i = 0;;) {
out += graphs[i](gpus[i].gpu_percent.at(graph_field), (data_same or redraw));
if (gpus.size() > 1) {
auto i_str = to_string(i);
out += Mv::l(graph_width-1) + Mv::u(graph_height/2) + (graph_width > 5 ? "GPU " : "") + i_str
+ Mv::d(graph_height/2) + Mv::r(graph_width - 1 - (graph_width > 5)*4 - i_str.size());
}
} else out += Mv::d(graph_height/2) + Mv::r(graph_width/2 - 6) + "UNSUPPORTED" + Mv::r(graph_width/2 - 5);
if (++i < graphs.size())
out += Theme::c("div_line") + (Symbols::v_line + Mv::l(1) + Mv::u(1))*graph_height + Mv::r(1) + Mv::d(1);
else break;
}
else out += graphs[0]((graph_field == "gpu-average" ? Gpu::average_gpu_percent : cpu.cpu_percent.at(graph_field)), (data_same or redraw));
if (++i < graphs.size())
out += Theme::c("div_line") + (Symbols::v_line + Mv::l(1) + Mv::u(1))*graph_height + Mv::r(1) + Mv::d(1);
else break;
}
else out += graphs[0](Gpu::shared_gpu_percent.at(graph_field), (data_same or redraw));
else out += graphs[0](cpu.cpu_percent.at(graph_field), (data_same or redraw));
};
draw_graphs(graphs_upper, graph_up_height, graph_up_width, graph_up_field);
@ -810,13 +827,13 @@ namespace Cpu {
out += Mv::to(b_y + b_height - 1 - gpus.size() + ++shown_panels_count - (Gpu::shown == 0), b_x + 1)
+ Theme::c("main_fg") + Fx::b + "GPU " + (gpus.size() > 1 ? ((gpus.size() > 9 and i <= 9 ? " " : "") + to_string(i) + ' ') : "");
if (gpus[i].supported_functions.gpu_utilization)
out += gpu_meters[i](gpus[i].gpu_percent.back())
+ Theme::g("cpu").at(gpus[i].gpu_percent.back()) + rjust(to_string(gpus[i].gpu_percent.back()), 4) + Theme::c("main_fg") + '%';
out += gpu_meters[i](gpus[i].gpu_percent.at("gpu-totals").back())
+ Theme::g("cpu").at(gpus[i].gpu_percent.at("gpu-totals").back()) + rjust(to_string(gpus[i].gpu_percent.at("gpu-totals").back()), 4) + Theme::c("main_fg") + '%';
else out += Mv::r(gpu_meter_width);
if (gpus[i].supported_functions.mem_used) {
out += ' ' + Theme::c("inactive_fg") + graph_bg * 6 + Mv::l(6) + Theme::g("used").at(gpus[i].mem_used_percent.back())
+ gpu_mem_graphs[i](gpus[i].mem_used_percent, data_same or redraw) + Theme::c("main_fg")
out += ' ' + Theme::c("inactive_fg") + graph_bg * 6 + Mv::l(6) + Theme::g("used").at(gpus[i].gpu_percent.at("gpu-vram-totals").back())
+ gpu_mem_graphs[i](gpus[i].gpu_percent.at("gpu-vram-totals"), data_same or redraw) + Theme::c("main_fg")
+ rjust(floating_humanizer(gpus[i].mem_used, true), 5);
if (gpus[i].supported_functions.mem_total)
out += Theme::c("inactive_fg") + '/' + Theme::c("main_fg") + floating_humanizer(gpus[i].mem_total, true);
@ -889,12 +906,12 @@ namespace Gpu {
out += box[index];
if (gpu.supported_functions.gpu_utilization) {
graph_upper = Draw::Graph{x + width - b_width - 3, graph_up_height, "cpu", gpu.gpu_percent, graph_symbol, false, true}; // TODO cpu -> gpu
graph_upper = Draw::Graph{x + width - b_width - 3, graph_up_height, "cpu", gpu.gpu_percent.at("gpu-totals"), graph_symbol, false, true}; // TODO cpu -> gpu
if (not single_graph) {
graph_lower = Draw::Graph{
x + width - b_width - 3,
graph_low_height, "cpu",
gpu.gpu_percent,
gpu.gpu_percent.at("gpu-totals"),
graph_symbol,
Config::getB("cpu_invert_lower"), true
};
@ -908,7 +925,7 @@ namespace Gpu {
if (gpu.supported_functions.mem_utilization)
mem_util_graph = Draw::Graph{b_width/2 - 1, 2, "free", gpu.mem_utilization_percent, graph_symbol, 0, 0, 100, 4}; // offset so the graph isn't empty at 0-5% utilization
if (gpu.supported_functions.mem_used and gpu.supported_functions.mem_total)
mem_used_graph = Draw::Graph{b_width/2 - 2, 2 + 2*(gpu.supported_functions.mem_utilization), "used", gpu.mem_used_percent, graph_symbol};
mem_used_graph = Draw::Graph{b_width/2 - 2, 2 + 2*(gpu.supported_functions.mem_utilization), "used", gpu.gpu_percent.at("gpu-vram-totals"), graph_symbol};
}
@ -916,12 +933,12 @@ namespace Gpu {
//? Gpu graph, meter & clock speed
if (gpu.supported_functions.gpu_utilization) {
out += Fx::ub + Mv::to(y + 1, x + 1) + graph_upper(gpu.gpu_percent, (data_same or redraw[index]));
out += Fx::ub + Mv::to(y + 1, x + 1) + graph_upper(gpu.gpu_percent.at("gpu-totals"), (data_same or redraw[index]));
if (not single_graph)
out += Mv::to(y + graph_up_height + 1 + mid_line[index], x + 1) + graph_lower(gpu.gpu_percent, (data_same or redraw[index]));
out += Mv::to(y + graph_up_height + 1 + mid_line[index], x + 1) + graph_lower(gpu.gpu_percent.at("gpu-totals"), (data_same or redraw[index]));
out += Mv::to(b_y + 1, b_x + 1) + Theme::c("main_fg") + Fx::b + "GPU " + gpu_meter(gpu.gpu_percent.back())
+ Theme::g("cpu").at(gpu.gpu_percent.back()) + rjust(to_string(gpu.gpu_percent.back()), 4) + Theme::c("main_fg") + '%';
out += Mv::to(b_y + 1, b_x + 1) + Theme::c("main_fg") + Fx::b + "GPU " + gpu_meter(gpu.gpu_percent.at("gpu-totals").back())
+ Theme::g("cpu").at(gpu.gpu_percent.at("gpu-totals").back()) + rjust(to_string(gpu.gpu_percent.at("gpu-totals").back()), 4) + Theme::c("main_fg") + '%';
//? Temperature graph, I assume the device supports utilization if it supports temperature
if (show_temps) {
@ -941,8 +958,8 @@ namespace Gpu {
//? Power usage meter, power state
if (gpu.supported_functions.pwr_usage) {
out += Mv::to(b_y + 2, b_x + 1) + Theme::c("main_fg") + Fx::b + "PWR " + pwr_meter(gpu.pwr_percent.back())
+ Theme::g("cached").at(gpu.pwr_percent.back()) + rjust(to_string(gpu.pwr_usage/1000), 4) + Theme::c("main_fg") + 'W';
out += Mv::to(b_y + 2, b_x + 1) + Theme::c("main_fg") + Fx::b + "PWR " + pwr_meter(gpu.gpu_percent.at("gpu-pwr-totals").back())
+ Theme::g("cached").at(gpu.gpu_percent.at("gpu-pwr-totals").back()) + rjust(to_string(gpu.pwr_usage/1000), 4) + Theme::c("main_fg") + 'W';
if (gpu.supported_functions.pwr_state and gpu.pwr_state != 32) // NVML_PSTATE_UNKNOWN; unsupported or non-nvidia card
out += std::string(" P-state: ") + (gpu.pwr_state > 9 ? "" : " ") + 'P' + Theme::g("cached").at(gpu.pwr_state) + to_string(gpu.pwr_state);
}
@ -960,9 +977,9 @@ namespace Gpu {
+ Symbols::h_line*(b_width/2-8) + Symbols::div_up + Mv::d(offset)+Mv::l(1) + Symbols::div_down + Mv::l(1)+Mv::u(1) + (Symbols::v_line + Mv::l(1)+Mv::u(1))*(offset-1) + Symbols::div_up
+ Symbols::h_line + Theme::c("title") + "Used:" + Theme::c("div_line")
+ Symbols::h_line*(b_width/2+b_width%2-9-used_memory_string.size()) + Theme::c("title") + used_memory_string + Theme::c("div_line") + Symbols::h_line + Symbols::div_right
+ Mv::d(1) + Mv::l(b_width/2-1) + mem_used_graph(gpu.mem_used_percent, (data_same or redraw[index]))
+ Mv::d(1) + Mv::l(b_width/2-1) + mem_used_graph(gpu.gpu_percent.at("gpu-vram-totals"), (data_same or redraw[index]))
+ Mv::l(b_width-3) + Mv::u(1+2*gpu.supported_functions.mem_utilization) + Theme::c("main_fg") + Fx::b + "Total:" + rjust(floating_humanizer(gpu.mem_total), b_width/2-9) + Fx::ub
+ Mv::r(3) + rjust(to_string(gpu.mem_used_percent.back()), 3) + '%';
+ Mv::r(3) + rjust(to_string(gpu.gpu_percent.at("gpu-vram-totals").back()), 3) + '%';
//? Memory utilization
if (gpu.supported_functions.mem_utilization)
@ -1006,6 +1023,7 @@ namespace Gpu {
redraw[index] = false;
return out + Fx::reset;
}
}
namespace Mem {
@ -1918,11 +1936,11 @@ namespace Draw {
const bool show_temp = (Config::getB("check_temp") and got_sensors);
width = round((double)Term::width * width_p / 100);
if (Gpu::shown != 0 and not (Mem::shown or Net::shown or Proc::shown)) {
height = Term::height/2;
height = Term::height - Gpu::min_height*Gpu::shown - gpus_height_offset;
} else {
height = max(8, (int)ceil((double)Term::height * (trim(boxes) == "cpu" ? 100 : height_p/(Gpu::shown+1) + (Gpu::shown != 0)*5) / 100));
}
if (height <= Term::height-2) height += gpus_height_offset;
if (height <= Term::height-gpus_height_offset) height += gpus_height_offset;
x = 1;
y = cpu_bottom ? Term::height - height + 1 : 1;
@ -1975,7 +1993,16 @@ namespace Draw {
redraw[i] = true;
width = Term::width;
height = max(min_height, Cpu::shown ? Cpu::height : (int)ceil((double)Term::height * height_p/Gpu::shown / 100));
if (Cpu::shown)
if (not (Mem::shown or Net::shown or Proc::shown))
height = min_height;
else height = Cpu::height;
else
if (not (Mem::shown or Net::shown or Proc::shown))
height = Term::height/Gpu::shown + (i == 0)*(Term::height%Gpu::shown);
else
height = max(min_height, (int)ceil((double)Term::height * height_p/Gpu::shown / 100));
height += (height+Cpu::height == Term::height-1);
x_vec[i] = 1; y_vec[i] = 1 + i*height + (not Config::getB("cpu_bottom"))*Cpu::shown*Cpu::height;
box[i] = createBox(x_vec[i], y_vec[i], width, height, Theme::c("cpu_box"), true, std::string("gpu") + (char)(shown_panels[i]+'0'), "", (shown_panels[i]+5)%10); // TODO gpu_box

View file

@ -95,7 +95,9 @@ namespace Gpu {
extern vector<char> shown_panels;
extern vector<string> gpu_names;
extern vector<int> gpu_b_height_offsets;
extern deque<long long> average_gpu_percent;
extern long long gpu_pwr_total_max;
extern unordered_flat_map<string, deque<long long>> shared_gpu_percent; // averages, power/vram total
const array mem_names { "used"s, "free"s };
@ -121,10 +123,13 @@ namespace Gpu {
//* Per-device container for GPU info
struct gpu_info {
deque<long long> gpu_percent = {};
unordered_flat_map<string, deque<long long>> gpu_percent = {
{"gpu-totals", {}},
{"gpu-vram-totals", {}},
{"gpu-pwr-totals", {}},
};
unsigned int gpu_clock_speed; // MHz
deque<long long> pwr_percent = {};
long long pwr_usage; // mW
long long pwr_max_usage = 255000;
long long pwr_state;
@ -134,7 +139,6 @@ namespace Gpu {
long long mem_total = 0;
long long mem_used = 0;
deque<long long> mem_used_percent = {0};
deque<long long> mem_utilization_percent = {0}; // TODO: properly handle GPUs that can't report some stats
long long mem_clock_speed = 0; // MHz

View file

@ -103,7 +103,7 @@ namespace Term {
bool mem = boxes.find("mem") != string::npos;
bool net = boxes.find("net") != string::npos;
bool proc = boxes.find("proc") != string::npos;
bool gpu = 0;
int gpu = 0;
if (not Gpu::gpu_names.empty())
for (char i = '0'; i <= '5'; ++i)
gpu += (boxes.find(std::string("gpu") + i) != string::npos);
@ -117,7 +117,7 @@ namespace Term {
int height = (cpu ? Cpu::min_height : 0);
if (proc) height += Proc::min_height;
else height += (mem ? Mem::min_height : 0) + (net ? Net::min_height : 0);
if (gpu != 0) height += Gpu::min_height*gpu;
height += Gpu::min_height*gpu;
return { width, height };
}

View file

@ -99,7 +99,12 @@ namespace Gpu {
vector<gpu_info> gpus;
vector<string> gpu_names;
vector<int> gpu_b_height_offsets;
deque<long long> average_gpu_percent = {};
//? History deques for the graphs that combine all GPUs, keyed by graph field name;
//? the keys are also registered as selectable CPU-panel graph fields during Shared::init
unordered_flat_map<string, deque<long long>> shared_gpu_percent = {
{"gpu-average", {}},
{"gpu-vram-total", {}},
{"gpu-pwr-total", {}},
};
//? Sum of the per-device maximum power values, accumulated while the NVML/ROCm SMI collectors run their init pass;
//? used as the denominator for the "gpu-pwr-total" graph
long long gpu_pwr_total_max;
//? NVIDIA data collection
namespace Nvml {
@ -186,8 +191,10 @@ namespace Shared {
Gpu::Nvml::init();
Gpu::Rsmi::init();
if (not Gpu::gpu_names.empty()) {
Cpu::available_fields.push_back("gpu-totals");
Cpu::available_fields.push_back("gpu-average");
for (auto const& [key, _] : Gpu::gpus[0].gpu_percent)
Cpu::available_fields.push_back(key);
for (auto const& [key, _] : Gpu::shared_gpu_percent)
Cpu::available_fields.push_back(key);
if (Config::strings.at("cpu_graph_lower") == "default")
Config::strings.at("cpu_graph_lower") = "gpu-totals";
@ -890,45 +897,9 @@ namespace Gpu {
gpus.resize(device_count);
gpu_names.resize(device_count);
for (unsigned int i = 0; i < device_count; ++i) {
//? Device Handle
result = nvmlDeviceGetHandleByIndex(i, devices.data() + i*sizeof(nvmlDevice_t));
if (result != NVML_SUCCESS) {
Logger::warning(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result));
gpus[i].supported_functions = {false, false, false, false, false, false, false, false};
continue;
}
//? Device name
char name[NVML_DEVICE_NAME_BUFFER_SIZE];
result = nvmlDeviceGetName(devices[i], name, NVML_DEVICE_NAME_BUFFER_SIZE);
if (result != NVML_SUCCESS)
Logger::warning(std::string("NVML: Failed to get device name: ") + nvmlErrorString(result));
else {
gpu_names[i] = string(name);
for (const auto& brand : {"NVIDIA", "Nvidia", "AMD", "Amd", "Intel", "(R)", "(TM)"}) {
gpu_names[i] = s_replace(gpu_names[i], brand, "");
}
gpu_names[i] = trim(gpu_names[i]);
}
//? Power usage
unsigned int max_power;
result = nvmlDeviceGetPowerManagementLimit(devices[i], &max_power);
if (result != NVML_SUCCESS)
Logger::warning(std::string("NVML: Failed to get maximum GPU power draw, defaulting to 225W: ") + nvmlErrorString(result));
else gpus[i].pwr_max_usage = max_power;
//? Get temp_max
unsigned int temp_max;
result = nvmlDeviceGetTemperatureThreshold(devices[i], NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, &temp_max);
if (result != NVML_SUCCESS)
Logger::warning(std::string("NVML: Failed to get maximum GPU temperature, defaulting to 110°C: ") + nvmlErrorString(result));
else gpus[i].temp_max = (long long)temp_max;
}
initialized = true;
//? Check supported functions
//? Check supported functions & get maximums
Nvml::collect<1>(gpus.data());
return true;
@ -956,9 +927,50 @@ namespace Gpu {
bool collect(gpu_info* gpus_slice) { // raw pointer to vector data, size == device_count
#if defined(GPU_NVIDIA)
if (!initialized) return false;
nvmlReturn_t result;
// DebugTimer gpu_nvidia("Nvidia Total");
for (unsigned int i = 0; i < device_count; ++i) {
if constexpr(is_init) {
//? Device Handle
result = nvmlDeviceGetHandleByIndex(i, devices.data() + i);
if (result != NVML_SUCCESS) {
Logger::warning(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result));
gpus[i].supported_functions = {false, false, false, false, false, false, false, false};
continue;
}
//? Device name
char name[NVML_DEVICE_NAME_BUFFER_SIZE];
result = nvmlDeviceGetName(devices[i], name, NVML_DEVICE_NAME_BUFFER_SIZE);
if (result != NVML_SUCCESS)
Logger::warning(std::string("NVML: Failed to get device name: ") + nvmlErrorString(result));
else {
gpu_names[i] = string(name);
for (const auto& brand : {"NVIDIA", "Nvidia", "(R)", "(TM)"}) {
gpu_names[i] = s_replace(gpu_names[i], brand, "");
}
gpu_names[i] = trim(gpu_names[i]);
}
//? Maximum power draw (power management limit), only queried on the init pass
unsigned int max_power;
result = nvmlDeviceGetPowerManagementLimit(devices[i], &max_power);
// NOTE(review): the warning below says 225W, but the gpu_info default for pwr_max_usage is 255000 (mW) - confirm which value is intended
if (result != NVML_SUCCESS)
Logger::warning(std::string("NVML: Failed to get maximum GPU power draw, defaulting to 225W: ") + nvmlErrorString(result));
else {
gpus[i].pwr_max_usage = max_power; // NVML reports the limit in milliwatts, so no conversion is needed (unlike the ROCm SMI path, which divides by 1000)
gpu_pwr_total_max += max_power; // running sum over all devices, used for the combined power graph
}
//? Get temp_max
unsigned int temp_max;
result = nvmlDeviceGetTemperatureThreshold(devices[i], NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, &temp_max);
if (result != NVML_SUCCESS)
Logger::warning(std::string("NVML: Failed to get maximum GPU temperature, defaulting to 110°C: ") + nvmlErrorString(result));
else gpus[i].temp_max = (long long)temp_max;
}
//? GPU & memory utilization
if (gpus_slice[i].supported_functions.gpu_utilization) {
nvmlUtilization_t utilization;
@ -968,7 +980,7 @@ namespace Gpu {
if constexpr(is_init) gpus_slice[i].supported_functions.gpu_utilization = false;
if constexpr(is_init) gpus_slice[i].supported_functions.mem_utilization = false;
} else {
gpus_slice[i].gpu_percent.push_back((long long)utilization.gpu);
gpus_slice[i].gpu_percent.at("gpu-totals").push_back((long long)utilization.gpu);
gpus_slice[i].mem_utilization_percent.push_back((long long)utilization.memory);
}
}
@ -1003,7 +1015,7 @@ namespace Gpu {
if constexpr(is_init) gpus_slice[i].supported_functions.pwr_usage = false;
} else {
gpus_slice[i].pwr_usage = (long long)power;
gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
gpus_slice[i].gpu_percent.at("gpu-pwr-totals").push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
}
}
@ -1044,7 +1056,7 @@ namespace Gpu {
//gpu.mem_free = memory.free;
auto used_percent = (long long)round((double)memory.used * 100.0 / (double)memory.total);
gpus_slice[i].mem_used_percent.push_back(used_percent);
gpus_slice[i].gpu_percent.at("gpu-vram-totals").push_back(used_percent);
}
}
@ -1109,33 +1121,9 @@ namespace Gpu {
//? Append the ROCm SMI devices after the devices already registered (NVML ones come first).
//? gpu_names must stay index-aligned with gpus, so it is resized to the *new* gpus.size();
//? adding device_count a second time would leave trailing empty names.
gpus.resize(gpus.size() + device_count);
gpu_names.resize(gpus.size());
for (unsigned int i = 0; i < device_count; ++i) {
auto offset = Nvml::device_count + i;
//? Device name
char name[NVML_DEVICE_NAME_BUFFER_SIZE]; // ROCm SMI does not provide a constant for this as far as I can tell, this should be good enough
result = rsmi_dev_name_get(i, name, NVML_DEVICE_NAME_BUFFER_SIZE);
if (result != RSMI_STATUS_SUCCESS)
Logger::warning("ROCm SMI: Failed to get device name");
else gpu_names[offset] = string(name);
//? Power usage
uint64_t max_power;
result = rsmi_dev_power_cap_get(i, 0, &max_power);
if (result != RSMI_STATUS_SUCCESS)
Logger::warning("ROCm SMI: Failed to get maximum GPU power draw, defaulting to 225W");
else gpus[offset].pwr_max_usage = (long long)(max_power/1000); // RSMI reports power in microWatts
//? Get temp_max
int64_t temp_max;
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temp_max);
if (result != RSMI_STATUS_SUCCESS)
Logger::warning("ROCm SMI: Failed to get maximum GPU temperature, defaulting to 110°C");
else gpus[offset].temp_max = (long long)temp_max;
}
initialized = true;
//? Check supported functions
//? Check supported functions & get maximums
Rsmi::collect<1>(gpus.data() + Nvml::device_count);
return true;
@ -1165,6 +1153,32 @@ namespace Gpu {
rsmi_status_t result;
for (uint32_t i = 0; i < device_count; ++i) {
if constexpr(is_init) {
//? Device name
char name[NVML_DEVICE_NAME_BUFFER_SIZE]; // ROCm SMI does not provide a constant for this as far as I can tell, this should be good enough
result = rsmi_dev_name_get(i, name, NVML_DEVICE_NAME_BUFFER_SIZE);
if (result != RSMI_STATUS_SUCCESS)
Logger::warning("ROCm SMI: Failed to get device name");
else gpu_names[Nvml::device_count + i] = string(name);
//? Power usage
uint64_t max_power;
result = rsmi_dev_power_cap_get(i, 0, &max_power);
if (result != RSMI_STATUS_SUCCESS)
Logger::warning("ROCm SMI: Failed to get maximum GPU power draw, defaulting to 225W");
else {
gpus_slice[i].pwr_max_usage = (long long)(max_power/1000); // RSMI reports power in microWatts
gpu_pwr_total_max += gpus_slice[i].pwr_max_usage;
}
//? Get temp_max
int64_t temp_max;
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temp_max);
if (result != RSMI_STATUS_SUCCESS)
Logger::warning("ROCm SMI: Failed to get maximum GPU temperature, defaulting to 110°C");
else gpus_slice[i].temp_max = (long long)temp_max;
}
//? GPU utilization
if (gpus_slice[i].supported_functions.gpu_utilization) {
uint32_t utilization;
@ -1172,7 +1186,7 @@ namespace Gpu {
if (result != RSMI_STATUS_SUCCESS) {
Logger::warning("ROCm SMI: Failed to get GPU utilization");
if constexpr(is_init) gpus_slice[i].supported_functions.gpu_utilization = false;
} else gpus_slice[i].gpu_percent.push_back((long long)utilization);
} else gpus_slice[i].gpu_percent.at("gpu-totals").push_back((long long)utilization);
}
//? Memory utilization
@ -1211,7 +1225,7 @@ namespace Gpu {
if (result != RSMI_STATUS_SUCCESS) {
Logger::warning("ROCm SMI: Failed to get GPU power usage");
if constexpr(is_init) gpus_slice[i].supported_functions.pwr_usage = false;
} else gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
} else gpus_slice[i].gpu_percent.at("gpu-pwr-totals").push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
if constexpr(is_init) gpus_slice[i].supported_functions.pwr_state = false;
}
@ -1247,7 +1261,7 @@ namespace Gpu {
} else {
gpus_slice[i].mem_used = used;
if (gpus_slice[i].supported_functions.mem_total)
gpus_slice[i].mem_used_percent.push_back((long long)round((double)used * 100.0 / (double)gpus_slice[i].mem_total));
gpus_slice[i].gpu_percent.at("gpu-vram-totals").push_back((long long)round((double)used * 100.0 / (double)gpus_slice[i].mem_total));
}
}
@ -1287,27 +1301,44 @@ namespace Gpu {
//* Calculate average usage
long long avg = 0;
long long mem_usage_total = 0;
long long mem_total = 0;
long long pwr_total = 0;
for (auto& gpu : gpus) {
if (gpu.supported_functions.gpu_utilization)
avg += gpu.gpu_percent.back();
avg += gpu.gpu_percent.at("gpu-totals").back();
if (gpu.supported_functions.mem_used)
mem_usage_total += gpu.mem_used;
if (gpu.supported_functions.mem_total)
mem_total += gpu.mem_total;
//? Sum the per-device power draw for the combined "gpu-pwr-total" graph.
//? This must go into pwr_total; adding it to mem_total leaves pwr_total at 0 and skews the VRAM total.
if (gpu.supported_functions.pwr_usage)
	pwr_total += gpu.pwr_usage;
//* Trim vectors if there are more values than needed for graphs
if (width != 0) {
//? GPU & memory utilization
while (cmp_greater(gpu.gpu_percent.size(), width * 2)) gpu.gpu_percent.pop_front();
while (cmp_greater(gpu.mem_utilization_percent.size(), width)) gpu.mem_utilization_percent.pop_front();
while (cmp_greater(gpu.gpu_percent.at("gpu-totals").size(), width * 2)) gpu.gpu_percent.at("gpu-totals").pop_front();
while (cmp_greater(gpu.mem_utilization_percent.size(), width)) gpu.mem_utilization_percent.pop_front();
//? Power usage
while (cmp_greater(gpu.pwr_percent.size(), width)) gpu.pwr_percent.pop_front();
while (cmp_greater(gpu.gpu_percent.at("gpu-pwr-totals").size(), width)) gpu.gpu_percent.at("gpu-pwr-totals").pop_front();
//? Temperature
while (cmp_greater(gpu.temp.size(), 18)) gpu.temp.pop_front();
//? Memory usage
while (cmp_greater(gpu.mem_used_percent.size(), width/2)) gpu.mem_used_percent.pop_front();
while (cmp_greater(gpu.gpu_percent.at("gpu-vram-totals").size(), width/2)) gpu.gpu_percent.at("gpu-vram-totals").pop_front();
}
}
average_gpu_percent.push_back(avg / gpus.size());
if (width != 0)
while (cmp_greater(average_gpu_percent.size(), width * 2)) average_gpu_percent.pop_front();
//? Push the combined values shared by all GPUs; like the per-device deques, every value is a 0-100 percentage
//? Average utilization: guard against an empty device list and keep the division signed
if (not gpus.empty())
	shared_gpu_percent.at("gpu-average").push_back(avg / (long long)gpus.size());
//? Total VRAM usage: scale the used/total ratio to percent (a plain integer division only ever yields 0 or 1)
if (mem_total != 0)
	shared_gpu_percent.at("gpu-vram-total").push_back(clamp((long long)round((double)mem_usage_total * 100.0 / (double)mem_total), 0ll, 100ll));
//? Total power draw relative to the summed per-device maximums, same scaling as the per-device power percentage
if (gpu_pwr_total_max != 0)
	shared_gpu_percent.at("gpu-pwr-total").push_back(clamp((long long)round((double)pwr_total * 100.0 / (double)gpu_pwr_total_max), 0ll, 100ll));
if (width != 0) {
while (cmp_greater(shared_gpu_percent.at("gpu-average").size(), width * 2)) shared_gpu_percent.at("gpu-average").pop_front();
while (cmp_greater(shared_gpu_percent.at("gpu-pwr-total").size(), width * 2)) shared_gpu_percent.at("gpu-pwr-total").pop_front();
while (cmp_greater(shared_gpu_percent.at("gpu-vram-total").size(), width * 2)) shared_gpu_percent.at("gpu-vram-total").pop_front();
}
return gpus;
}