From 1fee2bc08b9215ce728f16be2ee8a3068ae45618 Mon Sep 17 00:00:00 2001 From: aristocratos Date: Sun, 21 May 2023 13:52:19 +0200 Subject: [PATCH] Add DebugTimer class and change some Logger::error calls to Logger::debug --- src/btop_tools.cpp | 70 ++++++++++++++++++++++++++++++++++- src/btop_tools.hpp | 60 +++++++++++++++++++++++++++--- src/linux/btop_collect.cpp | 75 ++++++++++++++++++++++---------------- 3 files changed, 167 insertions(+), 38 deletions(-) diff --git a/src/btop_tools.cpp b/src/btop_tools.cpp index f3fc37d..0d740de 100644 --- a/src/btop_tools.cpp +++ b/src/btop_tools.cpp @@ -541,6 +541,74 @@ namespace Tools { return (user != NULL ? user : ""); } + DebugTimer::DebugTimer(const string name, bool start, bool delayed_report) : name(name), delayed_report(delayed_report) { + if (start) + this->start(); + } + + DebugTimer::~DebugTimer() { + if (running) + this->stop(true); + this->force_report(); + } + + void DebugTimer::start() { + if (running) return; + running = true; + start_time = time_micros(); + } + + void DebugTimer::stop(bool report) { + if (not running) return; + running = false; + elapsed_time = time_micros() - start_time; + if (report) this->report(); + } + + void DebugTimer::reset(bool restart) { + running = false; + start_time = 0; + elapsed_time = 0; + if (restart) this->start(); + } + + void DebugTimer::stop_rename_reset(const string &new_name, bool report, bool restart) { + this->stop(report); + name = new_name; + this->reset(restart); + } + + void DebugTimer::report() { + string report_line; + if (start_time == 0 and elapsed_time == 0) + report_line = format("DebugTimer::report() warning -> Timer [{}] has not been started!", name); + else if (running) + report_line = format(custom_locale, "Timer [{}] (running) currently at {:L} μs", name, time_micros() - start_time); + else + report_line = format(custom_locale, "Timer [{}] took {:L} μs", name, elapsed_time); + + if (delayed_report) + report_buffer.emplace_back(report_line); + else + Logger::log_write(log_level, report_line); + } + + void DebugTimer::force_report() { + if (report_buffer.empty()) return; + for (const auto& line : report_buffer) + Logger::log_write(log_level, line); + report_buffer.clear(); + } + + uint64_t DebugTimer::elapsed() { + if (running) + return time_micros() - start_time; + return elapsed_time; + } + + bool DebugTimer::is_running() { + return running; + } } namespace Logger { @@ -572,7 +640,7 @@ namespace Logger { loglevel = v_index(log_levels, level); } - void log_write(const size_t level, const string& msg) { + void log_write(const Level level, const string& msg) { if (loglevel < level or logfile.empty()) return; atomic_lock lck(busy, true); lose_priv neutered{}; diff --git a/src/btop_tools.hpp b/src/btop_tools.hpp index 4ae27f8..7e499f6 100644 --- a/src/btop_tools.hpp +++ b/src/btop_tools.hpp @@ -152,6 +152,12 @@ namespace Term { namespace Tools { constexpr auto SSmax = std::numeric_limits::max(); + class MyNumPunct : public std::numpunct { + protected: + virtual char do_thousands_sep() const { return '\''; } + virtual std::string do_grouping() const { return "\03"; } + }; + size_t wide_ulen(const string& str); size_t wide_ulen(const std::wstring& w_str); @@ -335,7 +341,6 @@ namespace Tools { //* Convert a celsius value to celsius, fahrenheit, kelvin or rankin and return tuple with new value and unit. auto celsius_to(const long long& celsius, const string& scale) -> tuple; - } //* Simple logging implementation @@ -349,13 +354,56 @@ namespace Logger { }; extern std::filesystem::path logfile; + enum Level : size_t { + DISABLED = 0, + ERROR = 1, + WARNING = 2, + INFO = 3, + DEBUG = 4, + }; + //* Set log level, valid arguments: "DISABLED", "ERROR", "WARNING", "INFO" and "DEBUG" void set(const string& level); - void log_write(const size_t level, const string& msg); - inline void error(const string msg) { log_write(1, msg); } - inline void warning(const string msg) { log_write(2, msg); } - inline void info(const string msg) { log_write(3, msg); } - inline void debug(const string msg) { log_write(4, msg); } + void log_write(const Level level, const string& msg); + inline void error(const string msg) { log_write(ERROR, msg); } + inline void warning(const string msg) { log_write(WARNING, msg); } + inline void info(const string msg) { log_write(INFO, msg); } + inline void debug(const string msg) { log_write(DEBUG, msg); } } +namespace Tools { + //* Creates a named timer that is started on construct (by default) and reports elapsed time in microseconds to Logger::debug() on destruct if running + //* Unless delayed_report is set to false, all reporting is buffered and delayed until DebugTimer is destructed or .force_report() is called + //* Usage example: Tools::DebugTimer timer(name:"myTimer", [start:true], [delayed_report:true]) // Create timer and start + //* timer.stop(); // Stop timer and report elapsed time + //* timer.stop_rename_reset("myTimer2"); // Stop timer, report elapsed time, rename timer, reset and restart + class DebugTimer { + uint64_t start_time{}; + uint64_t elapsed_time{}; + bool running{}; + std::locale custom_locale = std::locale(std::locale::classic(), new Tools::MyNumPunct); + vector report_buffer{}; + public: + string name{}; + bool delayed_report{}; + Logger::Level log_level = Logger::DEBUG; + DebugTimer() = default; + DebugTimer(const string name, bool start = true, bool delayed_report = true); + ~DebugTimer(); + + void start(); + void stop(bool report = true); + void reset(bool restart = true); + //* Stops and reports (default), renames timer then resets and restarts (default) + void stop_rename_reset(const string& new_name, bool report = true, bool restart = true); + void report(); + void force_report(); + uint64_t elapsed(); + bool is_running(); + }; + +} + + + diff --git a/src/linux/btop_collect.cpp b/src/linux/btop_collect.cpp index 935f970..2cc2ac0 100644 --- a/src/linux/btop_collect.cpp +++ b/src/linux/btop_collect.cpp @@ -861,14 +861,14 @@ namespace Gpu { #if defined(GPU_NVIDIA) nvmlReturn_t result = nvmlInit(); if (result != NVML_SUCCESS) { - Logger::warning(std::string("Failed to initialize NVML, NVIDIA GPUs will not be detected: ") + nvmlErrorString(result)); + Logger::debug(std::string("Failed to initialize NVML, NVIDIA GPUs will not be detected: ") + nvmlErrorString(result)); return false; } //? Device count result = nvmlDeviceGetCount(&device_count); if (result != NVML_SUCCESS) { - Logger::error(std::string("NVML: Failed to get device count: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get device count: ") + nvmlErrorString(result)); return false; } @@ -881,7 +881,7 @@ namespace Gpu { //? Device Handle result = nvmlDeviceGetHandleByIndex(i, devices.data() + i*sizeof(nvmlDevice_t)); if (result != NVML_SUCCESS) { - Logger::error(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result)); continue; } @@ -889,7 +889,7 @@ namespace Gpu { char name[NVML_DEVICE_NAME_BUFFER_SIZE]; result = nvmlDeviceGetName(devices[i], name, NVML_DEVICE_NAME_BUFFER_SIZE); if (result != NVML_SUCCESS) - Logger::error(std::string("NVML: Failed to get device name: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get device name: ") + nvmlErrorString(result)); else { gpu_names[i] = string(name); for (const auto& brand : {"NVIDIA", "Nvidia", "AMD", "Amd", "Intel", "(R)", "(TM)"}) { @@ -902,14 +902,14 @@ namespace Gpu { unsigned int max_power; result = nvmlDeviceGetPowerManagementLimit(devices[i], &max_power); if (result != NVML_SUCCESS) - Logger::error(std::string("NVML: Failed to get maximum GPU power draw, defaulting to 225W: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get maximum GPU power draw, defaulting to 225W: ") + nvmlErrorString(result)); else gpus[i].pwr_max_usage = max_power; //? Get temp_max unsigned int temp_max; result = nvmlDeviceGetTemperatureThreshold(devices[i], NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, &temp_max); if (result != NVML_SUCCESS) - Logger::error(std::string("NVML: Failed to get maximum GPU temperature, defaulting to 110: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get maximum GPU temperature, defaulting to 110: ") + nvmlErrorString(result)); else gpus[i].temp_max = (long long)temp_max; } initialized = true; @@ -939,35 +939,39 @@ namespace Gpu { if (!initialized) return false; #if defined(GPU_NVIDIA) nvmlReturn_t result; - + // DebugTimer gpu_nvidia("Nvidia Total"); for (unsigned int i = 0; i < device_count; ++i) { //? GPU & memory utilization + // DebugTimer nvTimer("Nv utilization"); nvmlUtilization_t utilization; result = nvmlDeviceGetUtilizationRates(devices[i], &utilization); if (result != NVML_SUCCESS) { - Logger::error(std::string("NVML: Failed to get GPU utilization: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get GPU utilization: ") + nvmlErrorString(result)); } else { gpus_slice[i].gpu_percent.push_back((long long)utilization.gpu); gpus_slice[i].mem_utilization_percent.push_back((long long)utilization.memory); } + // nvTimer.stop_rename_reset("Nv clock"); //? Clock speeds unsigned int gpu_clock, mem_clock; + result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_GRAPHICS, &gpu_clock); if (result != NVML_SUCCESS) - Logger::error(std::string("NVML: Failed to get GPU clock speed: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get GPU clock speed: ") + nvmlErrorString(result)); else gpus_slice[i].gpu_clock_speed = (long long)gpu_clock; result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_MEM, &mem_clock); if (result != NVML_SUCCESS) - Logger::error(std::string("NVML: Failed to get VRAM clock speed: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get VRAM clock speed: ") + nvmlErrorString(result)); else gpus_slice[i].mem_clock_speed = (long long)mem_clock; + // nvTimer.stop_rename_reset("Nv power"); //? Power usage & state unsigned int power; result = nvmlDeviceGetPowerUsage(devices[i], &power); if (result != NVML_SUCCESS) { - Logger::error(std::string("NVML: Failed to get GPU power usage: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get GPU power usage: ") + nvmlErrorString(result)); } else { gpus_slice[i].pwr_usage = (long long)power; gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll)); @@ -976,23 +980,25 @@ namespace Gpu { nvmlPstates_t pState; result = nvmlDeviceGetPowerState(devices[i], &pState); if (result != NVML_SUCCESS) - Logger::error(std::string("NVML: Failed to get GPU power state: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get GPU power state: ") + nvmlErrorString(result)); else gpus_slice[i].pwr_state = static_cast(pState); + // nvTimer.stop_rename_reset("Nv temp"); //? GPU temperature if (Config::getB("check_temp")) { unsigned int temp; nvmlReturn_t result = nvmlDeviceGetTemperature(devices[i], NVML_TEMPERATURE_GPU, &temp); if (result != NVML_SUCCESS) - Logger::error(std::string("NVML: Failed to get GPU temperature: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get GPU temperature: ") + nvmlErrorString(result)); else gpus_slice[i].temp.push_back((long long)temp); } + // nvTimer.stop_rename_reset("Nv mem"); //? Memory info nvmlMemory_t memory; result = nvmlDeviceGetMemoryInfo(devices[i], &memory); if (result != NVML_SUCCESS) { - Logger::error(std::string("NVML: Failed to get VRAM info: ") + nvmlErrorString(result)); + Logger::debug(std::string("NVML: Failed to get VRAM info: ") + nvmlErrorString(result)); } else { gpus_slice[i].mem_total = memory.total; gpus_slice[i].mem_used = memory.used; @@ -1002,6 +1008,7 @@ namespace Gpu { gpus_slice[i].mem_used_percent.push_back(used_percent); } + //nvTimer.stop_rename_reset("Nv pcie"); //? PCIe link speeds unsigned int tx,rx; result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_TX_BYTES, &tx); @@ -1014,7 +1021,7 @@ namespace Gpu { Logger::error(std::string("NVML: Failed to get PCIe RX throughput: ") + nvmlErrorString(result)); else gpus_slice[i].pcie_rx = (long long)rx; - //? TODO: Processes using GPU + //? TODO: Processes using GPU /*unsigned int proc_info_len; nvmlProcessInfo_t* proc_info = 0; result = nvmlDeviceGetComputeRunningProcesses_v3(device, &proc_info_len, proc_info); @@ -1042,13 +1049,17 @@ namespace Gpu { rsmi_status_t result; result = rsmi_init(0); - if (result != RSMI_STATUS_SUCCESS) - Logger::error("Failed to initialize ROCm SMI, AMD GPUs will not be detected"); + if (result != RSMI_STATUS_SUCCESS) { + Logger::debug("Failed to initialize ROCm SMI, AMD GPUs will not be detected"); + return false; + } //? Device count result = rsmi_num_monitor_devices(&device_count); - if (result != RSMI_STATUS_SUCCESS) - Logger::error("ROCm SMI: Failed to fetch number of devices"); + if (result != RSMI_STATUS_SUCCESS) { + Logger::debug("ROCm SMI: Failed to fetch number of devices"); + return false; + } if (device_count > 0) { gpus.resize(gpus.size() + device_count); @@ -1061,21 +1072,21 @@ namespace Gpu { char name[NVML_DEVICE_NAME_BUFFER_SIZE]; // ROCm SMI does not provide a constant for this as far as I can tell, this should be good enough result = rsmi_dev_name_get(i, name, NVML_DEVICE_NAME_BUFFER_SIZE); if (result != RSMI_STATUS_SUCCESS) - Logger::error("ROCm SMI: Failed to get device name"); + Logger::debug("ROCm SMI: Failed to get device name"); else gpu_names[offset] = string(name); //? Power usage uint64_t max_power; result = rsmi_dev_power_cap_get(i, 0, &max_power); if (result != RSMI_STATUS_SUCCESS) - Logger::error("ROCm SMI: Failed to get maximum GPU power draw, defaulting to 225W"); + Logger::debug("ROCm SMI: Failed to get maximum GPU power draw, defaulting to 225W"); else gpus[offset].pwr_max_usage = (long long)(max_power/1000); // RSMI reports power in microWatts //? Get temp_max int64_t temp_max; result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temp_max); if (result != RSMI_STATUS_SUCCESS) - Logger::error("ROCm SMI: Failed to get maximum GPU temperature, defaulting to 110"); + Logger::debug("ROCm SMI: Failed to get maximum GPU temperature, defaulting to 110"); else gpus[offset].temp_max = (long long)temp_max; } initialized = true; @@ -1110,14 +1121,14 @@ namespace Gpu { uint32_t utilization; result = rsmi_dev_busy_percent_get(i, &utilization); if (result != RSMI_STATUS_SUCCESS) - Logger::error("ROCm SMI: Failed to get GPU utilization"); + Logger::debug("ROCm SMI: Failed to get GPU utilization"); else gpus_slice[i].gpu_percent.push_back((long long)utilization); //? Memory utilization result = rsmi_dev_memory_busy_percent_get(i, &utilization); if (result != RSMI_STATUS_SUCCESS) - Logger::error("ROCm SMI: Failed to get VRAM utilization"); + Logger::debug("ROCm SMI: Failed to get VRAM utilization"); else gpus_slice[i].mem_utilization_percent.push_back((long long)utilization); @@ -1125,19 +1136,19 @@ namespace Gpu { rsmi_frequencies_t frequencies; result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &frequencies); if (result != RSMI_STATUS_SUCCESS) - Logger::error("ROCm SMI: Failed to get GPU clock speed: "); + Logger::debug("ROCm SMI: Failed to get GPU clock speed: "); else gpus_slice[i].gpu_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &frequencies); if (result != RSMI_STATUS_SUCCESS) - Logger::error("ROCm SMI: Failed to get VRAM clock speed: "); + Logger::debug("ROCm SMI: Failed to get VRAM clock speed: "); else gpus_slice[i].mem_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz //? Power usage & state uint64_t power; result = rsmi_dev_power_ave_get(i, 0, &power); if (result != RSMI_STATUS_SUCCESS) - Logger::error("ROCm SMI: Failed to get GPU power usage"); + Logger::debug("ROCm SMI: Failed to get GPU power usage"); else gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll)); gpus_slice[i].pwr_state = 32; // NVML_PSTATE_UNKNOWN; won't display in GUI @@ -1147,7 +1158,7 @@ namespace Gpu { int64_t temp; result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temp); if (result != RSMI_STATUS_SUCCESS) - Logger::error("ROCm SMI: Failed to get GPU temperature"); + Logger::debug("ROCm SMI: Failed to get GPU temperature"); else gpus_slice[i].temp.push_back((long long)temp/1000); } @@ -1155,13 +1166,13 @@ namespace Gpu { uint64_t total, used; result = rsmi_dev_memory_total_get(i, RSMI_MEM_TYPE_VRAM, &total); if (result != RSMI_STATUS_SUCCESS) { - Logger::error("ROCm SMI: Failed to get total VRAM"); + Logger::debug("ROCm SMI: Failed to get total VRAM"); } else { gpus_slice[i].mem_total = total; result = rsmi_dev_memory_usage_get(i, RSMI_MEM_TYPE_VRAM, &used); if (result != RSMI_STATUS_SUCCESS) { - Logger::error("ROCm SMI: Failed to get VRAM usage"); + Logger::debug("ROCm SMI: Failed to get VRAM usage"); } else { gpus_slice[i].mem_used = used; @@ -1174,7 +1185,7 @@ namespace Gpu { uint64_t tx, rx; result = rsmi_dev_pci_throughput_get(i, &tx, &rx, 0); if (result != RSMI_STATUS_SUCCESS) { - Logger::error("ROCm SMI: Failed to get PCIe throughput"); + Logger::debug("ROCm SMI: Failed to get PCIe throughput"); } else { gpus_slice[i].pcie_tx = (long long)tx; gpus_slice[i].pcie_rx = (long long)rx; @@ -1195,6 +1206,8 @@ namespace Gpu { auto collect(bool no_update) -> vector& { if (Runner::stopping or (no_update and not gpus.empty())) return gpus; + // DebugTimer gpu_timer("GPU Total"); + //* Collect data Nvml::collect(gpus.data()); // raw pointer to vector data, size == Nvml::device_count Rsmi::collect(gpus.data() + Nvml::device_count); // size = Rsmi::device_count