Add DebugTimer class and change some Logger::error calls to Logger::debug

This commit is contained in:
aristocratos 2023-05-21 13:52:19 +02:00
parent 2e68c0b916
commit 1fee2bc08b
3 changed files with 167 additions and 38 deletions

View file

@ -541,6 +541,74 @@ namespace Tools {
return (user != NULL ? user : "");
}
DebugTimer::DebugTimer(const string name, bool start, bool delayed_report) : name(name), delayed_report(delayed_report) {
if (start)
this->start();
}
DebugTimer::~DebugTimer() {
if (running)
this->stop(true);
this->force_report();
}
void DebugTimer::start() {
if (running) return;
running = true;
start_time = time_micros();
}
void DebugTimer::stop(bool report) {
if (not running) return;
running = false;
elapsed_time = time_micros() - start_time;
if (report) this->report();
}
void DebugTimer::reset(bool restart) {
running = false;
start_time = 0;
elapsed_time = 0;
if (restart) this->start();
}
void DebugTimer::stop_rename_reset(const string &new_name, bool report, bool restart) {
this->stop(report);
name = new_name;
this->reset(restart);
}
void DebugTimer::report() {
string report_line;
if (start_time == 0 and elapsed_time == 0)
report_line = format("DebugTimer::report() warning -> Timer [{}] has not been started!", name);
else if (running)
report_line = format(custom_locale, "Timer [{}] (running) currently at {:L} μs", name, time_micros() - start_time);
else
report_line = format(custom_locale, "Timer [{}] took {:L} μs", name, elapsed_time);
if (delayed_report)
report_buffer.emplace_back(report_line);
else
Logger::log_write(log_level, report_line);
}
void DebugTimer::force_report() {
if (report_buffer.empty()) return;
for (const auto& line : report_buffer)
Logger::log_write(log_level, line);
report_buffer.clear();
}
uint64_t DebugTimer::elapsed() {
if (running)
return time_micros() - start_time;
return elapsed_time;
}
bool DebugTimer::is_running() {
return running;
}
}
namespace Logger {
@ -572,7 +640,7 @@ namespace Logger {
loglevel = v_index(log_levels, level);
}
void log_write(const size_t level, const string& msg) {
void log_write(const Level level, const string& msg) {
if (loglevel < level or logfile.empty()) return;
atomic_lock lck(busy, true);
lose_priv neutered{};

View file

@ -152,6 +152,12 @@ namespace Term {
namespace Tools {
constexpr auto SSmax = std::numeric_limits<std::streamsize>::max();
class MyNumPunct : public std::numpunct<char> {
protected:
virtual char do_thousands_sep() const { return '\''; }
virtual std::string do_grouping() const { return "\03"; }
};
size_t wide_ulen(const string& str);
size_t wide_ulen(const std::wstring& w_str);
@ -335,7 +341,6 @@ namespace Tools {
//* Convert a celsius value to celsius, fahrenheit, kelvin or rankin and return tuple with new value and unit.
auto celsius_to(const long long& celsius, const string& scale) -> tuple<long long, string>;
}
//* Simple logging implementation
@ -349,13 +354,56 @@ namespace Logger {
};
extern std::filesystem::path logfile;
enum Level : size_t {
DISABLED = 0,
ERROR = 1,
WARNING = 2,
INFO = 3,
DEBUG = 4,
};
//* Set log level, valid arguments: "DISABLED", "ERROR", "WARNING", "INFO" and "DEBUG"
void set(const string& level);
void log_write(const size_t level, const string& msg);
inline void error(const string msg) { log_write(1, msg); }
inline void warning(const string msg) { log_write(2, msg); }
inline void info(const string msg) { log_write(3, msg); }
inline void debug(const string msg) { log_write(4, msg); }
void log_write(const Level level, const string& msg);
inline void error(const string msg) { log_write(ERROR, msg); }
inline void warning(const string msg) { log_write(WARNING, msg); }
inline void info(const string msg) { log_write(INFO, msg); }
inline void debug(const string msg) { log_write(DEBUG, msg); }
}
namespace Tools {
//* Creates a named timer that is started on construct (by default) and reports elapsed time in microseconds to Logger::debug() on destruct if running
//* Unless delayed_report is set to false, all reporting is buffered and delayed until DebugTimer is destructed or .force_report() is called
//* Usage example: Tools::DebugTimer timer(name:"myTimer", [start:true], [delayed_report:true]) // Create timer and start
//* timer.stop(); // Stop timer and report elapsed time
//* timer.stop_rename_reset("myTimer2"); // Stop timer, report elapsed time, rename timer, reset and restart
class DebugTimer {
uint64_t start_time{};
uint64_t elapsed_time{};
bool running{};
std::locale custom_locale = std::locale(std::locale::classic(), new Tools::MyNumPunct);
vector<string> report_buffer{};
public:
string name{};
bool delayed_report{};
Logger::Level log_level = Logger::DEBUG;
DebugTimer() = default;
DebugTimer(const string name, bool start = true, bool delayed_report = true);
~DebugTimer();
void start();
void stop(bool report = true);
void reset(bool restart = true);
//* Stops and reports (default), renames timer then resets and restarts (default)
void stop_rename_reset(const string& new_name, bool report = true, bool restart = true);
void report();
void force_report();
uint64_t elapsed();
bool is_running();
};
}

View file

@ -861,14 +861,14 @@ namespace Gpu {
#if defined(GPU_NVIDIA)
nvmlReturn_t result = nvmlInit();
if (result != NVML_SUCCESS) {
Logger::warning(std::string("Failed to initialize NVML, NVIDIA GPUs will not be detected: ") + nvmlErrorString(result));
Logger::debug(std::string("Failed to initialize NVML, NVIDIA GPUs will not be detected: ") + nvmlErrorString(result));
return false;
}
//? Device count
result = nvmlDeviceGetCount(&device_count);
if (result != NVML_SUCCESS) {
Logger::error(std::string("NVML: Failed to get device count: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get device count: ") + nvmlErrorString(result));
return false;
}
@ -881,7 +881,7 @@ namespace Gpu {
//? Device Handle
result = nvmlDeviceGetHandleByIndex(i, devices.data() + i*sizeof(nvmlDevice_t));
if (result != NVML_SUCCESS) {
Logger::error(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result));
continue;
}
@ -889,7 +889,7 @@ namespace Gpu {
char name[NVML_DEVICE_NAME_BUFFER_SIZE];
result = nvmlDeviceGetName(devices[i], name, NVML_DEVICE_NAME_BUFFER_SIZE);
if (result != NVML_SUCCESS)
Logger::error(std::string("NVML: Failed to get device name: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get device name: ") + nvmlErrorString(result));
else {
gpu_names[i] = string(name);
for (const auto& brand : {"NVIDIA", "Nvidia", "AMD", "Amd", "Intel", "(R)", "(TM)"}) {
@ -902,14 +902,14 @@ namespace Gpu {
unsigned int max_power;
result = nvmlDeviceGetPowerManagementLimit(devices[i], &max_power);
if (result != NVML_SUCCESS)
Logger::error(std::string("NVML: Failed to get maximum GPU power draw, defaulting to 225W: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get maximum GPU power draw, defaulting to 225W: ") + nvmlErrorString(result));
else gpus[i].pwr_max_usage = max_power;
//? Get temp_max
unsigned int temp_max;
result = nvmlDeviceGetTemperatureThreshold(devices[i], NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, &temp_max);
if (result != NVML_SUCCESS)
Logger::error(std::string("NVML: Failed to get maximum GPU temperature, defaulting to 110: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get maximum GPU temperature, defaulting to 110: ") + nvmlErrorString(result));
else gpus[i].temp_max = (long long)temp_max;
}
initialized = true;
@ -939,35 +939,39 @@ namespace Gpu {
if (!initialized) return false;
#if defined(GPU_NVIDIA)
nvmlReturn_t result;
// DebugTimer gpu_nvidia("Nvidia Total");
for (unsigned int i = 0; i < device_count; ++i) {
//? GPU & memory utilization
// DebugTimer nvTimer("Nv utilization");
nvmlUtilization_t utilization;
result = nvmlDeviceGetUtilizationRates(devices[i], &utilization);
if (result != NVML_SUCCESS) {
Logger::error(std::string("NVML: Failed to get GPU utilization: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get GPU utilization: ") + nvmlErrorString(result));
} else {
gpus_slice[i].gpu_percent.push_back((long long)utilization.gpu);
gpus_slice[i].mem_utilization_percent.push_back((long long)utilization.memory);
}
// nvTimer.stop_rename_reset("Nv clock");
//? Clock speeds
unsigned int gpu_clock, mem_clock;
result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_GRAPHICS, &gpu_clock);
if (result != NVML_SUCCESS)
Logger::error(std::string("NVML: Failed to get GPU clock speed: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get GPU clock speed: ") + nvmlErrorString(result));
else gpus_slice[i].gpu_clock_speed = (long long)gpu_clock;
result = nvmlDeviceGetClockInfo(devices[i], NVML_CLOCK_MEM, &mem_clock);
if (result != NVML_SUCCESS)
Logger::error(std::string("NVML: Failed to get VRAM clock speed: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get VRAM clock speed: ") + nvmlErrorString(result));
else gpus_slice[i].mem_clock_speed = (long long)mem_clock;
// nvTimer.stop_rename_reset("Nv power");
//? Power usage & state
unsigned int power;
result = nvmlDeviceGetPowerUsage(devices[i], &power);
if (result != NVML_SUCCESS) {
Logger::error(std::string("NVML: Failed to get GPU power usage: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get GPU power usage: ") + nvmlErrorString(result));
} else {
gpus_slice[i].pwr_usage = (long long)power;
gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
@ -976,23 +980,25 @@ namespace Gpu {
nvmlPstates_t pState;
result = nvmlDeviceGetPowerState(devices[i], &pState);
if (result != NVML_SUCCESS)
Logger::error(std::string("NVML: Failed to get GPU power state: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get GPU power state: ") + nvmlErrorString(result));
else gpus_slice[i].pwr_state = static_cast<int>(pState);
// nvTimer.stop_rename_reset("Nv temp");
//? GPU temperature
if (Config::getB("check_temp")) {
unsigned int temp;
nvmlReturn_t result = nvmlDeviceGetTemperature(devices[i], NVML_TEMPERATURE_GPU, &temp);
if (result != NVML_SUCCESS)
Logger::error(std::string("NVML: Failed to get GPU temperature: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get GPU temperature: ") + nvmlErrorString(result));
else gpus_slice[i].temp.push_back((long long)temp);
}
// nvTimer.stop_rename_reset("Nv mem");
//? Memory info
nvmlMemory_t memory;
result = nvmlDeviceGetMemoryInfo(devices[i], &memory);
if (result != NVML_SUCCESS) {
Logger::error(std::string("NVML: Failed to get VRAM info: ") + nvmlErrorString(result));
Logger::debug(std::string("NVML: Failed to get VRAM info: ") + nvmlErrorString(result));
} else {
gpus_slice[i].mem_total = memory.total;
gpus_slice[i].mem_used = memory.used;
@ -1002,6 +1008,7 @@ namespace Gpu {
gpus_slice[i].mem_used_percent.push_back(used_percent);
}
//nvTimer.stop_rename_reset("Nv pcie");
//? PCIe link speeds
unsigned int tx,rx;
result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_TX_BYTES, &tx);
@ -1014,7 +1021,7 @@ namespace Gpu {
Logger::error(std::string("NVML: Failed to get PCIe RX throughput: ") + nvmlErrorString(result));
else gpus_slice[i].pcie_rx = (long long)rx;
//? TODO: Processes using GPU
//? TODO: Processes using GPU
/*unsigned int proc_info_len;
nvmlProcessInfo_t* proc_info = 0;
result = nvmlDeviceGetComputeRunningProcesses_v3(device, &proc_info_len, proc_info);
@ -1042,13 +1049,17 @@ namespace Gpu {
rsmi_status_t result;
result = rsmi_init(0);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("Failed to initialize ROCm SMI, AMD GPUs will not be detected");
if (result != RSMI_STATUS_SUCCESS) {
Logger::debug("Failed to initialize ROCm SMI, AMD GPUs will not be detected");
return false;
}
//? Device count
result = rsmi_num_monitor_devices(&device_count);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("ROCm SMI: Failed to fetch number of devices");
if (result != RSMI_STATUS_SUCCESS) {
Logger::debug("ROCm SMI: Failed to fetch number of devices");
return false;
}
if (device_count > 0) {
gpus.resize(gpus.size() + device_count);
@ -1061,21 +1072,21 @@ namespace Gpu {
char name[NVML_DEVICE_NAME_BUFFER_SIZE]; // ROCm SMI does not provide a constant for this as far as I can tell, this should be good enough
result = rsmi_dev_name_get(i, name, NVML_DEVICE_NAME_BUFFER_SIZE);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("ROCm SMI: Failed to get device name");
Logger::debug("ROCm SMI: Failed to get device name");
else gpu_names[offset] = string(name);
//? Power usage
uint64_t max_power;
result = rsmi_dev_power_cap_get(i, 0, &max_power);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("ROCm SMI: Failed to get maximum GPU power draw, defaulting to 225W");
Logger::debug("ROCm SMI: Failed to get maximum GPU power draw, defaulting to 225W");
else gpus[offset].pwr_max_usage = (long long)(max_power/1000); // RSMI reports power in microWatts
//? Get temp_max
int64_t temp_max;
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temp_max);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("ROCm SMI: Failed to get maximum GPU temperature, defaulting to 110");
Logger::debug("ROCm SMI: Failed to get maximum GPU temperature, defaulting to 110");
else gpus[offset].temp_max = (long long)temp_max;
}
initialized = true;
@ -1110,14 +1121,14 @@ namespace Gpu {
uint32_t utilization;
result = rsmi_dev_busy_percent_get(i, &utilization);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("ROCm SMI: Failed to get GPU utilization");
Logger::debug("ROCm SMI: Failed to get GPU utilization");
else
gpus_slice[i].gpu_percent.push_back((long long)utilization);
//? Memory utilization
result = rsmi_dev_memory_busy_percent_get(i, &utilization);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("ROCm SMI: Failed to get VRAM utilization");
Logger::debug("ROCm SMI: Failed to get VRAM utilization");
else
gpus_slice[i].mem_utilization_percent.push_back((long long)utilization);
@ -1125,19 +1136,19 @@ namespace Gpu {
rsmi_frequencies_t frequencies;
result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &frequencies);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("ROCm SMI: Failed to get GPU clock speed: ");
Logger::debug("ROCm SMI: Failed to get GPU clock speed: ");
else gpus_slice[i].gpu_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz
result = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &frequencies);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("ROCm SMI: Failed to get VRAM clock speed: ");
Logger::debug("ROCm SMI: Failed to get VRAM clock speed: ");
else gpus_slice[i].mem_clock_speed = (long long)frequencies.frequency[frequencies.current]/1000000; // Hz to MHz
//? Power usage & state
uint64_t power;
result = rsmi_dev_power_ave_get(i, 0, &power);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("ROCm SMI: Failed to get GPU power usage");
Logger::debug("ROCm SMI: Failed to get GPU power usage");
else
gpus_slice[i].pwr_percent.push_back(clamp((long long)round((double)gpus_slice[i].pwr_usage * 100.0 / (double)gpus_slice[i].pwr_max_usage), 0ll, 100ll));
gpus_slice[i].pwr_state = 32; // NVML_PSTATE_UNKNOWN; won't display in GUI
@ -1147,7 +1158,7 @@ namespace Gpu {
int64_t temp;
result = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temp);
if (result != RSMI_STATUS_SUCCESS)
Logger::error("ROCm SMI: Failed to get GPU temperature");
Logger::debug("ROCm SMI: Failed to get GPU temperature");
else gpus_slice[i].temp.push_back((long long)temp/1000);
}
@ -1155,13 +1166,13 @@ namespace Gpu {
uint64_t total, used;
result = rsmi_dev_memory_total_get(i, RSMI_MEM_TYPE_VRAM, &total);
if (result != RSMI_STATUS_SUCCESS) {
Logger::error("ROCm SMI: Failed to get total VRAM");
Logger::debug("ROCm SMI: Failed to get total VRAM");
} else {
gpus_slice[i].mem_total = total;
result = rsmi_dev_memory_usage_get(i, RSMI_MEM_TYPE_VRAM, &used);
if (result != RSMI_STATUS_SUCCESS) {
Logger::error("ROCm SMI: Failed to get VRAM usage");
Logger::debug("ROCm SMI: Failed to get VRAM usage");
} else {
gpus_slice[i].mem_used = used;
@ -1174,7 +1185,7 @@ namespace Gpu {
uint64_t tx, rx;
result = rsmi_dev_pci_throughput_get(i, &tx, &rx, 0);
if (result != RSMI_STATUS_SUCCESS) {
Logger::error("ROCm SMI: Failed to get PCIe throughput");
Logger::debug("ROCm SMI: Failed to get PCIe throughput");
} else {
gpus_slice[i].pcie_tx = (long long)tx;
gpus_slice[i].pcie_rx = (long long)rx;
@ -1195,6 +1206,8 @@ namespace Gpu {
auto collect(bool no_update) -> vector<gpu_info>& {
if (Runner::stopping or (no_update and not gpus.empty())) return gpus;
// DebugTimer gpu_timer("GPU Total");
//* Collect data
Nvml::collect(gpus.data()); // raw pointer to vector data, size == Nvml::device_count
Rsmi::collect(gpus.data() + Nvml::device_count); // size = Rsmi::device_count