123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128 |
#include "ThreadGuardian.h"
#include "../MppDecoder/MppManager.h"
#include "../AIManager/RKNNManager.h"
#include "../LogRecorder/LogOutput.h"
#include <iostream>
#include <utility>
#include <vector>
- ThreadGuardian::ThreadGuardian() : m_running(false) {
- LOG_INFO("ThreadGuardian 初始化");
- }
- ThreadGuardian::~ThreadGuardian() {
- stopMonitoring();
- LOG_INFO("ThreadGuardian 销毁");
- }
- void ThreadGuardian::startMonitoring() {
- if (!m_running.exchange(true)) {
- LOG_INFO("启动线程监控");
- m_monitorThread = std::thread(&ThreadGuardian::monitorThreads, this);
- }
- }
- void ThreadGuardian::stopMonitoring() {
- if (m_running.exchange(false)) {
- LOG_INFO("停止线程监控");
- if (m_monitorThread.joinable()) {
- m_monitorThread.join();
- }
- }
- }
- void ThreadGuardian::registerMppThread(int threadIndex, std::function<void(int)> restartFunc) {
- std::lock_guard<std::mutex> lock(m_threadMapMutex);
-
- ThreadInfo info;
- info.threadIndex = threadIndex;
- info.name = "mppThread_" + std::to_string(threadIndex);
- info.isRunning = true;
- info.lastAliveTime = std::chrono::steady_clock::now();
- info.restartFunc = restartFunc;
-
- m_threads[info.name] = info;
- LOG_INFO("注册MPP线程: {}", info.name);
- }
- void ThreadGuardian::registerAIThread(int threadIndex, std::function<void(int)> restartFunc) {
- std::lock_guard<std::mutex> lock(m_threadMapMutex);
-
- ThreadInfo info;
- info.threadIndex = threadIndex;
- info.name = "rknnThread_" + std::to_string(threadIndex);
- info.isRunning = true;
- info.lastAliveTime = std::chrono::steady_clock::now();
- info.restartFunc = restartFunc;
-
- m_threads[info.name] = info;
- LOG_INFO("注册AI线程: {}", info.name);
- }
- void ThreadGuardian::updateThreadHeartbeat(std::string threadName) {
- std::lock_guard<std::mutex> lock(m_threadMapMutex);
-
- auto it = m_threads.find(threadName);
- if (it != m_threads.end()) {
- it->second.lastAliveTime = std::chrono::steady_clock::now();
- it->second.isRunning = true;
- }
- }
- void ThreadGuardian::unregisterThread(std::string threadName) {
- std::lock_guard<std::mutex> lock(m_threadMapMutex);
-
- auto it = m_threads.find(threadName);
- if (it != m_threads.end()) {
- LOG_INFO("注销线程: {}", it->second.name);
- m_threads.erase(it);
- }
- }
- void ThreadGuardian::monitorThreads() {
- LOG_INFO("线程监控已启动");
-
- while (m_running) {
- // 睡眠一段时间后检查线程状态
- std::this_thread::sleep_for(m_checkInterval);
-
- std::lock_guard<std::mutex> lock(m_threadMapMutex);
- auto now = std::chrono::steady_clock::now();
-
- for (auto& pair : m_threads) {
- auto& info = pair.second;
- auto timeSinceLastHeartbeat = std::chrono::duration_cast<std::chrono::seconds>(now - info.lastAliveTime);
-
- // 如果线程心跳超时,认为线程异常
- if (timeSinceLastHeartbeat > m_threadTimeout) {
- LOG_WARN("检测到线程 {} 可能已退出,尝试恢复", info.name);
- info.isRunning = false;
-
- // 调用重启函数
- restartThread(info);
-
- // 更新心跳时间,避免重复恢复
- info.lastAliveTime = now;
- }
- }
- }
-
- LOG_INFO("线程监控已停止");
- }
- void ThreadGuardian::restartThread(const ThreadInfo& threadInfo) {
- try {
- LOG_INFO("正在恢复线程: {}", threadInfo.name);
-
- // 执行重启函数
- if (threadInfo.restartFunc) {
- threadInfo.restartFunc(threadInfo.threadIndex);
- LOG_INFO("线程 {} 恢复成功", threadInfo.name);
- } else {
- LOG_ERROR("线程 {} 没有设置重启函数", threadInfo.name);
- }
- } catch (const std::exception& e) {
- LOG_ERROR("恢复线程 {} 失败: {}", threadInfo.name, e.what());
- } catch (...) {
- LOG_ERROR("恢复线程 {} 时发生未知异常", threadInfo.name);
- }
- }
|