// ThreadGuardian.cpp (4.0 KB)
  #include "ThreadGuardian.h"
  #include "../MppDecoder/MppManager.h"
  #include "../AIManager/RKNNManager.h"
  #include "../LogRecorder/LogOutput.h"
  #include <iostream>
  #include <utility>
  #include <vector>
  6. ThreadGuardian::ThreadGuardian() : m_running(false) {
  7. LOG_INFO("ThreadGuardian 初始化");
  8. }
  9. ThreadGuardian::~ThreadGuardian() {
  10. stopMonitoring();
  11. LOG_INFO("ThreadGuardian 销毁");
  12. }
  13. void ThreadGuardian::startMonitoring() {
  14. if (!m_running.exchange(true)) {
  15. LOG_INFO("启动线程监控");
  16. m_monitorThread = std::thread(&ThreadGuardian::monitorThreads, this);
  17. }
  18. }
  19. void ThreadGuardian::stopMonitoring() {
  20. if (m_running.exchange(false)) {
  21. LOG_INFO("停止线程监控");
  22. if (m_monitorThread.joinable()) {
  23. m_monitorThread.join();
  24. }
  25. }
  26. }
  27. void ThreadGuardian::registerMppThread(int threadIndex, std::function<void(int)> restartFunc) {
  28. std::lock_guard<std::mutex> lock(m_threadMapMutex);
  29. ThreadInfo info;
  30. info.threadIndex = threadIndex;
  31. info.name = "mppThread_" + std::to_string(threadIndex);
  32. info.isRunning = true;
  33. info.lastAliveTime = std::chrono::steady_clock::now();
  34. info.restartFunc = restartFunc;
  35. m_threads[info.name] = info;
  36. LOG_INFO("注册MPP线程: {}", info.name);
  37. }
  38. void ThreadGuardian::registerAIThread(int threadIndex, std::function<void(int)> restartFunc) {
  39. std::lock_guard<std::mutex> lock(m_threadMapMutex);
  40. ThreadInfo info;
  41. info.threadIndex = threadIndex;
  42. info.name = "rknnThread_" + std::to_string(threadIndex);
  43. info.isRunning = true;
  44. info.lastAliveTime = std::chrono::steady_clock::now();
  45. info.restartFunc = restartFunc;
  46. m_threads[info.name] = info;
  47. LOG_INFO("注册AI线程: {}", info.name);
  48. }
  49. void ThreadGuardian::updateThreadHeartbeat(std::string threadName) {
  50. std::lock_guard<std::mutex> lock(m_threadMapMutex);
  51. auto it = m_threads.find(threadName);
  52. if (it != m_threads.end()) {
  53. it->second.lastAliveTime = std::chrono::steady_clock::now();
  54. it->second.isRunning = true;
  55. }
  56. }
  57. void ThreadGuardian::unregisterThread(std::string threadName) {
  58. std::lock_guard<std::mutex> lock(m_threadMapMutex);
  59. auto it = m_threads.find(threadName);
  60. if (it != m_threads.end()) {
  61. LOG_INFO("注销线程: {}", it->second.name);
  62. m_threads.erase(it);
  63. }
  64. }
  65. void ThreadGuardian::monitorThreads() {
  66. LOG_INFO("线程监控已启动");
  67. while (m_running) {
  68. // 睡眠一段时间后检查线程状态
  69. std::this_thread::sleep_for(m_checkInterval);
  70. std::lock_guard<std::mutex> lock(m_threadMapMutex);
  71. auto now = std::chrono::steady_clock::now();
  72. for (auto& pair : m_threads) {
  73. auto& info = pair.second;
  74. auto timeSinceLastHeartbeat = std::chrono::duration_cast<std::chrono::seconds>(now - info.lastAliveTime);
  75. // 如果线程心跳超时,认为线程异常
  76. if (timeSinceLastHeartbeat > m_threadTimeout) {
  77. LOG_WARN("检测到线程 {} 可能已退出,尝试恢复", info.name);
  78. info.isRunning = false;
  79. // 调用重启函数
  80. restartThread(info);
  81. // 更新心跳时间,避免重复恢复
  82. info.lastAliveTime = now;
  83. }
  84. }
  85. }
  86. LOG_INFO("线程监控已停止");
  87. }
  88. void ThreadGuardian::restartThread(const ThreadInfo& threadInfo) {
  89. try {
  90. LOG_INFO("正在恢复线程: {}", threadInfo.name);
  91. // 执行重启函数
  92. if (threadInfo.restartFunc) {
  93. threadInfo.restartFunc(threadInfo.threadIndex);
  94. LOG_INFO("线程 {} 恢复成功", threadInfo.name);
  95. } else {
  96. LOG_ERROR("线程 {} 没有设置重启函数", threadInfo.name);
  97. }
  98. } catch (const std::exception& e) {
  99. LOG_ERROR("恢复线程 {} 失败: {}", threadInfo.name, e.what());
  100. } catch (...) {
  101. LOG_ERROR("恢复线程 {} 时发生未知异常", threadInfo.name);
  102. }
  103. }