NvCodecUtils.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. /*
  2. * Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
  3. *
  4. * Please refer to the NVIDIA end user license agreement (EULA) associated
  5. * with this source code for terms and conditions that govern your use of
  6. * this software. Any use, reproduction, disclosure, or distribution of
  7. * this software and related documentation outside the terms of the EULA
  8. * is strictly prohibited.
  9. *
  10. */
  11. //---------------------------------------------------------------------------
  12. //! \file NvCodecUtils.h
  13. //! \brief Miscellaneous classes and error checking functions.
  14. //!
  15. //! Used by Transcode/Encode sample apps for reading input files, multithreading, performance measurement or colorspace conversion while decoding.
  16. //---------------------------------------------------------------------------
  17. #pragma once
  #include <assert.h>
  #include <stdint.h>
  #include <string.h>
  #include <sys/stat.h>
  #include <chrono>
  #include <condition_variable>
  #include <fstream>
  #include <iomanip>
  #include <ios>
  #include <list>
  #include <memory>
  #include <mutex>
  #include <new>
  #include <sstream>
  #include <stdexcept>
  #include <thread>
  #include <utility>
  #include <vector>
  #include "Logger.h"
  30. extern simplelogger::Logger *logger;
  #ifdef __cuda_cuda_h__
  /**
   * @brief Checks the result of a CUDA driver API call and logs a failure.
   *
   * @param e      Result code returned by the driver API call.
   * @param iLine  Line number reported in the log message.
   * @param szFile File name reported in the log message.
   * @return true when e equals CUDA_SUCCESS; false after logging otherwise.
   */
  inline bool check(CUresult e, int iLine, const char *szFile) {
      if (e == CUDA_SUCCESS) {
          return true;
      }
      const char *szErrName = NULL;
      // The lookup itself can fail and leave the name pointer NULL; streaming a
      // null char pointer is undefined behavior, so substitute a placeholder.
      if (cuGetErrorName(e, &szErrName) != CUDA_SUCCESS || szErrName == NULL) {
          szErrName = "UNRECOGNIZED_ERROR_CODE";
      }
      LOG(FATAL) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile;
      return false;
  }
  #endif
  #ifdef __CUDA_RUNTIME_H__
  /**
   * @brief Checks the result of a CUDA runtime API call and logs a failure.
   *
   * @param e      Result code returned by the runtime API call.
   * @param iLine  Line number reported in the log message.
   * @param szFile File name reported in the log message.
   * @return true when e equals cudaSuccess; false after logging otherwise.
   */
  inline bool check(cudaError_t e, int iLine, const char *szFile) {
      const bool bSucceeded = (e == cudaSuccess);
      if (!bSucceeded) {
          LOG(FATAL) << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile;
      }
      return bSucceeded;
  }
  #endif
  #ifdef _NV_ENCODEAPI_H_
  /**
   * @brief Checks the result of an NVENC API call and logs a failure.
   *
   * The status is translated to its symbolic name through a lookup table that
   * is indexed by the numeric status value. Values outside the table are
   * logged numerically instead of indexing past the end of the table.
   *
   * @param e      Status returned by the NVENC call.
   * @param iLine  Line number reported in the log message.
   * @param szFile File name reported in the log message.
   * @return true when e equals NV_ENC_SUCCESS; false after logging otherwise.
   */
  inline bool check(NVENCSTATUS e, int iLine, const char *szFile) {
      // Built once; the position of each name must match the numeric status value.
      static const char *const aszErrName[] = {
          "NV_ENC_SUCCESS",
          "NV_ENC_ERR_NO_ENCODE_DEVICE",
          "NV_ENC_ERR_UNSUPPORTED_DEVICE",
          "NV_ENC_ERR_INVALID_ENCODERDEVICE",
          "NV_ENC_ERR_INVALID_DEVICE",
          "NV_ENC_ERR_DEVICE_NOT_EXIST",
          "NV_ENC_ERR_INVALID_PTR",
          "NV_ENC_ERR_INVALID_EVENT",
          "NV_ENC_ERR_INVALID_PARAM",
          "NV_ENC_ERR_INVALID_CALL",
          "NV_ENC_ERR_OUT_OF_MEMORY",
          "NV_ENC_ERR_ENCODER_NOT_INITIALIZED",
          "NV_ENC_ERR_UNSUPPORTED_PARAM",
          "NV_ENC_ERR_LOCK_BUSY",
          "NV_ENC_ERR_NOT_ENOUGH_BUFFER",
          "NV_ENC_ERR_INVALID_VERSION",
          "NV_ENC_ERR_MAP_FAILED",
          "NV_ENC_ERR_NEED_MORE_INPUT",
          "NV_ENC_ERR_ENCODER_BUSY",
          "NV_ENC_ERR_EVENT_NOT_REGISTERD",
          "NV_ENC_ERR_GENERIC",
          "NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY",
          "NV_ENC_ERR_UNIMPLEMENTED",
          "NV_ENC_ERR_RESOURCE_REGISTER_FAILED",
          "NV_ENC_ERR_RESOURCE_NOT_REGISTERED",
          "NV_ENC_ERR_RESOURCE_NOT_MAPPED",
      };
      if (e == NV_ENC_SUCCESS) {
          return true;
      }
      const size_t nNames = sizeof(aszErrName) / sizeof(aszErrName[0]);
      // A negative value wraps to a huge index and is rejected by the same test.
      const size_t iName = static_cast<size_t>(e);
      if (iName < nNames) {
          LOG(FATAL) << "NVENC error " << aszErrName[iName] << " at line " << iLine << " in file " << szFile;
      } else {
          LOG(FATAL) << "NVENC error " << static_cast<int>(e) << " (unrecognized status code) at line " << iLine << " in file " << szFile;
      }
      return false;
  }
  #endif
  #ifdef _WINERROR_
  /**
   * @brief Checks an HRESULT and logs a failure with the code in hexadecimal.
   *
   * Every value other than S_OK is reported as an error.
   *
   * @param e      HRESULT to examine.
   * @param iLine  Line number reported in the log message.
   * @param szFile File name reported in the log message.
   * @return true when e equals S_OK; false after logging otherwise.
   */
  inline bool check(HRESULT e, int iLine, const char *szFile) {
      if (e == S_OK) {
          return true;
      }
      // Format the code separately so the hex/uppercase flags do not leak
      // into the line number printed afterwards.
      std::stringstream hexCode;
      hexCode << std::hex << std::uppercase << e;
      LOG(FATAL) << "HRESULT error 0x" << hexCode.str() << " at line " << iLine << " in file " << szFile;
      return false;
  }
  #endif
  #if defined(__gl_h_) || defined(__GL_H__)
  /**
   * @brief Checks an OpenGL error value and logs it when it is non-zero.
   *
   * @param e      GLenum value to examine.
   * @param iLine  Line number reported in the log message.
   * @param szFile File name reported in the log message.
   * @return true when e is 0; false after logging otherwise.
   */
  inline bool check(GLenum e, int iLine, const char *szFile) {
      const bool bClean = (e == 0);
      if (!bClean) {
          LOG(ERROR) << "GLenum error " << e << " at line " << iLine << " in file " << szFile;
      }
      return bClean;
  }
  #endif
  108. inline bool check(int e, int iLine, const char *szFile) {
  109. if (e < 0) {
  110. LOG(ERROR) << "General error " << e << " at line " << iLine << " in file " << szFile;
  111. return false;
  112. }
  113. return true;
  114. }
  115. #define ck(call) check(call, __LINE__, __FILE__)
  /**
   * @brief Move-only wrapper around std::thread that joins on destruction.
   *
   * Assigning a new thread to a wrapper that still holds a joinable thread
   * first joins the held thread; std::thread's own move assignment would call
   * std::terminate in that situation.
   */
  class NvThread
  {
  public:
      NvThread() = default;
      NvThread(const NvThread&) = delete;
      NvThread& operator=(const NvThread& other) = delete;

      /** @brief Takes ownership of an already constructed std::thread. */
      NvThread(std::thread&& thread) : t(std::move(thread))
      {
      }

      /** @brief Takes over the thread held by another wrapper. */
      NvThread(NvThread&& thread) noexcept : t(std::move(thread.t))
      {
      }

      /**
       * @brief Joins the currently held thread (if any), then takes over other's thread.
       * @return Reference to this wrapper.
       */
      NvThread& operator=(NvThread&& other)
      {
          if (this != &other)
          {
              // std::thread::operator= terminates the program when the target
              // is still joinable, so finish the old thread first.
              join();
              t = std::move(other.t);
          }
          return *this;
      }

      /** @brief Waits for the held thread to finish. */
      ~NvThread()
      {
          join();
      }

      /** @brief Joins the held thread; does nothing when there is none to join. */
      void join()
      {
          if (t.joinable())
          {
              t.join();
          }
      }

  private:
      std::thread t;
  };
  150. #ifndef _WIN32
  151. #define _stricmp strcasecmp
  152. #define _stat64 stat64
  153. #endif
  154. /**
  155. * @brief Utility class to allocate buffer memory. Helps avoid I/O during the encode/decode loop in case of performance tests.
  156. */
  157. class BufferedFileReader {
  158. public:
  159. /**
  160. * @brief Constructor function to allocate appropriate memory and copy file contents into it
  161. */
  162. BufferedFileReader(const char *szFileName, bool bPartial = false) {
  163. struct _stat64 st;
  164. if (_stat64(szFileName, &st) != 0) {
  165. return;
  166. }
  167. nSize = st.st_size;
  168. while (nSize) {
  169. try {
  170. pBuf = new uint8_t[(size_t)nSize];
  171. if (nSize != st.st_size) {
  172. LOG(WARNING) << "File is too large - only " << std::setprecision(4) << 100.0 * nSize / st.st_size << "% is loaded";
  173. }
  174. break;
  175. } catch(std::bad_alloc) {
  176. if (!bPartial) {
  177. LOG(ERROR) << "Failed to allocate memory in BufferedReader";
  178. return;
  179. }
  180. nSize = (uint32_t)(nSize * 0.9);
  181. }
  182. }
  183. std::ifstream fpIn(szFileName, std::ifstream::in | std::ifstream::binary);
  184. if (!fpIn)
  185. {
  186. LOG(ERROR) << "Unable to open input file: " << szFileName;
  187. return;
  188. }
  189. std::streamsize nRead = fpIn.read(reinterpret_cast<char*>(pBuf), nSize).gcount();
  190. fpIn.close();
  191. assert(nRead == nSize);
  192. }
  193. ~BufferedFileReader() {
  194. if (pBuf) {
  195. delete[] pBuf;
  196. }
  197. }
  198. bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
  199. if (!pBuf) {
  200. return false;
  201. }
  202. *ppBuf = pBuf;
  203. *pnSize = nSize;
  204. return true;
  205. }
  206. private:
  207. uint8_t *pBuf = NULL;
  208. uint64_t nSize = 0;
  209. };
  /**
   * @brief In-place conversion between planar (U plane then V plane) and
   *        interleaved (U,V,U,V,...) chroma layouts.
   *
   * Both methods expect the chroma data to start nPitch * nHeight elements
   * after pFrame. A scratch buffer of nWidth * nHeight / 4 elements is owned
   * by the converter and reused across calls.
   *
   * @tparam T Sample type.
   */
  template<typename T>
  class YuvConverter {
  public:
      /**
       * @param nWidth  Frame width in samples.
       * @param nHeight Frame height in samples.
       */
      YuvConverter(int nWidth, int nHeight)
          : quad(ScratchCount(nWidth, nHeight)), nWidth(nWidth), nHeight(nHeight) {
      }

      /**
       * @brief Rewrites separate U and V planes as one interleaved UV plane, in place.
       *
       * @param pFrame Start of the frame.
       * @param nPitch Row pitch in samples; 0 selects the frame width.
       */
      void PlanarToUVInterleaved(T *pFrame, int nPitch = 0) {
          if (quad.empty()) {
              return; // no chroma samples to convert
          }
          if (nPitch == 0) {
              nPitch = nWidth;
          }
          T *pQuad = quad.data();
          T *puv = pFrame + nPitch * nHeight;
          // Save the U plane first: the interleaved output overwrites it.
          if (nPitch == nWidth) {
              memcpy(pQuad, puv, quad.size() * sizeof(T));
          } else {
              for (int i = 0; i < nHeight / 2; i++) {
                  memcpy(pQuad + nWidth / 2 * i, puv + nPitch / 2 * i, nWidth / 2 * sizeof(T));
              }
          }
          T *pv = puv + (nPitch / 2) * (nHeight / 2);
          for (int y = 0; y < nHeight / 2; y++) {
              for (int x = 0; x < nWidth / 2; x++) {
                  puv[y * nPitch + x * 2] = pQuad[y * nWidth / 2 + x];
                  puv[y * nPitch + x * 2 + 1] = pv[y * nPitch / 2 + x];
              }
          }
      }

      /**
       * @brief Rewrites an interleaved UV plane as separate U and V planes, in place.
       *
       * @param pFrame Start of the frame.
       * @param nPitch Row pitch in samples; 0 selects the frame width.
       */
      void UVInterleavedToPlanar(T *pFrame, int nPitch = 0) {
          if (quad.empty()) {
              return; // no chroma samples to convert
          }
          if (nPitch == 0) {
              nPitch = nWidth;
          }
          T *pQuad = quad.data();
          T *puv = pFrame + nPitch * nHeight,
              *pu = puv,
              *pv = puv + nPitch * nHeight / 4;
          // U samples are compacted in place; V samples are parked in the
          // scratch buffer until the interleaved data has been consumed.
          for (int y = 0; y < nHeight / 2; y++) {
              for (int x = 0; x < nWidth / 2; x++) {
                  pu[y * nPitch / 2 + x] = puv[y * nPitch + x * 2];
                  pQuad[y * nWidth / 2 + x] = puv[y * nPitch + x * 2 + 1];
              }
          }
          if (nPitch == nWidth) {
              memcpy(pv, pQuad, quad.size() * sizeof(T));
          } else {
              for (int i = 0; i < nHeight / 2; i++) {
                  memcpy(pv + nPitch / 2 * i, pQuad + nWidth / 2 * i, nWidth / 2 * sizeof(T));
              }
          }
      }

  private:
      /** @brief Number of scratch elements for the given frame size; 0 for non-positive sizes. */
      static size_t ScratchCount(int nWidth, int nHeight) {
          if (nWidth <= 0 || nHeight <= 0) {
              return 0;
          }
          return static_cast<size_t>(nWidth) * static_cast<size_t>(nHeight) / 4;
      }

      // Scratch storage; a vector releases the array correctly and makes
      // copies of the converter safe.
      std::vector<T> quad;
      int nWidth, nHeight;
  };
  /**
   * @brief Measures the time, in seconds, that elapses between Start() and Stop().
   *
   * Uses std::chrono::steady_clock so the measured interval cannot be distorted
   * by adjustments of the system time. Calling Stop() without a prior Start()
   * measures from the construction of the StopWatch.
   */
  class StopWatch {
  public:
      /** @brief Records the current time as the start of the measured interval. */
      void Start() {
          t0 = std::chrono::steady_clock::now();
      }

      /**
       * @brief Reports the time elapsed since the recorded start.
       * @return Elapsed time in seconds. The start point is left unchanged, so
       *         Stop() may be called repeatedly.
       */
      double Stop() {
          const std::chrono::steady_clock::duration elapsed = std::chrono::steady_clock::now() - t0;
          return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count() / 1.0e9;
      }

  private:
      std::chrono::steady_clock::time_point t0 = std::chrono::steady_clock::now();
  };
  /**
   * @brief Blocking FIFO queue with an optional capacity limit.
   *
   * push_back() blocks while the queue holds maxSize or more elements;
   * pop_front() and front() block while the queue is empty. A default
   * constructed queue has no practical limit (SIZE_MAX) until setSize() is
   * called. A limit of 0 makes every push_back() block until the limit is raised.
   *
   * Producers and consumers wait on separate condition variables so that a
   * notification always reaches a thread that can make progress.
   *
   * @tparam T Element type.
   */
  template<typename T>
  class ConcurrentQueue
  {
  public:
      ConcurrentQueue() {}
      ConcurrentQueue(size_t size) : maxSize(size) {}
      ConcurrentQueue(const ConcurrentQueue&) = delete;
      ConcurrentQueue& operator=(const ConcurrentQueue&) = delete;

      /** @brief Changes the capacity limit and wakes producers that may now fit. */
      void setSize(size_t s) {
          {
              std::lock_guard<std::mutex> lock(m_mutex);
              maxSize = s;
          }
          m_notFull.notify_all();
      }

      /** @brief Appends a copy of value, blocking while the queue is full. */
      void push_back(const T& value) {
          std::unique_lock<std::mutex> lock(m_mutex);
          while (full()) {
              m_notFull.wait(lock);
          }
          m_List.push_back(value);
          // Release the mutex before notifying so the woken thread does not
          // immediately block on it again.
          lock.unlock();
          // notify_all: a thread blocked in front() does not consume the
          // element, so a thread blocked in pop_front() must be woken as well.
          m_notEmpty.notify_all();
      }

      /** @brief Removes and returns the oldest element, blocking while the queue is empty. */
      T pop_front() {
          std::unique_lock<std::mutex> lock(m_mutex);
          while (m_List.empty()) {
              m_notEmpty.wait(lock);
          }
          T data = std::move(m_List.front());
          m_List.pop_front();
          lock.unlock();
          // Exactly one slot was freed, so one producer can proceed.
          m_notFull.notify_one();
          return data;
      }

      /** @brief Returns a copy of the oldest element without removing it, blocking while the queue is empty. */
      T front() {
          std::unique_lock<std::mutex> lock(m_mutex);
          while (m_List.empty()) {
              m_notEmpty.wait(lock);
          }
          return m_List.front();
      }

      /** @brief Returns the current number of elements. */
      size_t size() {
          std::unique_lock<std::mutex> lock(m_mutex);
          return m_List.size();
      }

      /** @brief Returns true when the queue currently holds no elements. */
      bool empty() {
          std::unique_lock<std::mutex> lock(m_mutex);
          return m_List.empty();
      }

      /** @brief Removes all elements and wakes producers waiting for space. */
      void clear() {
          {
              std::unique_lock<std::mutex> lock(m_mutex);
              m_List.clear();
          }
          m_notFull.notify_all();
      }

  private:
      // Caller must hold m_mutex. Uses >= so the queue still counts as full
      // when setSize() lowered the limit below the current element count.
      bool full() {
          return m_List.size() >= maxSize;
      }

  private:
      std::list<T> m_List;
      std::mutex m_mutex;
      std::condition_variable m_notEmpty; // signalled when an element is added
      std::condition_variable m_notFull;  // signalled when space becomes available
      size_t maxSize = SIZE_MAX;
  };
  /**
   * @brief Verifies that a file can be opened for binary reading.
   *
   * @param szInFilePath Path of the file to probe.
   * @throws std::invalid_argument naming the path when the file cannot be opened.
   */
  inline void CheckInputFile(const char *szInFilePath) {
      std::ifstream probe(szInFilePath, std::ios::in | std::ios::binary);
      if (!probe.fail()) {
          return;
      }
      std::ostringstream message;
      message << "Unable to open input file: " << szInFilePath << '\n';
      throw std::invalid_argument(message.str());
  }
  /**
   * @brief Rejects resolutions whose width or height is zero or negative.
   *
   * @param nWidth  Requested width.
   * @param nHeight Requested height.
   * @throws std::invalid_argument describing the offending resolution.
   */
  inline void ValidateResolution(int nWidth, int nHeight) {
      const bool bValid = (nWidth > 0) && (nHeight > 0);
      if (bValid) {
          return;
      }
      std::ostringstream message;
      message << "Please specify positive non zero resolution as -s WxH. Current resolution is " << nWidth << "x" << nHeight << '\n';
      throw std::invalid_argument(message.str());
  }
  /*
   * Declarations of the color-conversion, bit-depth conversion and resize
   * routines; their definitions are not part of this header.
   * NOTE(review): the "dp" prefix presumably marks device pointers and the
   * pitch arguments are presumably in bytes - confirm against the definitions.
   * The meaning of the iMatrix values (defaults 0 and 4) is likewise defined
   * by the implementations.
   */

  // NV12 / P016 source to packed 32-bit or 64-bit color output.
  template <class COLOR32>
  void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch,
                     uint8_t *dpBgra, int nBgraPitch,
                     int nWidth, int nHeight, int iMatrix = 0);
  template <class COLOR64>
  void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch,
                     uint8_t *dpBgra, int nBgraPitch,
                     int nWidth, int nHeight, int iMatrix = 0);
  template <class COLOR32>
  void P016ToColor32(uint8_t *dpP016, int nP016Pitch,
                     uint8_t *dpBgra, int nBgraPitch,
                     int nWidth, int nHeight, int iMatrix = 4);
  template <class COLOR64>
  void P016ToColor64(uint8_t *dpP016, int nP016Pitch,
                     uint8_t *dpBgra, int nBgraPitch,
                     int nWidth, int nHeight, int iMatrix = 4);

  // YUV444 / YUV444P16 source to packed 32-bit or 64-bit color output.
  template <class COLOR32>
  void YUV444ToColor32(uint8_t *dpYUV444, int nPitch,
                       uint8_t *dpBgra, int nBgraPitch,
                       int nWidth, int nHeight, int iMatrix = 0);
  template <class COLOR64>
  void YUV444ToColor64(uint8_t *dpYUV444, int nPitch,
                       uint8_t *dpBgra, int nBgraPitch,
                       int nWidth, int nHeight, int iMatrix = 0);
  template <class COLOR32>
  void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch,
                          uint8_t *dpBgra, int nBgraPitch,
                          int nWidth, int nHeight, int iMatrix = 4);
  template <class COLOR64>
  void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch,
                          uint8_t *dpBgra, int nBgraPitch,
                          int nWidth, int nHeight, int iMatrix = 4);

  // Same sources, planar color output.
  template <class COLOR32>
  void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch,
                         uint8_t *dpBgrp, int nBgrpPitch,
                         int nWidth, int nHeight, int iMatrix = 0);
  template <class COLOR32>
  void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch,
                         uint8_t *dpBgrp, int nBgrpPitch,
                         int nWidth, int nHeight, int iMatrix = 4);
  template <class COLOR32>
  void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch,
                           uint8_t *dpBgrp, int nBgrpPitch,
                           int nWidth, int nHeight, int iMatrix = 0);
  template <class COLOR32>
  void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch,
                              uint8_t *dpBgrp, int nBgrpPitch,
                              int nWidth, int nHeight, int iMatrix = 4);

  // 64-bit BGRA source to P016.
  void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch,
                    uint8_t *dpP016, int nP016Pitch,
                    int nWidth, int nHeight, int iMatrix = 4);

  // Sample width conversion between 8-bit and 16-bit buffers.
  void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16,
                            int nSrcPitch, int nDestPitch,
                            int nWidth, int nHeight);
  void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8,
                            int nSrcPitch, int nDestPitch,
                            int nWidth, int nHeight);

  // Resizing; the trailing UV destination pointer is optional (defaults to nullptr).
  void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight,
                  unsigned char *dpSrcNv12, int nSrcPitch, int nSrcWidth, int nSrcHeight,
                  unsigned char *dpDstNv12UV = nullptr);
  void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight,
                  unsigned char *dpSrcP016, int nSrcPitch, int nSrcWidth, int nSrcHeight,
                  unsigned char *dpDstP016UV = nullptr);
  void ScaleYUV420(unsigned char *dpDstY, unsigned char* dpDstU, unsigned char* dpDstV,
                   int nDstPitch, int nDstChromaPitch, int nDstWidth, int nDstHeight,
                   unsigned char *dpSrcY, unsigned char* dpSrcU, unsigned char* dpSrcV,
                   int nSrcPitch, int nSrcChromaPitch, int nSrcWidth, int nSrcHeight,
                   bool bSemiplanar);

  #ifdef __cuda_cuda_h__
  // Only declared when the CUDA driver API header has been included.
  void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, CUstream_st *outputCUStream);
  #endif
  401. #endif