ImageFusion.cpp 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238
  1. #include "ImageFusion.h"
  2. #include <libyuv.h>
  3. #include <cmath>
  4. #include "Timer.h"
  5. ImageFusion::ImageFusion()
  6. {
  7. bIsModified = false;
  8. }
  9. /// <summary>
  10. /// Initializes the specified p image.
  11. /// the Pano Image must be aligned 4,and has been allocated memory
  12. /// </summary>
  13. /// <param name="pImage">The p image.</param>
  14. /// <param name="nPanoWidth">Width of the n pano.</param>
  15. /// <param name="nPanoHeight">Height of the n pano.</param>
  16. /// <param name="nPanoPitch">The n pano pitch.</param>
  17. void ImageFusion::Init(unsigned char* pImage, int nPanoWidth, int nPanoHeight, int nPanoPitch)
  18. {
  19. pPanoImageBuffer = pImage;
  20. this->nPanoWidth = nPanoWidth;
  21. this->nPanoHeight = nPanoHeight;
  22. this->nPanoPitch = nPanoPitch;
  23. }
  24. /// <summary>
  25. /// Gets the laplace pyramid.
  26. /// </summary>
  27. /// <param name="Image">The image to be laplace.</param>
  28. /// <param name="vLaplacePyramid">The vector of laplace pyramid.</param>
  29. /// <param name="nLayer">The Laplace total layer.</param>
  30. /// <param name="nSize">The size of the gauss kernel.</param>
  31. /// <param name="Sigma">The sigma.</param>
  32. void ImageFusion::GetLaplacePyramid(cv::Mat& Image, std::vector<cv::Mat>& vLaplacePyramid, int nLayer, int nSize, float Sigma)
  33. {
  34. //get the Image size
  35. //clear the vector
  36. std::vector<cv::Mat>().swap(vLaplacePyramid);
  37. //get the gauss pyramid
  38. std::vector<cv::Mat> vGaussPyramid;
  39. cv::Mat ImageCurrent = Image.clone();
  40. cv::Mat GaussImage, NextLayer, UpLayer;
  41. for (int i = 0; i < nLayer; i++)
  42. {
  43. cv::GaussianBlur(ImageCurrent, GaussImage, cv::Size(nSize, nSize), Sigma, Sigma);
  44. //vGaussPyramid.push_back(GaussImage.clone());
  45. cv::pyrDown(GaussImage, NextLayer);
  46. cv::pyrUp(NextLayer, UpLayer, cv::Size(GaussImage.cols, GaussImage.rows));
  47. //ImageCurrent = NextLayer;
  48. cv::Mat LaplaceImage = GaussImage - UpLayer;
  49. vLaplacePyramid.push_back(LaplaceImage.clone());
  50. ImageCurrent = NextLayer.clone();
  51. }
  52. vLaplacePyramid.push_back(ImageCurrent);
  53. }
  54. void ImageFusion::GetDogPyramid(cv::Mat& Mask, std::vector<cv::Mat>& DogPyr, int nLevel)
  55. {
  56. cv::Mat Down, Current;
  57. cv::cvtColor(Mask, Current, cv::COLOR_GRAY2RGB);
  58. DogPyr.push_back(Current);
  59. Current = Mask.clone();
  60. for (int i = 0; i < nLevel; i++)
  61. {
  62. cv::Mat _3ChMask;
  63. cv::pyrDown(Current, Down);
  64. cv::cvtColor(Down, _3ChMask, cv::COLOR_GRAY2RGB);
  65. DogPyr.push_back(_3ChMask);
  66. Current = Down;
  67. }
  68. }
  69. /// <summary>
  70. /// Gets the gauss pyramid.
  71. /// </summary>
  72. /// <param name="Image">The image.</param>
  73. /// <param name="vGaussPyramid">The v gauss pyramid.</param>
  74. /// <param name="nLayer">The n layer.</param>
  75. /// <param name="nSize">Size of the n.</param>
  76. /// <param name="Sigma">The sigma.</param>
  77. void ImageFusion::GetGaussPyramid(cv::Mat& Image, std::vector<cv::Mat>& vGaussPyramid, int nLayer, int nSize, float Sigma)
  78. {
  79. //get the Image size
  80. //clear the vector
  81. std::vector<cv::Mat>().swap(vGaussPyramid);
  82. //get the gauss pyramid
  83. cv::Mat ImageCopy = Image.clone();
  84. for (int i = 0; i < nLayer; i++)
  85. {
  86. cv::Mat GaussImage, NextLayer, UpLayer;
  87. cv::GaussianBlur(ImageCopy, GaussImage, cv::Size(nSize, nSize), Sigma, Sigma);
  88. cv::pyrDown(GaussImage, NextLayer);
  89. vGaussPyramid.push_back(NextLayer.clone());
  90. if (NextLayer.cols <= 1 || NextLayer.rows <= 1)
  91. {
  92. break;
  93. }
  94. ImageCopy = NextLayer.clone();
  95. }
  96. }
  97. /// <summary>
  98. /// Cals the laplace blend image.
  99. /// </summary>
  100. /// <param name="vInferPy">The vec infer py.</param>
  101. /// <param name="vTargetPy">The vec target py.</param>
  102. /// <param name="vMaskPy">The vec mask py.</param>
  103. /// <param name="vResultPy">The vec result py.</param>
  104. void ImageFusion::CalBlendImage(vMat& vInferPy, vMat& vTargetPy, vMat& vMaskPy, vMat& vResultPy)
  105. {
  106. cv::Mat AfterCountWeightInfer, AfterCountWeightTarget;
  107. cv::Mat Result;
  108. //get the current pyramid level
  109. int nLevel = vInferPy.size() - 1;
  110. for (int index = 0; index < nLevel; index++)
  111. {
  112. AfterCountWeightInfer = vInferPy[index].mul(vMaskPy[index]);
  113. AfterCountWeightTarget = vTargetPy[index].mul(cv::Scalar(1.0,1.0,1.0) - vMaskPy[index]);
  114. Result = AfterCountWeightInfer + AfterCountWeightTarget;
  115. vResultPy.push_back(Result.clone());
  116. }
  117. cv::Mat lastOne = vInferPy.back().mul(vMaskPy.back()) + vTargetPy.back().mul(cv::Scalar(1.f, 1.f, 1.f) - vMaskPy.back());
  118. vResultPy.push_back(lastOne);
  119. }
// NOTE(review): unimplemented stub overload — body is intentionally empty.
// Either implement it or remove it from the header so callers cannot
// silently invoke a no-op.
void ImageFusion::FusionImageByLaplacePyramid(cv::Mat* pLeft, cv::Mat* Right, int nLayer, int nSize, float Sigma)
{
}
/// <summary>
/// Fuses the overlap region of the panorama with a target image using
/// Laplacian-pyramid blending, writing the blended pixels back into the
/// panorama buffer in place.
/// </summary>
/// <param name="pPano">Panorama pixel buffer (BGR/RGB 8-bit, 3 channels).</param>
/// <param name="InferRc">Overlap rectangle inside the panorama.</param>
/// <param name="pCvTarget">Target image buffer (currently unused — see note below).</param>
/// <param name="SzTargetMat">Size of the target image (currently unused).</param>
/// <param name="nLayer">Number of pyramid levels.</param>
/// <param name="nSize">Gaussian kernel size.</param>
/// <param name="Sigma">Gaussian sigma.</param>
/// <param name="LeftTop">Unused placement hint.</param>
void ImageFusion::FusionImageByLaplacePyramid(unsigned char* pPano, cv::Rect InferRc, unsigned char* pCvTarget, cv::Size SzTargetMat, int nLayer, int nSize, float Sigma, cv::Point LeftTop)
{
    // Wrap the panorama buffer without copying.
    // NOTE(review): this assumes the buffer is tightly packed
    // (row step == nPanoWidth * 3); the write-back loop below uses
    // nPanoPitch instead — confirm the two agree, otherwise rows shear.
    cv::Mat PanoImage = cv::Mat(this->nPanoHeight, this->nPanoWidth, CV_8UC3, pPano);
    // View of the overlap region inside the panorama (shares pixels).
    cv::Mat CvInferMat = PanoImage(InferRc);
    cv::Mat Mask(InferRc.size(), CV_8UC1, cv::Scalar(0));
    // get the target image to cv mat
    // create a cv mat to store the target image
    // NOTE(review): this is a shallow header copy — CvTargetMat aliases
    // CvInferMat, so the blend currently mixes the panorama with itself.
    // The commented-out FastCopy_Any below suggests pCvTarget was meant
    // to be copied in here; verify before relying on this path.
    cv::Mat CvTargetMat = CvInferMat;
    // get the offset in target mat to store the target image
    //int nOffsetX = InferRc.width - SzTargetMat.width;
    // Copy the target image to cv mat
    for (int i = 0; i < InferRc.height; i++)
    {
        // FastCopy_Any(pCvTarget + ALIGN_4(SzTargetMat.width*3) * i,
        //              CvTargetMat.ptr<unsigned char>(i) + nOffsetX*3,
        //              SzTargetMat.width*3
        //              );
        // Mask = 255 on the LEFT half of each row, 0 on the right:
        // the infer side wins on the left, the target side on the right.
        memset(Mask.ptr<unsigned char>(i), 255, int(InferRc.width) / 2);
    }
    // Build the pyramids of both sources and of the blend mask.
    std::vector<cv::Mat> vInferLaplacePyramid;
    std::vector<cv::Mat> vTargetLaplacePyramid;
    std::vector<cv::Mat> vMaskLapLacePyramid;
    std::vector<cv::Mat> vResultLaplacePyramid;
    // Work in normalized float [0,1] to avoid 8-bit clipping mid-pyramid.
    cv::Mat CvInferMat_f, CvTargetMat_f, Mask_f;
    CvInferMat.convertTo(CvInferMat_f, CV_32FC3, 1.f / 255.f);
    CvTargetMat.convertTo(CvTargetMat_f, CV_32FC3, 1.f / 255.f);
    Mask.convertTo(Mask_f, CV_32F, 1.f / 255.f);
    GetLaplacePyramid(CvInferMat_f, vInferLaplacePyramid, nLayer, nSize, Sigma);
    GetLaplacePyramid(CvTargetMat_f, vTargetLaplacePyramid, nLayer, nSize, Sigma);
    GetDogPyramid(Mask_f, vMaskLapLacePyramid, nLayer);
    CalBlendImage(vInferLaplacePyramid, vTargetLaplacePyramid, vMaskLapLacePyramid, vResultLaplacePyramid);
    // Collapse the blended pyramid, coarse to fine.
    int nVecLength = vResultLaplacePyramid.size();
    cv::Mat UpLayer;
    cv::Mat CurrentMat = vResultLaplacePyramid.back();
    for (int level = nVecLength - 2; level >= 0; level--)
    {
        cv::pyrUp(CurrentMat, UpLayer, vResultLaplacePyramid[level].size());
        CurrentMat = vResultLaplacePyramid[level] + UpLayer;
    }
    // Back to 8-bit.
    CurrentMat.convertTo(CurrentMat, CV_8UC3, 255);
    // Fetch the result: copy the blended block back into the panorama,
    // honoring the panorama's row pitch.
    unsigned char* pModifyPart = pPano + InferRc.y * nPanoPitch + InferRc.x * 3;
    for (int i = 0; i < InferRc.height; i++)
    {
        memcpy(
            pModifyPart + i * nPanoPitch,
            CurrentMat.ptr<unsigned char>(i),
            InferRc.width * 3
        );
    }
}
/// <summary>
/// Returns the panorama buffer geometry through the reference parameters.
/// </summary>
/// <param name="pFusionImage">Intended to receive the buffer pointer — see note.</param>
/// <param name="nWidth">Receives the panorama width.</param>
/// <param name="nHeight">Receives the panorama height.</param>
/// <param name="nPitch">Receives the panorama row pitch in bytes.</param>
void ImageFusion::GetLaplaceFusionImage(unsigned char* pFusionImage, int& nWidth, int& nHeight, int& nPitch)
{
    // BUG(review): pFusionImage is passed BY VALUE, so this assignment is
    // invisible to the caller — only the three reference parameters are
    // actually returned. The parameter should be `unsigned char*&` (or the
    // pointer should be the return value); fixing it requires a matching
    // header change, so it is only flagged here.
    pFusionImage = this->pPanoImageBuffer;
    nWidth = this->nPanoWidth;
    nHeight = this->nPanoHeight;
    nPitch = this->nPanoPitch;
}
  202. void ImageFusion::FusionImageByThinRectWindowSlideMean(unsigned char* pPano, cv::Rect InferRc, int nSize, int nType, float Sigma)
  203. {
  204. cv::Mat Kernal;
  205. if (nType == 0)
  206. {
  207. Kernal = CreateThinRectWindow(1, nSize, nType, 1);
  208. }
  209. else if(nType == 1)
  210. {
  211. Kernal = CreateThinRectWindow(1, nSize, nType, 1);
  212. }
  213. //first get the fusion part in pano image
  214. cv::Mat PanoImage = cv::Mat(this->nPanoHeight, this->nPanoWidth, CV_8UC3, pPano);
  215. //get infer image from PanoImage
  216. cv::Rect WidenInferRect = InferRc;
  217. WidenInferRect.x -= nSize / 2;
  218. WidenInferRect.width += nSize;
  219. cv::Mat InferImage = PanoImage(WidenInferRect);
  220. cv::Mat _32FInferImage;
  221. InferImage.convertTo(_32FInferImage, CV_32F);
  222. std::vector<cv::Mat> vSingleChannelImage;
  223. cv::split(_32FInferImage, vSingleChannelImage);
  224. cv::Mat MergeImage;
  225. cv::Mat Afterfilter;
  226. for (int i = 0; i < 3; i++)
  227. {
  228. cv::filter2D(vSingleChannelImage[i], vSingleChannelImage[i], -1, Kernal);
  229. }
  230. cv::merge(vSingleChannelImage, MergeImage);
  231. MergeImage.convertTo(Afterfilter, CV_8U);
  232. Afterfilter.copyTo(InferImage);
  233. }
/// <summary>
/// Fusions the image by thin rect window slide mean: rebuilds the overlap
/// strip from the left half of the infer image and the right half of the
/// target image, then runs a 1-D horizontal filter over each row to hide
/// the seam. Writes the result back into the panorama in place.
/// </summary>
/// <param name="pPano">The panorama image to be modified.</param>
/// <param name="InferRc">The overlap rect in the panorama.</param>
/// <param name="InferImg">The overlap infer img.</param>
/// <param name="CurCoordinateRc">The overlap rc in the current image.</param>
/// <param name="TargetImg">The overlap img.</param>
/// <param name="TargetRc">The overlap rc in the target image.</param>
/// <param name="nSize">Size of the filter.</param>
/// <param name="nType">Type of the filter: 0 = mean, 1 = gauss.</param>
/// <param name="Sigma">The sigma. NOTE(review): unused — the gauss branch
/// hard-codes its width below; confirm whether it should be forwarded.</param>
void ImageFusion::FusionImageByThinRectWindowSlideMean(unsigned char* pPano, cv::Rect InferRc, cv::Mat& InferImg, cv::Rect CurCoordinateRc, cv::Mat& TargetImg, cv::Rect TargetRc, int nSize, int nType, float Sigma)
{
    // Nothing to blend if either overlap rect is degenerate.
    if (CurCoordinateRc.width == 0 || CurCoordinateRc.height == 0 ||
        TargetRc.width == 0 || TargetRc.height == 0)
        return;
    cv::Mat Kernal;
    if (nType == 0)
    {
        Kernal = CreateThinRectWindow(1, nSize, nType, 1);
    }
    else if (nType == 1)
    {
        // NOTE(review): the 4th argument of CreateThinRectWindow is
        // nChannels (an int) — 1.5 truncates to 1. It was presumably
        // meant to be the sigma; confirm against the declaration.
        Kernal = CreateThinRectWindow(1, nSize, nType, 1.5);
    }
    // Wrap the panorama buffer without copying.
    cv::Mat PanoImage = cv::Mat(this->nPanoHeight, this->nPanoWidth, CV_8UC3, pPano);
    // get infer image from PanoImage
    cv::Rect WidenPanoRect = InferRc;
    //WidenPanoRect.x -= nSize / 2;
    //WidenPanoRect.width += nSize;
    cv::Mat InferImage = PanoImage(WidenPanoRect);
    // Work on a copy; the filtered result is copied back at the end.
    cv::Mat InferImageCopy = InferImage.clone();
    // Keep the left/right halves the same width when the strip is odd.
    if (WidenPanoRect.width % 2 == 1)
        TargetRc.width -= 1;
    // Left half of the strip comes from the infer image...
    cv::Rect WidenInferRect = CurCoordinateRc;
    //WidenInferRect.x -= (nSize-1) / 2;
    WidenInferRect.width = WidenPanoRect.width / 2;
    if (WidenInferRect.width != 0)
        InferImg(WidenInferRect).copyTo(InferImageCopy(cv::Rect(0, 0, WidenInferRect.width, WidenInferRect.height)));
    // ...and the right half from the target image.
    cv::Rect WidenTargetRect = TargetRc;
    WidenTargetRect.x += WidenPanoRect.width / 2;
    WidenTargetRect.width = WidenPanoRect.width / 2;
    if (TargetRc.width != 0)
        TargetImg(WidenTargetRect).copyTo(InferImageCopy(cv::Rect(0 + WidenInferRect.width,
            0, WidenTargetRect.width, WidenTargetRect.height)));
    // Filter each channel row-by-row with the 1-D kernel, in float.
    cv::Mat _32FInferImage;
    InferImageCopy.convertTo(_32FInferImage, CV_32F);
    std::vector<cv::Mat> vSingleChannelImage;
    cv::split(_32FInferImage, vSingleChannelImage);
    cv::Mat MergeImage;
    cv::Mat Afterfilter;
    for (int i = 0; i < 3; i++)
    {
        //cv::filter2D(vSingleChannelImage[i], vSingleChannelImage[i], -1, Kernal);
        for (int ImageRows = 0; ImageRows < vSingleChannelImage[i].rows; ImageRows++)
        {
            cv::Mat CurRow = vSingleChannelImage[i](cv::Range(ImageRows, ImageRows + 1), cv::Range(0, _32FInferImage.cols));
            cv::Mat ResRow;
            Convolution1D(CurRow, ResRow, Kernal);
            ResRow.copyTo(vSingleChannelImage[i](cv::Range(ImageRows, ImageRows + 1), cv::Range(0, _32FInferImage.cols)));
        }
    }
    cv::merge(vSingleChannelImage, MergeImage);
    MergeImage.convertTo(Afterfilter, CV_8U);
    // Write the smoothed strip back into the panorama view.
    Afterfilter.copyTo(InferImage);
}
  304. cv::Mat ImageFusion::CreateThinRectWindow(int nHeight, int nWidth, int nType, int nChannels)
  305. {
  306. //window size must be odd
  307. if (nWidth % 2 == 0 || nHeight % 2 == 0)
  308. return cv::Mat();
  309. cv::Mat Window;
  310. if(nChannels == 1)
  311. Window = cv::Mat(nHeight, nWidth, CV_32F, cv::Scalar(0));
  312. else
  313. Window = cv::Mat(nHeight, nWidth, CV_32FC3, cv::Scalar(0,0,0));
  314. //according to type to set the value of the window
  315. //the center value in window
  316. if (nType == 0)
  317. {
  318. Window.setTo(cv::Scalar::all(1));
  319. Window = Window / nWidth;
  320. }
  321. else if(nType == 1)
  322. {
  323. //get center of the filter
  324. int nCenterX = nWidth / 2;
  325. //calcute the weight of the filter
  326. float sum = 0.0;
  327. for (int i = 0; i < nWidth; i++)
  328. {
  329. float x = i - nCenterX;
  330. float weight = CaculateGaussWeight(x, 1.5);
  331. sum += weight;
  332. Window.at<float>(i) = weight;
  333. }
  334. for (int i = 0; i < nWidth; i++)
  335. {
  336. Window.at<float>(i) /= sum;
  337. }
  338. }
  339. return Window;
  340. }
  341. /// <summary>
  342. /// Caculates the gauss weight.
  343. /// </summary>
  344. /// <param name="x">The x.</param>
  345. /// <param name="sigma">The sigma.</param>
  346. /// <returns></returns>
  347. double ImageFusion::CaculateGaussWeight(double x, double sigma)
  348. {
  349. return exp(-(x * x) / (2 * sigma * sigma)) / (sqrt(2 * M_PI) * sigma);
  350. }
  351. void ImageFusion::Convolution2D(cv::Mat& Src, cv::Mat& Dst, cv::Mat& Kernel)
  352. {
  353. int nChannels = Src.channels();
  354. int nRows = Src.rows;
  355. int nCols = Src.cols * nChannels;
  356. int nKernelRows = Kernel.rows;
  357. int nKernelCols = Kernel.cols * nChannels;
  358. int nKernelCenterX = nKernelCols / 2;
  359. int nKernelCenterY = nKernelRows / 2;
  360. Dst = cv::Mat(nRows, nCols, CV_32FC3, cv::Scalar(0, 0, 0));
  361. for (int i = 0; i < nRows; i++)
  362. {
  363. float* pDst = Dst.ptr<float>(i);
  364. for (int j = 0; j < nCols; j++)
  365. {
  366. float sum = 0.0;
  367. for (int m = 0; m < nKernelRows; m++)
  368. {
  369. int nSrcRow = i + m - nKernelCenterY;
  370. if (nSrcRow < 0)
  371. nSrcRow = -nSrcRow;
  372. else if (nSrcRow >= nRows)
  373. nSrcRow = nRows - (nSrcRow - nRows) - 1;
  374. float* pSrc = Src.ptr<float>(nSrcRow);
  375. float* pKernel = Kernel.ptr<float>(m);
  376. for (int n = 0; n < nKernelCols; n++)
  377. {
  378. int nSrcCol = j + n - nKernelCenterX;
  379. if (nSrcCol < 0)
  380. nSrcCol = -nSrcCol;
  381. else if (nSrcCol >= nCols)
  382. nSrcCol = nCols - (nSrcCol - nCols) - 1;
  383. sum += pSrc[nSrcCol] * pKernel[n];
  384. }
  385. }
  386. pDst[j] = sum;
  387. }
  388. }
  389. }
/// Convolves a single row (1 x nCols, CV_32F) with a 1-D kernel using
/// AVX, writing each result back into the widened source as it goes
/// (an in-place, feedback-style pass — each output sample sees already-
/// filtered neighbors to its left).
/// NOTE(review): only kernels up to 7 taps are handled; for wider kernels
/// the `if (nKernelCols <= 7)` block is skipped and Dst is silently left
/// all-zero. The `.m256_f32` member access is MSVC-specific and will not
/// compile with gcc/clang.
void ImageFusion::Convolution1D(cv::Mat& Src, cv::Mat& Dst, cv::Mat& Kernel)
{
    int nRows = Src.rows;
    int nCols = Src.cols;
    if (nRows == 0 || nCols == 0)
        return;
    // The kernel must be odd-width and single-channel.
    if (!(Kernel.cols % 2 != 0))
        cv::error((CV_StsInternal), "Convolution1D", "Assertion: " "Kernel.cols % 2 == 0" " failed", "D:\\kang\\360stitching\\QtCameraHardWareCopilot\\QtCameraHardWareCopilot\\ImageFusion.cpp", 428);
    if ((Kernel.channels() != 1))
        cv::error(cv::Error::StsAssert, "Kernel.channels() != 1", __FUNCTION__, "D:\\kang\\360stitching\\QtCameraHardWareCopilot\\QtCameraHardWareCopilot\\ImageFusion.cpp", 433);
    int nKernelRows = Kernel.rows; // NOTE(review): unused below
    int nKernelCols = Kernel.cols;
    // NOTE(review): the conventional center would be nKernelCols / 2; the
    // +1 shifts the in-place write-back one sample right — confirm intent.
    int nKernelCenterX = nKernelCols / 2 + 1;
    // Widen the row by half a kernel on each side for border handling.
    cv::Mat WidenSrc = cv::Mat(nRows, nCols + nKernelCols - 1, CV_32F, cv::Scalar(0));
    Dst = cv::Mat(nRows, nCols, CV_32F, cv::Scalar(0));
    Src.copyTo(WidenSrc(cv::Range(0, 1), cv::Range(nKernelCols / 2, nCols + nKernelCols / 2)));
    // Fill the left/right aprons: mirror when the row is long enough,
    // otherwise replicate a single border sample.
    if (nKernelCols < nRows)
        for (int Bordi = 0; Bordi < nKernelCols / 2; Bordi++)
        {
            WidenSrc.ptr<float>(0)[nKernelCols / 2 - 1 - Bordi] = WidenSrc.ptr<float>(0)[nKernelCols / 2 + 1 + Bordi];
            WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 + Bordi] = WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 - 2 - Bordi];
        }
    else if (nKernelCols != 1)
        for (int Bordi = 0; Bordi < nKernelCols / 2; Bordi++)
        {
            WidenSrc.ptr<float>(0)[nKernelCols / 2 - 1 - Bordi] = WidenSrc.ptr<float>(0)[nKernelCols / 2 + 1];
            WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 + Bordi] = WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 - 2];
        }
    else
        for (int Bordi = 0; Bordi < nKernelCols / 2; Bordi++)
        {
            WidenSrc.ptr<float>(0)[nKernelCols / 2 - 1 - Bordi] = WidenSrc.ptr<float>(0)[nKernelCols / 2];
            WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 + Bordi] = WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 - 1];
        }
    if (nKernelCols <= 7)
    {
        // Load the kernel taps into one AVX register (<= 7 floats used).
        __m256 Kernal;
        for (int i = 0; i < nKernelCols; i++)
        {
            Kernal.m256_f32[i] = Kernel.ptr<float>(0)[i];
        }
        // begin to convolution
        for (int i = 0; i < nCols; i++)
        {
            __m256 mmSum = _mm256_setzero_ps();
            __m256 mmSrc = _mm256_setzero_ps();
            memcpy(mmSrc.m256_f32, WidenSrc.ptr<float>(0) + i, sizeof(float) * nKernelCols);
            // NOTE(review): the parentheses form a comma expression — this
            // simply assigns the product; the zeroed mmSum is discarded.
            // Probably _mm256_add_ps was intended (same result here since
            // mmSum is zero).
            mmSum = (mmSum, _mm256_mul_ps(mmSrc, Kernal));
            // Horizontal sum of the used lanes.
            for (int j = 0; j < nKernelCols; j++)
            {
                Dst.ptr<float>(0)[i] += mmSum.m256_f32[j];
            }
            // Feed the filtered value back into the source window.
            WidenSrc.ptr<float>(0)[nKernelCenterX + i] = Dst.ptr<float>(0)[i];
        }
    }
}
  447. void ImageFusion::FusionImageByBlendingGradient(unsigned char* pPano, cv::Rect InferRc, cv::Mat& InferImg, cv::Rect CurCoordinateRc, cv::Mat& TargetImg, cv::Rect TargetRc)
  448. {
  449. if (CurCoordinateRc.width == 0 || CurCoordinateRc.height == 0 ||
  450. TargetRc.width == 0 || TargetRc.height == 0)
  451. return;
  452. if (CurCoordinateRc.width + CurCoordinateRc.x > InferImg.cols
  453. || CurCoordinateRc.height + CurCoordinateRc.y > InferImg.rows
  454. || InferRc.height + InferRc.y > nPanoHeight
  455. || TargetRc.width + TargetRc.x > TargetImg.cols
  456. || TargetRc.height + TargetRc.y > TargetImg.rows
  457. || CurCoordinateRc.x < 0
  458. || CurCoordinateRc.y < 0
  459. || TargetRc.x < 0
  460. || TargetRc.y < 0
  461. )
  462. return;
  463. //first get the fusion part in pano image
  464. cv::Mat PanoImage = cv::Mat(this->nPanoHeight, this->nPanoWidth, CV_8UC3, pPano);
  465. if (PanoImage.empty())
  466. return;
  467. //get infer image from PanoImage
  468. cv::Rect WidenPanoRect = InferRc;
  469. //WidenPanoRect.x -= nSize / 2;
  470. //WidenPanoRect.width += nSize;
  471. cv::Mat InferImage = PanoImage(WidenPanoRect);
  472. cv::Mat InferImageCopy = InferImage.clone();
  473. if (WidenPanoRect.width % 2 == 1)
  474. TargetRc.width -= 1;
  475. cv::Mat InferPart, TargetPart;
  476. //get infer image from inferImage and Taraget image
  477. cv::Rect WidenInferRect = CurCoordinateRc;
  478. //WidenInferRect.x -= (nSize-1) / 2;
  479. //WidenInferRect.width = WidenPanoRect.width / 2;
  480. if (WidenInferRect.width != 0)
  481. InferPart = InferImg(WidenInferRect).clone();
  482. else
  483. return;
  484. cv::Rect WidenTargetRect = TargetRc;
  485. //WidenTargetRect.x += WidenPanoRect.width / 2;
  486. //WidenTargetRect.width = WidenPanoRect.width / 2;
  487. if (TargetRc.width != 0)
  488. TargetPart = TargetImg(WidenTargetRect).clone();
  489. else
  490. return;
  491. if (InferPart.cols != TargetPart.cols)
  492. return;
  493. //create the weight for two blend image
  494. cv::Mat InferWeight, TargetWeight;
  495. std::vector<bool> Useless;
  496. GetGradientMask(CurCoordinateRc, TargetRc, InferWeight, TargetWeight, Useless);
  497. cv::Mat Res;
  498. cv::blendLinear(InferPart, TargetPart, InferWeight, TargetWeight, Res);
  499. Res.copyTo(PanoImage(WidenPanoRect));
  500. }
  501. void ImageFusion::FusionImageByBlendingGradientYUV(unsigned char* pPano, cv::Rect InferRc, unsigned char* pInfer, int nInferWidth, int nInferHeight, int nInferPitch, cv::Rect CurCoordinateRc, unsigned char* pTarget, int nTargetWidth, int nTargetHeight, int nTargetPitch, cv::Rect TargetRc, bool bUseSSE2)
  502. {
  503. if (CurCoordinateRc.width == 0 || CurCoordinateRc.height == 0 ||
  504. TargetRc.width == 0 || TargetRc.height == 0)
  505. return;
  506. if (CurCoordinateRc.width + CurCoordinateRc.x > nInferWidth
  507. || CurCoordinateRc.height + CurCoordinateRc.y > nInferHeight
  508. || InferRc.height + InferRc.y > nPanoHeight
  509. || TargetRc.width + TargetRc.x > nTargetWidth
  510. || TargetRc.height + TargetRc.y > nInferHeight
  511. || CurCoordinateRc.x < 0
  512. || CurCoordinateRc.y < 0
  513. || TargetRc.x < 0
  514. || TargetRc.y < 0
  515. )
  516. return;
  517. /***test***/
  518. cv::Mat matPanoImage = cv::Mat(nPanoHeight * 3 / 2, nPanoWidth, CV_8U, pPano);
  519. /***test***/
  520. //get infer image from PanoImage
  521. cv::Rect WidenPanoRect = InferRc;
  522. if (WidenPanoRect.width % 2 == 1)
  523. TargetRc.width -= 1;
  524. //get infer image from inferImage and Taraget image
  525. cv::Rect WidenInferRect = CurCoordinateRc;
  526. //申请一块内存,用于存放裁剪后的infer图像数据
  527. unsigned char* pInferCorp = new unsigned char[
  528. ALIGN_4(CurCoordinateRc.width) * CurCoordinateRc.height * 3 / 2
  529. ];
  530. if (WidenInferRect.width != 0)
  531. //InferPart = InferImg(WidenInferRect).clone();
  532. {
  533. libyuv::ConvertToI420(
  534. pInfer,
  535. nInferPitch * nInferHeight *3/2,
  536. pInferCorp,
  537. ALIGN_4(CurCoordinateRc.width),
  538. pInferCorp + ALIGN_4(CurCoordinateRc.width) * CurCoordinateRc.height,
  539. ALIGN_4(CurCoordinateRc.width) / 2,
  540. pInferCorp + ALIGN_4(CurCoordinateRc.width) * CurCoordinateRc.height * 5 / 4,
  541. ALIGN_4(CurCoordinateRc.width) / 2,
  542. CurCoordinateRc.x,
  543. CurCoordinateRc.y,
  544. nInferWidth,
  545. nInferHeight,
  546. ALIGN_4(CurCoordinateRc.width),
  547. CurCoordinateRc.height,
  548. libyuv::kRotate0,
  549. libyuv::FOURCC_I420
  550. );
  551. }
  552. else
  553. return;
  554. cv::Rect WidenTargetRect = TargetRc;
  555. //申请一块内存,用于存放裁剪后的Target图像数据
  556. unsigned char* pTargetCorp = new unsigned char[
  557. ALIGN_4(TargetRc.width) * TargetRc.height * 3 / 2
  558. ];
  559. if (TargetRc.width != 0)
  560. //TargetPart = TargetImg(WidenTargetRect).clone();
  561. {
  562. libyuv::ConvertToI420(
  563. pTarget,
  564. nTargetPitch * nTargetHeight *3/2,
  565. pTargetCorp,
  566. ALIGN_4(TargetRc.width),
  567. pTargetCorp + ALIGN_4(TargetRc.width) * TargetRc.height,
  568. ALIGN_4(TargetRc.width) / 2,
  569. pTargetCorp + ALIGN_4(TargetRc.width) * TargetRc.height * 5 / 4,
  570. ALIGN_4(TargetRc.width) / 2,
  571. TargetRc.x,
  572. TargetRc.y,
  573. nTargetWidth,
  574. nTargetHeight,
  575. ALIGN_4(TargetRc.width),
  576. TargetRc.height,
  577. libyuv::kRotate0,
  578. libyuv::FOURCC_I420
  579. );
  580. }
  581. else
  582. return;
  583. //create the weight for two blend image
  584. cv::Mat InferWeight, TargetWeight;
  585. std::vector<bool> Useless;
  586. GetGradientMask(CurCoordinateRc, TargetRc, InferWeight, TargetWeight, Useless);
  587. unsigned char* pBlenderPart = NULL;
  588. cv::Mat testInferImage = cv::Mat(nTargetHeight * 3 / 2, ALIGN_4(TargetRc.width), CV_8U, pInfer);
  589. cv::Mat testTargetImage = cv::Mat(nTargetHeight * 3 / 2, ALIGN_4(TargetRc.width), CV_8U, pTarget);
  590. TimerCounter Timer;
  591. Timer.Start();
  592. /*BlendingLinearYUV(
  593. pInferCorp, CurCoordinateRc.width, CurCoordinateRc.height, ALIGN_4(CurCoordinateRc.width),
  594. pTargetCorp, TargetRc.width, TargetRc.height, ALIGN_4(TargetRc.width),
  595. InferWeight, TargetWeight,
  596. pBlenderPart,
  597. true
  598. );*/
  599. BlendingLinearYUV(
  600. pInferCorp, CurCoordinateRc.width, CurCoordinateRc.height, ALIGN_4(CurCoordinateRc.width),
  601. pTargetCorp, TargetRc.width, TargetRc.height, ALIGN_4(TargetRc.width),
  602. InferWeight, TargetWeight,
  603. pBlenderPart,true
  604. );
  605. Timer.Stop();
  606. /***test***/
  607. cv::Mat matTargetImage = cv::Mat(TargetRc.height * 3 / 2, TargetRc.width+2, CV_8U, pBlenderPart);
  608. /***test***/
  609. PastePartInWholeImageYUV(
  610. pPano, nPanoWidth, nPanoHeight, nPanoPitch,
  611. pBlenderPart, WidenPanoRect.width, WidenPanoRect.height, ALIGN_4(WidenPanoRect.width),
  612. WidenPanoRect.x, WidenPanoRect.y,
  613. false
  614. );
  615. }
  616. void ImageFusion::FusionImageByBlendingGradientYUVByGpu(unsigned char* pPano, cv::Rect InferRc, unsigned char* pInfer, int nInferWidth, int nInferHeight, int nInferPitch, cv::Rect CurCoordinateRc, unsigned char* pTarget, int nTargetWidth, int nTargetHeight, int nTargetPitch, cv::Rect TargetRc)
  617. {
  618. if (CurCoordinateRc.width == 0 || CurCoordinateRc.height == 0 ||
  619. TargetRc.width == 0 || TargetRc.height == 0)
  620. return;
  621. if (CurCoordinateRc.width + CurCoordinateRc.x > nInferWidth
  622. || CurCoordinateRc.height + CurCoordinateRc.y > nInferHeight
  623. || InferRc.height + InferRc.y > nPanoHeight
  624. || TargetRc.width + TargetRc.x > nTargetWidth
  625. || TargetRc.height + TargetRc.y > nInferHeight
  626. || CurCoordinateRc.x < 0
  627. || CurCoordinateRc.y < 0
  628. || TargetRc.x < 0
  629. || TargetRc.y < 0
  630. )
  631. return;
  632. //get infer image from PanoImage
  633. cv::Rect WidenPanoRect = InferRc;
  634. if (WidenPanoRect.width % 2 == 1)
  635. TargetRc.width -= 1;
  636. //get infer image from inferImage and Taraget image
  637. cv::Rect WidenInferRect = CurCoordinateRc;
  638. //申请一块内存,用于存放裁剪后的infer图像数据
  639. unsigned char* pInferCorp = nullptr;
  640. cudaMalloc((void**)&pInferCorp, ALIGN_4(WidenInferRect.width) * WidenInferRect.height * 3 / 2);
  641. cv::Rect WidenTargetRect = TargetRc;
  642. //申请一块内存,用于存放裁剪后的Target图像数据
  643. unsigned char* pTargetCorp = nullptr;
  644. cudaMalloc((void**)&pTargetCorp, ALIGN_4(TargetRc.width) * TargetRc.height * 3 / 2);
  645. cudaError Err = cudaDeviceSynchronize();
  646. if (WidenInferRect.width != 0)
  647. //InferPart = InferImg(WidenInferRect).clone();
  648. {
  649. unsigned char* pSrcCorpY = pInfer + WidenInferRect.y * nInferPitch + WidenInferRect.x;
  650. unsigned char* pSrcCorpU = pSrcCorpY + nInferPitch * nInferHeight + WidenInferRect.y * nInferPitch / 4 + WidenInferRect.x / 2;
  651. YUVTailorAndBlender::CropI420(
  652. pInfer, nInferPitch,
  653. pInfer + nInferPitch * nInferHeight, nInferPitch / 2,
  654. pInfer + nInferPitch * nInferHeight * 5 / 4, nInferPitch / 2,
  655. pInferCorp, ALIGN_4(CurCoordinateRc.width),
  656. pInferCorp + ALIGN_4(CurCoordinateRc.width) * CurCoordinateRc.height, ALIGN_4(CurCoordinateRc.width) / 2,
  657. pInferCorp + ALIGN_4(CurCoordinateRc.width) * CurCoordinateRc.height * 5 / 4, ALIGN_4(CurCoordinateRc.width) / 2,
  658. CurCoordinateRc.width, CurCoordinateRc.height,
  659. CurCoordinateRc.x, CurCoordinateRc.y
  660. );
  661. }
  662. else
  663. return;
  664. if (TargetRc.width != 0)
  665. //TargetPart = TargetImg(WidenTargetRect).clone();
  666. {
  667. YUVTailorAndBlender::CropI420(
  668. pTarget, nTargetPitch,
  669. pTarget + nTargetPitch * nTargetHeight, nTargetPitch / 2,
  670. pTarget + nTargetPitch * nTargetHeight * 5 / 4, nTargetPitch / 2,
  671. pTargetCorp, ALIGN_4(WidenTargetRect.width),
  672. pTargetCorp + ALIGN_4(WidenTargetRect.width) * WidenTargetRect.height, ALIGN_4(WidenTargetRect.width) / 2,
  673. pTargetCorp + ALIGN_4(WidenTargetRect.width) * WidenTargetRect.height * 5 / 4, ALIGN_4(WidenTargetRect.width) / 2,
  674. WidenTargetRect.width, WidenTargetRect.height,
  675. WidenTargetRect.x, WidenTargetRect.y
  676. );
  677. }
  678. else
  679. return;
  680. Err = cudaDeviceSynchronize();
  681. //create the weight for two blend image
  682. cv::Mat InferWeight, TargetWeight;
  683. std::vector<bool> Useless;
  684. GetGradientMask(CurCoordinateRc, TargetRc, InferWeight, TargetWeight, Useless);
  685. unsigned char* pBlenderPart = NULL;
  686. TimerCounter Timer;
  687. Timer.Start();
  688. cudaMalloc((void**) & pBlenderPart, ALIGN_4(TargetRc.width) * TargetRc.height * 3 / 2);
  689. /*BlendingLinearYUV(
  690. pInferCorp, CurCoordinateRc.width, CurCoordinateRc.height, ALIGN_4(CurCoordinateRc.width),
  691. pTargetCorp, TargetRc.width, TargetRc.height, ALIGN_4(TargetRc.width),
  692. InferWeight, TargetWeight,
  693. pBlenderPart,
  694. true
  695. );*/
  696. BlendingLinearYUVByGpu(
  697. pInferCorp, CurCoordinateRc.width, CurCoordinateRc.height, ALIGN_4(CurCoordinateRc.width),
  698. pTargetCorp, TargetRc.width, TargetRc.height, ALIGN_4(TargetRc.width),
  699. InferWeight, TargetWeight,
  700. pBlenderPart
  701. );
  702. Timer.Stop();
  703. Err = cudaDeviceSynchronize();
  704. if(Err ==700)
  705. int a = 0;
  706. /***test***/
  707. //cv::Mat matTargetImage = cv::Mat(TargetRc.height * 3 / 2, TargetRc.width + 2, CV_8U, pBlenderPart);
  708. /***test***/
  709. PastePartInWholeImageYUVByGpu(
  710. pPano, nPanoWidth, nPanoHeight, nPanoPitch,
  711. pBlenderPart, WidenPanoRect.width, WidenPanoRect.height, ALIGN_4(WidenPanoRect.width),
  712. WidenPanoRect.x, WidenPanoRect.y,
  713. NULL
  714. );
  715. Err = cudaDeviceSynchronize();
  716. cudaFree(pBlenderPart);
  717. cudaFree(pTargetCorp);
  718. cudaFree(pInferCorp);
  719. }
  720. void ImageFusion::GetGradientMask(cv::Rect Infer, cv::Rect Target, cv::Mat& MaskTarget, cv::Mat& MaskInfer, std::vector<bool>& bAddOrSub)
  721. {
  722. bAddOrSub.resize(3);
  723. //according to intersect rect rgb avg val to get the light base in this image
  724. cv::Mat WeigthMapTarget = cv::Mat(cv::Size(Infer.width, Infer.height), CV_8U, cv::Scalar(255));
  725. cv::Mat WeightMapInfer = cv::Mat(cv::Size(Infer.width, Infer.height), CV_8U, cv::Scalar(255));
  726. cv::Mat DistanceMaskTarget, DistanceMaskInfer;
  727. //set a column 0 in Mask
  728. WeigthMapTarget(cv::Rect(Infer.width - 1, 0, 1, Infer.height)).setTo(cv::Scalar(0));
  729. WeightMapInfer(cv::Rect(0, 0, 1, Infer.height)).setTo(cv::Scalar(0));
  730. DistanceMaskTarget.create(WeigthMapTarget.size(), WeigthMapTarget.type());
  731. DistanceMaskInfer.create(WeightMapInfer.size(), WeightMapInfer.type());
  732. //calculate a Weightmap in DistanceMask
  733. distanceATS_L1_8u(WeigthMapTarget.data, DistanceMaskTarget.cols, DistanceMaskTarget.rows, DistanceMaskTarget.step, DistanceMaskTarget.data);
  734. distanceATS_L1_8u(WeightMapInfer.data, DistanceMaskInfer.cols, DistanceMaskInfer.rows, DistanceMaskInfer.step, DistanceMaskInfer.data);
  735. std::shared_ptr<float> pfLightBaseDif(new float[3] {0});
  736. cv::Mat vTargetChannels, vInferChannels;
  737. DistanceMaskTarget.convertTo(DistanceMaskTarget, CV_32F);
  738. DistanceMaskInfer.convertTo(DistanceMaskInfer, CV_32F);
  739. //step is according to inferRc`s width
  740. float fStep = abs(1.f / Infer.width);
  741. vTargetChannels = DistanceMaskTarget * fStep;
  742. vInferChannels = DistanceMaskInfer * fStep;
  743. MaskTarget = (vTargetChannels);
  744. MaskInfer = (vInferChannels);
  745. }
// CPU linear (gradient) blend of two I420 images of identical geometry into
// pResult. cvInferWeight / cvTargetWeight are per-pixel CV_32F weight maps;
// each output sample is (wI*I + wT*T) / (wI + wT). bUseAVX selects a
// hand-written AVX lane path, otherwise a scalar fallback is used.
// NOTE(review): pResult is released with delete[] and reallocated with new[]
// here, while the GPU path in this file allocates the result with cudaMalloc —
// the two must never be mixed. This appears to be the (currently disabled)
// CPU fallback of BlendingLinearYUVByGpu; verify ownership before re-enabling.
void ImageFusion::BlendingLinearYUV(unsigned char* pInfer, int nInferWidth, int nInferHeight, int nInferPitch, unsigned char* pTarget, int nTargetWidth, int nTargetHeight, int nTargetPitch, cv::Mat& cvInferWeight, cv::Mat& cvTargetWeight, unsigned char*& pResult, bool bUseAVX)
{
	// Pointers to the Y/U/V plane components of both inputs (I420 layout:
	// pitch*height bytes of Y, then U and V planes at a quarter size each).
	unsigned char* pInferY = pInfer;
	unsigned char* pInferU = pInfer + nInferPitch * nInferHeight;
	unsigned char* pInferV = pInfer + nInferPitch * nInferHeight + (nInferPitch * nInferHeight) / 4;
	unsigned char* pTargetY = pTarget;
	unsigned char* pTargetU = pTarget + nTargetPitch * nTargetHeight;
	unsigned char* pTargetV = pTarget + nTargetPitch * nTargetHeight * 5 / 4;
	// Assumes infer and target are the same size, evenly divisible for 4:2:0
	// chroma subsampling.
	// NOTE(review): these asserts join their conditions with ||, so they only
	// require ONE of the equalities to hold — presumably && was intended; confirm.
	assert(nInferHeight == nTargetHeight || nInferWidth == nTargetWidth);
	assert(nInferHeight == cvInferWeight.rows ||
		nInferWidth == cvInferWeight.cols ||
		nTargetHeight == cvTargetWeight.rows ||
		nTargetWidth == cvTargetWeight.cols
	);
	assert(nInferWidth % 2 == 0);
	assert(nInferHeight % 2 == 0);
	// (Re)allocate the destination buffer, sized like the infer image
	// (pitch * rows for Y plus half that again for U and V).
	if (pResult != NULL)
		delete[]pResult;
	pResult = new unsigned char[nInferPitch * cvInferWeight.rows * 3 / 2];
	// Plane pointers into the result buffer.
	unsigned char* pDstY = pResult;
	unsigned char* pDstU = pResult + nInferPitch * cvInferWeight.rows;
	unsigned char* pDstV = pResult + nInferPitch * cvInferWeight.rows * 5 / 4;
	int nDstPitch = nInferPitch;
	int nDstUVPitch = nInferPitch / 2;
	// Debug-only cv::Mat views over the raw buffers (not read below); the
	// "+2" width presumably accounts for pitch padding — TODO confirm.
	cv::Mat InferImage = cv::Mat(nInferHeight * 3 / 2, nInferWidth +2, CV_8U, pInferY);
	cv::Mat TargetImage = cv::Mat(nInferHeight * 3 / 2, nInferWidth +2, CV_8U, pTargetY);
	cv::Mat ResultImage = cv::Mat(nInferHeight * 3 / 2, nInferWidth+2, CV_8U, pDstY);
	if (bUseAVX)
	{
		// ---- Y plane, 8 pixels per iteration ----
		for (int i = 0; i < cvInferWeight.rows; i++)
		{
			for (int j = 0; j < cvInferWeight.cols; )
			{
				__m256 mInferWeight, mTargetWeight;
				int nOffsetMax = 0;
				if (j + 8 < cvInferWeight.cols)
				{
					// Full vector: load 8 contiguous weights per image.
					mInferWeight = _mm256_loadu_ps(cvInferWeight.ptr<float>(i) + j);
					mTargetWeight = _mm256_loadu_ps(cvTargetWeight.ptr<float>(i) + j);
					nOffsetMax = 8;
				}
				else
				{
					// Row tail: fill only the remaining lanes; the unused upper
					// lanes stay uninitialized but are never stored below.
					nOffsetMax = cvInferWeight.cols - j;
					for (int offset = 0; offset < nOffsetMax; offset++)
					{
						mInferWeight.m256_f32[offset] = cvInferWeight.ptr<float>(i)[j + offset];
						mTargetWeight.m256_f32[offset] = cvTargetWeight.ptr<float>(i)[j + offset];
					}
				}
				unsigned char* pInferPointer = pInferY + i * nInferPitch + j;
				unsigned char* pTargetPointer = pTarget + i * nTargetPitch + j;
				unsigned char* pResultPointer = pDstY + nDstPitch * i + j;
				// Widen the 8-bit luma samples to float, lane by lane.
				__m256 mTargetData, mInferData;
				for (int offset = 0; offset < nOffsetMax; offset++)
				{
					mTargetData.m256_f32[offset] = float(*(pTargetPointer + offset));
					mInferData.m256_f32[offset] = float(*( pInferPointer+ offset));
				}
				// Weighted sum, then normalize by the weight total per lane.
				__m256 Res,WeightAdd;
				Res = _mm256_add_ps(_mm256_mul_ps(mInferWeight, mInferData), _mm256_mul_ps(mTargetWeight, mTargetData));
				WeightAdd = _mm256_add_ps(mInferWeight, mTargetWeight);
				for (int offset = 0; offset < nOffsetMax; offset++)
				{
					*(pResultPointer + offset) = unsigned char(Res.m256_f32[offset]/WeightAdd.m256_f32[offset]);
				}
				j += 8;
			}
		}
		// ---- U/V planes: chroma is subsampled 2x, so step 2 rows and sample
		// every other weight column (16 weight columns -> 8 chroma lanes) ----
		for (int i = 0; i < cvInferWeight.rows; i+=2)
		{
			for (int j = 0; j < cvInferWeight.cols; )
			{
				// NOTE(review): dead guard — i is always even here, so this
				// never fires; if it ever did, `continue` without advancing j
				// would loop forever. Candidate for removal.
				if (i % 2 != 0 && j % 2 != 0)
					continue;
				// Chroma-plane coordinates for this (i, j).
				int X = j / 2;
				int Y = i / 2;
				__m256 mInferWeight, mTargetWeight;
				int nOffsetMax = 0;
				if (j + 16 < cvInferWeight.cols)
				{
					nOffsetMax = 16;
					// Gather every other weight into the low 8 lanes.
					for (int offset = 0; offset < nOffsetMax; offset += 2)
					{
						mInferWeight.m256_f32[offset / 2] = cvInferWeight.ptr<float>(i)[j + offset];
						mTargetWeight.m256_f32[offset / 2] = cvTargetWeight.ptr<float>(i)[j + offset];
					}
				}
				else
				{
					// Row tail: fewer than 16 weight columns remain.
					nOffsetMax = cvInferWeight.cols - j;
					for (int offset = 0; offset < nOffsetMax; offset += 2)
					{
						mInferWeight.m256_f32[offset / 2] = cvInferWeight.ptr<float>(i)[j + offset];
						mTargetWeight.m256_f32[offset / 2] = cvTargetWeight.ptr<float>(i)[j + offset];
					}
				}
				unsigned char* pInferPointerU = pInferU + Y * nInferPitch / 2 + X;
				unsigned char* pInferPointerV = pInferV + Y * nInferPitch / 2 + X;
				unsigned char* pTargetPointerU = pTargetU + Y * nTargetPitch / 2 + X;
				unsigned char* pTargetPointerV = pTargetV + Y * nTargetPitch / 2 + X;
				unsigned char* pResultPointerU = pDstU + Y * nDstUVPitch + X;
				unsigned char* pResultPointerV = pDstV + Y * nDstUVPitch + X;
				// NOTE(review): the loads below are crossed — mTargetData* reads
				// from the INFER pointers and mInferData* from the TARGET
				// pointers, so each image's chroma ends up multiplied by the
				// other image's weight. Suspected bug; confirm intent.
				__m256 mTargetDataU, mTargetDataV, mInferDataU, mInferDataV;
				for (int offset = 0; offset < nOffsetMax / 2; offset++)
				{
					mTargetDataU.m256_f32[offset] = float(*(pInferPointerU + offset));
					mTargetDataV.m256_f32[offset] = float(*(pInferPointerV + offset));
					mInferDataU.m256_f32[offset] = float(*(pTargetPointerU + offset));
					mInferDataV.m256_f32[offset] = float(*(pTargetPointerV + offset));
				}
				// Weighted sum and per-lane normalization, as in the Y loop.
				__m256 ResU, ResV, WeightAdd;
				ResU = _mm256_add_ps(_mm256_mul_ps(mInferWeight, mInferDataU), _mm256_mul_ps(mTargetWeight, mTargetDataU));
				ResV = _mm256_add_ps(_mm256_mul_ps(mInferWeight, mInferDataV), _mm256_mul_ps(mTargetWeight, mTargetDataV));
				WeightAdd = _mm256_add_ps(mInferWeight, mTargetWeight);
				for (int offset = 0; offset < nOffsetMax / 2; offset++)
				{
					*(pResultPointerU + offset) = unsigned char(ResU.m256_f32[offset]/ WeightAdd.m256_f32[offset]);
					*(pResultPointerV + offset) = unsigned char(ResV.m256_f32[offset] / WeightAdd.m256_f32[offset]);
				}
				j += 16;
			}
		}
	}
	else
	{
		// ---- Scalar fallback ----
		for (int i = 0; i < cvInferWeight.rows; i++)
		{
			for (int j = 0; j < cvInferWeight.cols; j++)
			{
				// Per-pixel weights for the Y sample.
				float fInferWeight = cvInferWeight.at<float>(i, j);
				float fTargetWeight = cvTargetWeight.at<float>(i, j);
				// Normalized weighted luma.
				float fY = fInferWeight * pInferY[i * nInferPitch + j] + fTargetWeight * pTargetY[i * nTargetPitch + j];
				float fWeight = fInferWeight + fTargetWeight;
				pDstY[i * nDstPitch + j] = unsigned char(fY / fWeight);
				// Chroma exists only at even (i, j): the 4:2:0 sampling grid.
				if (i % 2 == 0 && j % 2 == 0)
				{
					int X = j / 2;
					int Y = i / 2;
					// NOTE(review): the SOURCE chroma reads index both inputs
					// with nDstUVPitch (= nInferPitch / 2); that is only correct
					// for the target image when nTargetPitch == nInferPitch.
					float fU = fInferWeight * pInferU[Y * nDstUVPitch + X] +
						fTargetWeight * pTargetU[Y * nDstUVPitch + X];
					float fV = fInferWeight * pInferV[Y * nDstUVPitch + X] +
						fTargetWeight * pTargetV[Y * nDstUVPitch + X];
					// Map (X, Y) offsets back into real image coordinates.
					//int nTotal = Y * nDstUVPitch + X;
					//int nTrueY = nTotal / nDstPitch;
					//int nTrueX = nTotal % nDstPitch;
					pDstU[Y * nDstUVPitch + X] = unsigned char(fU / fWeight);
					pDstV[Y * nDstUVPitch + X] = unsigned char(fV / fWeight);
				}
			}
		}
	}
}
  913. void ImageFusion::BlendingLinearYUVByGpu(unsigned char* pInfer, int nInferWidth, int nInferHeight, int nInferPitch, unsigned char* pTarget, int nTargetWidth, int nTargetHeight, int nTargetPitch, cv::Mat& cvInferWeight, cv::Mat& cvTargetWeight, unsigned char*& pResult)
  914. {
  915. //默认infer 和 target的 大小一样的,且都能被整除,或者整采样
  916. assert(nInferHeight == nTargetHeight || nInferWidth == nTargetWidth);
  917. assert(nInferHeight == cvInferWeight.rows ||
  918. nInferWidth == cvInferWeight.cols ||
  919. nTargetHeight == cvTargetWeight.rows ||
  920. nTargetWidth == cvTargetWeight.cols
  921. );
  922. assert(nInferWidth % 2 == 0);
  923. //assert(nInferHeight % 2 == 0);
  924. if (nInferHeight % 2 != 0)
  925. {
  926. nInferHeight -= 1;
  927. nTargetHeight -= 1;
  928. }
  929. //首先将数据传入到GPU中
  930. float* pDevInferMask = NULL, * pDevTargetMask = NULL;
  931. cudaMalloc((void**)&pDevTargetMask,cvTargetWeight.step* cvTargetWeight.rows);
  932. cudaMalloc((void**)&pDevInferMask,cvInferWeight.step* cvInferWeight.rows);
  933. //传输
  934. cudaError Err = cudaMemcpy(pDevTargetMask, cvTargetWeight.data, cvTargetWeight.step * cvTargetWeight.rows, cudaMemcpyHostToDevice);
  935. Err = cudaMemcpy(pDevInferMask, cvInferWeight.data, cvInferWeight.step * cvInferWeight.rows, cudaMemcpyHostToDevice);
  936. if (Err != 0)
  937. int i = 10;
  938. //计算得出result的分区指针
  939. unsigned char* pDstY = pResult;
  940. unsigned char* pDstU = pResult + nInferPitch * cvInferWeight.rows;
  941. unsigned char* pDstV = pResult + nInferPitch * cvInferWeight.rows * 5 / 4;
  942. int nDstPitch = nInferPitch;
  943. int nDstUVPitch = nInferPitch / 2;
  944. //获取YUV三个通道的分量的指针
  945. unsigned char* pInferY = pInfer;
  946. unsigned char* pInferU = pInfer + nInferPitch * nInferHeight;
  947. unsigned char* pInferV = pInfer + nInferPitch * nInferHeight * 5 / 4;
  948. unsigned char* pTargetY = pTarget;
  949. unsigned char* pTargetU = pTarget + nTargetPitch * nTargetHeight;
  950. unsigned char* pTargetV = pTarget + nTargetPitch * nTargetHeight * 5 / 4;
  951. YUVTailorAndBlender::GradientBlenderYUV(
  952. pInferY, nInferPitch,
  953. pInferU, nInferPitch / 2,
  954. pInferV, nInferPitch / 2,
  955. pTargetY, nTargetPitch,
  956. pTargetU, nTargetPitch / 2,
  957. pTargetV, nTargetPitch / 2,
  958. cvInferWeight.cols, cvInferWeight.rows,
  959. pDevInferMask, pDevTargetMask, cvInferWeight.step,
  960. pDstY, nDstPitch,
  961. pDstU, nDstUVPitch,
  962. pDstV, nDstUVPitch,
  963. cvInferWeight.cols, cvInferWeight.rows
  964. );
  965. //释放内存
  966. cudaFree(pDevInferMask);
  967. cudaFree(pDevTargetMask);
  968. }
  969. void ImageFusion::PastePartInWholeImageYUV(unsigned char* pPanoImg, int nPanoWidth, int nPanoHeight, int nPanoPitch, unsigned char* pPartImg, int nPartWidth, int nPartHeight, int nPartPitch, int nLeft, int nTop, bool bUseAVX)
  970. {
  971. //chatgtp
  972. uint8_t* dst_y = pPanoImg; // 源Y平面地址
  973. uint8_t* dst_u = pPanoImg + nPanoHeight * nPanoPitch; // 源U平面地址
  974. uint8_t* dst_v = pPanoImg + nPanoHeight * nPanoPitch * 5 / 4; // 源V平面地址
  975. int dst_stride_y = nPanoPitch; // 源Y平面跨距
  976. int dst_stride_u = nPanoPitch >> 1; // 源U平面跨距
  977. int dst_stride_v = nPanoPitch >> 1; // 源V平面跨距
  978. uint8_t* src_y = pPartImg; // 目标Y平面地址
  979. uint8_t* src_u = pPartImg + nPartPitch * nPartHeight; // 目标U平面地址
  980. uint8_t* src_v = pPartImg + nPartPitch * nPartHeight * 5 / 4; // 目标V平面地址
  981. int src_stride_y = nPartPitch; // 目标Y平面跨距
  982. int src_stride_u = nPartPitch >> 1; // 目标U平面跨距
  983. int src_stride_v = nPartPitch >> 1; // 目标V平面跨距
  984. int width = nPartWidth; // 拷贝区域宽度
  985. int height = nPartHeight; // 拷贝区域高度
  986. //定位到目标的位置
  987. unsigned char* dst_target_y = dst_y + nTop * nPanoPitch + nLeft;
  988. unsigned char* dst_target_u = dst_u + nTop * nPanoPitch / 4 + nLeft / 2;
  989. unsigned char* dst_target_v = dst_v + nTop * nPanoPitch / 4 + nLeft / 2;
  990. // 拷贝Y平面
  991. libyuv::CopyPlane(
  992. src_y, src_stride_y,
  993. dst_target_y, dst_stride_y,
  994. width, height);
  995. // 拷贝U平面
  996. libyuv::CopyPlane(
  997. src_u, src_stride_u,
  998. dst_target_u, dst_stride_u,
  999. width / 2, height / 2);
  1000. // 拷贝V平面
  1001. libyuv::CopyPlane(
  1002. src_v, src_stride_v,
  1003. dst_target_v, dst_stride_v,
  1004. width / 2, height / 2);
  1005. }
  1006. void ImageFusion::PastePartInWholeImageYUVByGpu(unsigned char* pPanoImg, int nPanoWidth, int nPanoHeight,
  1007. int nPanoPitch, unsigned char* pPartImg, int nPartWidth,
  1008. int nPartHeight, int nPartPitch,
  1009. int nLeft, int nTop, CUstream* pStream)
  1010. {
  1011. unsigned char* pDstY = pPanoImg;
  1012. unsigned char* pDstU = pPanoImg + nPanoPitch * nPanoHeight;
  1013. unsigned char* pDstV = pPanoImg + nPanoPitch * nPanoHeight * 5 / 4;
  1014. int dst_stride_y = nPanoPitch; // 源Y平面跨距
  1015. int dst_stride_u = nPanoPitch >> 1; // 源U平面跨距
  1016. int dst_stride_v = nPanoPitch >> 1; // 源V平面跨距
  1017. uint8_t* src_y = pPartImg; // 目标Y平面地址
  1018. uint8_t* src_u = pPartImg + nPartPitch * nPartHeight; // 目标U平面地址
  1019. uint8_t* src_v = pPartImg + nPartPitch * nPartHeight * 5 / 4; // 目标V平面地址
  1020. int src_stride_y = nPartPitch; // 目标Y平面跨距
  1021. int src_stride_u = nPartPitch >> 1; // 目标U平面跨距
  1022. int src_stride_v = nPartPitch >> 1; // 目标V平面跨距
  1023. int width = nPartWidth; // 拷贝区域宽度
  1024. int height = nPartHeight; // 拷贝区域高度
  1025. YUVTailorAndBlender::CopyPlane(
  1026. pPartImg, nPartWidth, nPartHeight, nPartPitch,
  1027. pPanoImg, nPanoWidth, nPanoHeight, nPanoPitch,
  1028. nPartWidth, nPartHeight,
  1029. nLeft, nTop, 1, NULL
  1030. );
  1031. YUVTailorAndBlender::CopyPlane(
  1032. src_u, nPartWidth / 2, nPartHeight / 2, nPartPitch / 2,
  1033. pDstU, nPanoWidth / 2, nPanoHeight / 2, nPanoPitch / 2,
  1034. nPartWidth / 2, nPartHeight / 2,
  1035. nLeft / 2, nTop / 2, 1, NULL
  1036. );
  1037. YUVTailorAndBlender::CopyPlane(
  1038. src_v, nPartWidth / 2, nPartHeight / 2, nPartPitch / 2,
  1039. pDstV, nPanoWidth / 2, nPanoHeight / 2, nPanoPitch / 2,
  1040. nPartWidth / 2, nPartHeight / 2,
  1041. nLeft / 2, nTop / 2, 1, NULL
  1042. );
  1043. }
  1044. void ImageFusion::Init_Gpu(unsigned char* pImage, int nPanoWidth, int nPanoHeight, int nPanoPitch)
  1045. {
  1046. pPanoImageBufferGpu = pImage;
  1047. this->nPanoWidth = nPanoWidth;
  1048. this->nPanoHeight = nPanoHeight;
  1049. this->nPanoPitch = nPanoPitch;
  1050. }