#include "ImageFusion.h" #include #include #include "Timer.h" ImageFusion::ImageFusion() { bIsModified = false; } /// /// Initializes the specified p image. /// the Pano Image must be aligned 4,and has been allocated memory /// /// The p image. /// Width of the n pano. /// Height of the n pano. /// The n pano pitch. void ImageFusion::Init(unsigned char* pImage, int nPanoWidth, int nPanoHeight, int nPanoPitch) { pPanoImageBuffer = pImage; this->nPanoWidth = nPanoWidth; this->nPanoHeight = nPanoHeight; this->nPanoPitch = nPanoPitch; } /// /// Gets the laplace pyramid. /// /// The image to be laplace. /// The vector of laplace pyramid. /// The Laplace total layer. /// The size of the gauss kernel. /// The sigma. void ImageFusion::GetLaplacePyramid(cv::Mat& Image, std::vector& vLaplacePyramid, int nLayer, int nSize, float Sigma) { //get the Image size //clear the vector std::vector().swap(vLaplacePyramid); //get the gauss pyramid std::vector vGaussPyramid; cv::Mat ImageCurrent = Image.clone(); cv::Mat GaussImage, NextLayer, UpLayer; for (int i = 0; i < nLayer; i++) { cv::GaussianBlur(ImageCurrent, GaussImage, cv::Size(nSize, nSize), Sigma, Sigma); //vGaussPyramid.push_back(GaussImage.clone()); cv::pyrDown(GaussImage, NextLayer); cv::pyrUp(NextLayer, UpLayer, cv::Size(GaussImage.cols, GaussImage.rows)); //ImageCurrent = NextLayer; cv::Mat LaplaceImage = GaussImage - UpLayer; vLaplacePyramid.push_back(LaplaceImage.clone()); ImageCurrent = NextLayer.clone(); } vLaplacePyramid.push_back(ImageCurrent); } void ImageFusion::GetDogPyramid(cv::Mat& Mask, std::vector& DogPyr, int nLevel) { cv::Mat Down, Current; cv::cvtColor(Mask, Current, cv::COLOR_GRAY2RGB); DogPyr.push_back(Current); Current = Mask.clone(); for (int i = 0; i < nLevel; i++) { cv::Mat _3ChMask; cv::pyrDown(Current, Down); cv::cvtColor(Down, _3ChMask, cv::COLOR_GRAY2RGB); DogPyr.push_back(_3ChMask); Current = Down; } } /// /// Gets the gauss pyramid. /// /// The image. /// The v gauss pyramid. /// The n layer. /// Size of the n. /// The sigma. void ImageFusion::GetGaussPyramid(cv::Mat& Image, std::vector& vGaussPyramid, int nLayer, int nSize, float Sigma) { //get the Image size //clear the vector std::vector().swap(vGaussPyramid); //get the gauss pyramid cv::Mat ImageCopy = Image.clone(); for (int i = 0; i < nLayer; i++) { cv::Mat GaussImage, NextLayer, UpLayer; cv::GaussianBlur(ImageCopy, GaussImage, cv::Size(nSize, nSize), Sigma, Sigma); cv::pyrDown(GaussImage, NextLayer); vGaussPyramid.push_back(NextLayer.clone()); if (NextLayer.cols <= 1 || NextLayer.rows <= 1) { break; } ImageCopy = NextLayer.clone(); } } /// /// Cals the laplace blend image. /// /// The vec infer py. /// The vec target py. /// The vec mask py. /// The vec result py. 
/// <summary>
/// Calculates the Laplacian blend image.
/// </summary>
/// <param name="vInferPy">The infer (left) Laplacian pyramid.</param>
/// <param name="vTargetPy">The target (right) Laplacian pyramid.</param>
/// <param name="vMaskPy">The mask pyramid.</param>
/// <param name="vResultPy">Receives the blended pyramid.</param>
void ImageFusion::CalBlendImage(vMat& vInferPy, vMat& vTargetPy, vMat& vMaskPy, vMat& vResultPy)
{
    cv::Mat AfterCountWeightInfer, AfterCountWeightTarget;
    cv::Mat Result;
    // blend every band-pass level: infer * mask + target * (1 - mask)
    int nLevel = (int)vInferPy.size() - 1;
    for (int index = 0; index < nLevel; index++)
    {
        AfterCountWeightInfer = vInferPy[index].mul(vMaskPy[index]);
        AfterCountWeightTarget = vTargetPy[index].mul(cv::Scalar(1.0, 1.0, 1.0) - vMaskPy[index]);
        Result = AfterCountWeightInfer + AfterCountWeightTarget;
        vResultPy.push_back(Result.clone());
    }
    // blend the coarsest Gaussian level the same way
    cv::Mat lastOne = vInferPy.back().mul(vMaskPy.back())
                    + vTargetPy.back().mul(cv::Scalar(1.f, 1.f, 1.f) - vMaskPy.back());
    vResultPy.push_back(lastOne);
}

void ImageFusion::FusionImageByLaplacePyramid(cv::Mat* pLeft, cv::Mat* Right, int nLayer, int nSize, float Sigma)
{
    // not implemented
}

/// <summary>
/// Fuses the target image into the panorama by Laplacian pyramid blending.
/// </summary>
/// <param name="pPano">The panorama buffer.</param>
/// <param name="InferRc">The overlap rect in the panorama.</param>
/// <param name="pCvTarget">The target image buffer.</param>
/// <param name="SzTargetMat">Size of the target image.</param>
/// <param name="nLayer">Number of pyramid layers.</param>
/// <param name="nSize">Size of the Gaussian kernel.</param>
/// <param name="Sigma">The Gaussian sigma.</param>
/// <param name="LeftTop">The left-top corner.</param>
void ImageFusion::FusionImageByLaplacePyramid(unsigned char* pPano, cv::Rect InferRc, unsigned char* pCvTarget, cv::Size SzTargetMat, int nLayer, int nSize, float Sigma, cv::Point LeftTop)
{
    // first get the fusion part of the pano image
    cv::Mat PanoImage = cv::Mat(this->nPanoHeight, this->nPanoWidth, CV_8UC3, pPano);
    cv::Mat CvInferMat = PanoImage(InferRc);
    cv::Mat Mask(InferRc.size(), CV_8UC1, cv::Scalar(0));

    // the target currently aliases the overlap region of the panorama; the
    // copy of pCvTarget into a separate buffer was left disabled upstream
    cv::Mat CvTargetMat = CvInferMat;

    // mask: the left half of the overlap belongs to the infer image
    for (int i = 0; i < InferRc.height; i++)
    {
        memset(Mask.ptr(i), 255, int(InferRc.width) / 2);
    }

    // build the Laplacian pyramids of both halves and the mask pyramid
    std::vector<cv::Mat> vInferLaplacePyramid;
    std::vector<cv::Mat> vTargetLaplacePyramid;
    std::vector<cv::Mat> vMaskLapLacePyramid;
    std::vector<cv::Mat> vResultLaplacePyramid;
    cv::Mat CvInferMat_f, CvTargetMat_f, Mask_f;
    CvInferMat.convertTo(CvInferMat_f, CV_32FC3, 1.f / 255.f);
    CvTargetMat.convertTo(CvTargetMat_f, CV_32FC3, 1.f / 255.f);
    Mask.convertTo(Mask_f, CV_32F, 1.f / 255.f);
    GetLaplacePyramid(CvInferMat_f, vInferLaplacePyramid, nLayer, nSize, Sigma);
    GetLaplacePyramid(CvTargetMat_f, vTargetLaplacePyramid, nLayer, nSize, Sigma);
    GetDogPyramid(Mask_f, vMaskLapLacePyramid, nLayer);
    CalBlendImage(vInferLaplacePyramid, vTargetLaplacePyramid, vMaskLapLacePyramid, vResultLaplacePyramid);

    // collapse the blended pyramid back into an image
    int nVecLength = (int)vResultLaplacePyramid.size();
    cv::Mat UpLayer;
    cv::Mat CurrentMat = vResultLaplacePyramid.back();
    for (int level = nVecLength - 2; level >= 0; level--)
    {
        cv::pyrUp(CurrentMat, UpLayer, vResultLaplacePyramid[level].size());
        CurrentMat = vResultLaplacePyramid[level] + UpLayer;
    }
    CurrentMat.convertTo(CurrentMat, CV_8UC3, 255);

    // write the result back into the panorama
    unsigned char* pModifyPart = pPano + InferRc.y * nPanoPitch + InferRc.x * 3;
    for (int i = 0; i < InferRc.height; i++)
    {
        memcpy(pModifyPart + i * nPanoPitch, CurrentMat.ptr(i), InferRc.width * 3);
    }
}
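// Illustrative usage sketch for the pyramid path (kept out of the build;
// buffer names, sizes, and the overlap rect are placeholders, not values
// from this project):
#if 0
void DemoLaplaceFusion(unsigned char* pPanoBuf, int nW, int nH, int nPitch,
                       unsigned char* pTargetBuf)
{
    ImageFusion Fusion;
    Fusion.Init(pPanoBuf, nW, nH, nPitch);          // pano must be allocated, 4-byte aligned
    cv::Rect OverlapRc(nW / 2 - 64, 0, 128, nH);    // hypothetical overlap region
    Fusion.FusionImageByLaplacePyramid(pPanoBuf, OverlapRc, pTargetBuf,
                                       cv::Size(128, nH),
                                       4,            // pyramid layers
                                       5,            // Gaussian kernel size (odd)
                                       1.5f,         // sigma
                                       cv::Point(0, 0));
}
#endif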
/// <summary>
/// Gets the Laplacian fusion image.
/// </summary>
/// <param name="pFusionImage">Receives the fusion image buffer.</param>
/// <param name="nWidth">Receives the width.</param>
/// <param name="nHeight">Receives the height.</param>
/// <param name="nPitch">Receives the pitch.</param>
void ImageFusion::GetLaplaceFusionImage(unsigned char*& pFusionImage, int& nWidth, int& nHeight, int& nPitch)
{
    // the pointer must be taken by reference (the original took it by value,
    // so the caller never received the buffer address)
    pFusionImage = this->pPanoImageBuffer;
    nWidth = this->nPanoWidth;
    nHeight = this->nPanoHeight;
    nPitch = this->nPanoPitch;
}

void ImageFusion::FusionImageByThinRectWindowSlideMean(unsigned char* pPano, cv::Rect InferRc, int nSize, int nType, float Sigma)
{
    // nType selects the window shape inside CreateThinRectWindow
    // (0 = mean, 1 = Gaussian with sigma fixed at 1.5); the last argument
    // is the channel count
    cv::Mat Kernel = CreateThinRectWindow(1, nSize, nType, 1);

    // first get the fusion part of the pano image
    cv::Mat PanoImage = cv::Mat(this->nPanoHeight, this->nPanoWidth, CV_8UC3, pPano);

    // widen the overlap rect so the filter covers the seam
    cv::Rect WidenInferRect = InferRc;
    WidenInferRect.x -= nSize / 2;
    WidenInferRect.width += nSize;
    cv::Mat InferImage = PanoImage(WidenInferRect);
    cv::Mat _32FInferImage;
    InferImage.convertTo(_32FInferImage, CV_32F);
    std::vector<cv::Mat> vSingleChannelImage;
    cv::split(_32FInferImage, vSingleChannelImage);
    cv::Mat MergeImage;
    cv::Mat Afterfilter;
    for (int i = 0; i < 3; i++)
    {
        cv::filter2D(vSingleChannelImage[i], vSingleChannelImage[i], -1, Kernel);
    }
    cv::merge(vSingleChannelImage, MergeImage);
    MergeImage.convertTo(Afterfilter, CV_8U);
    Afterfilter.copyTo(InferImage);
}
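// Illustrative sketch (not part of the original file): the overload above
// smooths the seam with a thin 1 x nSize kernel per channel. A standalone
// equivalent of the nType == 0 case; note cv::filter2D applies a
// single-channel kernel to every channel, so the split/merge is optional:
static void DemoSeamBoxFilter(cv::Mat& SeamRegion, int nSize)
{
    cv::Mat Kernel = cv::Mat::ones(1, nSize, CV_32F) / (float)nSize;
    cv::Mat Src32F, Dst32F;
    SeamRegion.convertTo(Src32F, CV_32F);
    cv::filter2D(Src32F, Dst32F, -1, Kernel);   // horizontal-only smoothing
    Dst32F.convertTo(SeamRegion, CV_8U);
}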
/// <summary>
/// Fuses the overlap region with a thin sliding rectangular window filter.
/// </summary>
/// <param name="pPano">The panorama image to be modified.</param>
/// <param name="InferRc">The overlap rect in the panorama.</param>
/// <param name="InferImg">The overlap infer image.</param>
/// <param name="CurCoordinateRc">The overlap rect in the current image.</param>
/// <param name="TargetImg">The overlap target image.</param>
/// <param name="TargetRc">The overlap rect in the target image.</param>
/// <param name="nSize">Size of the filter.</param>
/// <param name="nType">Type of the filter: 0 = mean, 1 = Gaussian.</param>
/// <param name="Sigma">The sigma (currently unused; the Gaussian window is
/// built with a fixed sigma of 1.5).</param>
void ImageFusion::FusionImageByThinRectWindowSlideMean(unsigned char* pPano, cv::Rect InferRc, cv::Mat& InferImg, cv::Rect CurCoordinateRc, cv::Mat& TargetImg, cv::Rect TargetRc, int nSize, int nType, float Sigma)
{
    if (CurCoordinateRc.width == 0 || CurCoordinateRc.height == 0 ||
        TargetRc.width == 0 || TargetRc.height == 0)
        return;

    // the last argument of CreateThinRectWindow is the channel count; the
    // original passed 1.5 here for the Gaussian case, which truncated to 1
    cv::Mat Kernel = CreateThinRectWindow(1, nSize, nType, 1);

    // first get the fusion part of the pano image
    cv::Mat PanoImage = cv::Mat(this->nPanoHeight, this->nPanoWidth, CV_8UC3, pPano);
    cv::Rect WidenPanoRect = InferRc;
    cv::Mat InferImage = PanoImage(WidenPanoRect);
    cv::Mat InferImageCopy = InferImage.clone();
    if (WidenPanoRect.width % 2 == 1)
        TargetRc.width -= 1;

    // left half comes from the infer image ...
    cv::Rect WidenInferRect = CurCoordinateRc;
    WidenInferRect.width = WidenPanoRect.width / 2;
    if (WidenInferRect.width != 0)
        InferImg(WidenInferRect).copyTo(InferImageCopy(cv::Rect(0, 0, WidenInferRect.width, WidenInferRect.height)));

    // ... right half comes from the target image
    cv::Rect WidenTargetRect = TargetRc;
    WidenTargetRect.x += WidenPanoRect.width / 2;
    WidenTargetRect.width = WidenPanoRect.width / 2;
    if (TargetRc.width != 0)
        TargetImg(WidenTargetRect).copyTo(InferImageCopy(cv::Rect(WidenInferRect.width, 0, WidenTargetRect.width, WidenTargetRect.height)));

    cv::Mat _32FInferImage;
    InferImageCopy.convertTo(_32FInferImage, CV_32F);
    std::vector<cv::Mat> vSingleChannelImage;
    cv::split(_32FInferImage, vSingleChannelImage);
    cv::Mat MergeImage;
    cv::Mat Afterfilter;
    for (int i = 0; i < 3; i++)
    {
        // filter each row of the channel with the 1-D sliding window
        for (int ImageRows = 0; ImageRows < vSingleChannelImage[i].rows; ImageRows++)
        {
            cv::Mat CurRow = vSingleChannelImage[i](cv::Range(ImageRows, ImageRows + 1), cv::Range(0, _32FInferImage.cols));
            cv::Mat ResRow;
            Convolution1D(CurRow, ResRow, Kernel);
            ResRow.copyTo(vSingleChannelImage[i](cv::Range(ImageRows, ImageRows + 1), cv::Range(0, _32FInferImage.cols)));
        }
    }
    cv::merge(vSingleChannelImage, MergeImage);
    MergeImage.convertTo(Afterfilter, CV_8U);
    Afterfilter.copyTo(InferImage);
}

cv::Mat ImageFusion::CreateThinRectWindow(int nHeight, int nWidth, int nType, int nChannels)
{
    // the window size must be odd
    if (nWidth % 2 == 0 || nHeight % 2 == 0)
        return cv::Mat();
    cv::Mat Window;
    if (nChannels == 1)
        Window = cv::Mat(nHeight, nWidth, CV_32F, cv::Scalar(0));
    else
        Window = cv::Mat(nHeight, nWidth, CV_32FC3, cv::Scalar(0, 0, 0));

    if (nType == 0)
    {
        // mean window: every tap weighs 1/nWidth
        Window.setTo(cv::Scalar::all(1));
        Window = Window / nWidth;
    }
    else if (nType == 1)
    {
        // Gaussian window, normalized to sum to 1 (only meaningful for the
        // single-channel case)
        int nCenterX = nWidth / 2;
        float sum = 0.f;
        for (int i = 0; i < nWidth; i++)
        {
            float x = float(i - nCenterX);
            float weight = (float)CaculateGaussWeight(x, 1.5);
            sum += weight;
            Window.at<float>(0, i) = weight;
        }
        for (int i = 0; i < nWidth; i++)
        {
            Window.at<float>(0, i) /= sum;
        }
    }
    return Window;
}

/// <summary>
/// Calculates the Gaussian weight at offset x.
/// </summary>
/// <param name="x">The offset from the kernel center.</param>
/// <param name="sigma">The sigma.</param>
double ImageFusion::CaculateGaussWeight(double x, double sigma)
{
    return exp(-(x * x) / (2 * sigma * sigma)) / (sqrt(2 * M_PI) * sigma);
}

void ImageFusion::Convolution2D(cv::Mat& Src, cv::Mat& Dst, cv::Mat& Kernel)
{
    int nChannels = Src.channels();
    int nRows = Src.rows;
    int nCols = Src.cols * nChannels;           // interleaved float count per row
    int nKernelRows = Kernel.rows;
    int nKernelCols = Kernel.cols * nChannels;
    int nKernelCenterX = nKernelCols / 2;
    int nKernelCenterY = nKernelRows / 2;
    // Dst must match Src exactly (the original allocated a CV_32FC3 matrix
    // with the already channel-multiplied column count)
    Dst = cv::Mat(Src.rows, Src.cols, Src.type(), cv::Scalar::all(0));
    for (int i = 0; i < nRows; i++)
    {
        float* pDst = Dst.ptr<float>(i);
        for (int j = 0; j < nCols; j++)
        {
            float sum = 0.f;
            for (int m = 0; m < nKernelRows; m++)
            {
                // reflect rows at the image border
                int nSrcRow = i + m - nKernelCenterY;
                if (nSrcRow < 0)
                    nSrcRow = -nSrcRow;
                else if (nSrcRow >= nRows)
                    nSrcRow = nRows - (nSrcRow - nRows) - 1;
                float* pSrc = Src.ptr<float>(nSrcRow);
                float* pKernel = Kernel.ptr<float>(m);
                for (int n = 0; n < nKernelCols; n++)
                {
                    // reflect columns; note this mixes channels at the border
                    int nSrcCol = j + n - nKernelCenterX;
                    if (nSrcCol < 0)
                        nSrcCol = -nSrcCol;
                    else if (nSrcCol >= nCols)
                        nSrcCol = nCols - (nSrcCol - nCols) - 1;
                    sum += pSrc[nSrcCol] * pKernel[n];
                }
            }
            pDst[j] = sum;
        }
    }
}
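// Reference sketch (not part of the original file): for nType == 1,
// CreateThinRectWindow above should reproduce OpenCV's normalized Gaussian
// kernel for sigma 1.5. A standalone comparison, assuming an odd width:
static double DemoCompareGaussWindow(int nWidth)
{
    cv::Mat Reference = cv::getGaussianKernel(nWidth, 1.5, CV_32F);  // nWidth x 1
    int nCenter = nWidth / 2;
    float sum = 0.f;
    std::vector<float> Window(nWidth);
    for (int i = 0; i < nWidth; i++)
    {
        double x = i - nCenter;
        Window[i] = (float)(exp(-(x * x) / (2 * 1.5 * 1.5)) / (sqrt(2 * M_PI) * 1.5));
        sum += Window[i];
    }
    double fMaxDiff = 0.0;
    for (int i = 0; i < nWidth; i++)
    {
        double d = fabs(Window[i] / sum - Reference.at<float>(i, 0));
        if (d > fMaxDiff)
            fMaxDiff = d;
    }
    return fMaxDiff;   // expected to be near zero
}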
void ImageFusion::Convolution1D(cv::Mat& Src, cv::Mat& Dst, cv::Mat& Kernel)
{
    int nRows = Src.rows;
    int nCols = Src.cols;
    if (nRows == 0 || nCols == 0)
        return;
    // the original hand-rolled cv::error calls (with hard-coded source paths)
    // are replaced by the equivalent assertions
    CV_Assert(Kernel.cols % 2 == 1);
    CV_Assert(Kernel.channels() == 1);

    int nKernelCols = Kernel.cols;
    int nKernelCenterX = nKernelCols / 2 + 1;

    // widen the source row so the kernel never reads out of bounds
    cv::Mat WidenSrc = cv::Mat(nRows, nCols + nKernelCols - 1, CV_32F, cv::Scalar(0));
    Dst = cv::Mat(nRows, nCols, CV_32F, cv::Scalar(0));
    Src.copyTo(WidenSrc(cv::Range(0, 1), cv::Range(nKernelCols / 2, nCols + nKernelCols / 2)));

    if (nKernelCols < nCols)
    {
        // reflect borders (the original compared against nRows, which is
        // always 1 for the single-row input)
        for (int Bordi = 0; Bordi < nKernelCols / 2; Bordi++)
        {
            WidenSrc.ptr<float>(0)[nKernelCols / 2 - 1 - Bordi] = WidenSrc.ptr<float>(0)[nKernelCols / 2 + 1 + Bordi];
            WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 + Bordi] = WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 - 2 - Bordi];
        }
    }
    else if (nKernelCols != 1)
    {
        // row narrower than the kernel: repeat the edge neighbors
        for (int Bordi = 0; Bordi < nKernelCols / 2; Bordi++)
        {
            WidenSrc.ptr<float>(0)[nKernelCols / 2 - 1 - Bordi] = WidenSrc.ptr<float>(0)[nKernelCols / 2 + 1];
            WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 + Bordi] = WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 - 2];
        }
    }
    else
    {
        // 1-tap kernel: replicate the edge samples
        for (int Bordi = 0; Bordi < nKernelCols / 2; Bordi++)
        {
            WidenSrc.ptr<float>(0)[nKernelCols / 2 - 1 - Bordi] = WidenSrc.ptr<float>(0)[nKernelCols / 2];
            WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 + Bordi] = WidenSrc.ptr<float>(0)[nCols + nKernelCols / 2 - 1];
        }
    }

    if (nKernelCols <= 7)
    {
        // load the kernel taps into the low lanes of an AVX register
        alignas(32) float fKernel[8] = { 0 };
        memcpy(fKernel, Kernel.ptr<float>(0), sizeof(float) * nKernelCols);
        __m256 mmKernel = _mm256_load_ps(fKernel);

        alignas(32) float fSrc[8] = { 0 };
        alignas(32) float fSum[8];
        for (int i = 0; i < nCols; i++)
        {
            memcpy(fSrc, WidenSrc.ptr<float>(0) + i, sizeof(float) * nKernelCols);
            // multiply the window by the kernel; the original lost this
            // product to a comma operator
            _mm256_store_ps(fSum, _mm256_mul_ps(_mm256_load_ps(fSrc), mmKernel));
            for (int j = 0; j < nKernelCols; j++)
            {
                Dst.ptr<float>(0)[i] += fSum[j];
            }
            // sliding update: feed the filtered sample back into the widened
            // source so later windows see the smoothed value
            WidenSrc.ptr<float>(0)[nKernelCenterX + i] = Dst.ptr<float>(0)[i];
        }
    }
    else
    {
        // scalar fallback for kernels wider than 7 taps (the original
        // silently produced an all-zero result here)
        for (int i = 0; i < nCols; i++)
        {
            float sum = 0.f;
            for (int j = 0; j < nKernelCols; j++)
                sum += WidenSrc.ptr<float>(0)[i + j] * Kernel.ptr<float>(0)[j];
            Dst.ptr<float>(0)[i] = sum;
            WidenSrc.ptr<float>(0)[nKernelCenterX + i] = sum;
        }
    }
}

void ImageFusion::FusionImageByBlendingGradient(unsigned char* pPano, cv::Rect InferRc, cv::Mat& InferImg, cv::Rect CurCoordinateRc, cv::Mat& TargetImg, cv::Rect TargetRc)
{
    if (CurCoordinateRc.width == 0 || CurCoordinateRc.height == 0 ||
        TargetRc.width == 0 || TargetRc.height == 0)
        return;
    // reject rects that fall outside their source images
    if (CurCoordinateRc.width + CurCoordinateRc.x > InferImg.cols ||
        CurCoordinateRc.height + CurCoordinateRc.y > InferImg.rows ||
        InferRc.height + InferRc.y > nPanoHeight ||
        TargetRc.width + TargetRc.x > TargetImg.cols ||
        TargetRc.height + TargetRc.y > TargetImg.rows ||
        CurCoordinateRc.x < 0 || CurCoordinateRc.y < 0 ||
        TargetRc.x < 0 || TargetRc.y < 0)
        return;

    // first get the fusion part of the pano image
    cv::Mat PanoImage = cv::Mat(this->nPanoHeight, this->nPanoWidth, CV_8UC3, pPano);
    if (PanoImage.empty())
        return;
    cv::Rect WidenPanoRect = InferRc;
    if (WidenPanoRect.width % 2 == 1)
        TargetRc.width -= 1;

    cv::Mat InferPart, TargetPart;
    cv::Rect WidenInferRect = CurCoordinateRc;
    if (WidenInferRect.width != 0)
        InferPart = InferImg(WidenInferRect).clone();
    else
        return;
    cv::Rect WidenTargetRect = TargetRc;
    if (TargetRc.width != 0)
        TargetPart = TargetImg(WidenTargetRect).clone();
    else
        return;
    if (InferPart.cols != TargetPart.cols)
        return;

    // build the gradient weights for the two parts and blend
    cv::Mat InferWeight, TargetWeight;
    std::vector<bool> Useless;
    GetGradientMask(CurCoordinateRc, TargetRc, InferWeight, TargetWeight, Useless);
    cv::Mat Res;
    cv::blendLinear(InferPart, TargetPart, InferWeight, TargetWeight, Res);
    Res.copyTo(PanoImage(WidenPanoRect));
}
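// Illustrative sketch (not part of the original file):
// FusionImageByBlendingGradient above delegates the per-pixel mix to
// cv::blendLinear. A minimal standalone example with simple linear ramps in
// place of the distance-transform masks from GetGradientMask:
static cv::Mat DemoBlendLinearRamp(const cv::Mat& Left, const cv::Mat& Right)
{
    CV_Assert(Left.size() == Right.size() && Left.type() == Right.type());
    cv::Mat W1(Left.size(), CV_32F), W2(Left.size(), CV_32F);
    for (int x = 0; x < Left.cols; x++)
    {
        float w = 1.f - float(x) / float(Left.cols - 1);  // 1 -> 0, left to right
        W1.col(x).setTo(w);
        W2.col(x).setTo(1.f - w);
    }
    cv::Mat Res;
    cv::blendLinear(Left, Right, W1, W2, Res);  // (L*W1 + R*W2) / (W1 + W2)
    return Res;
}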
void ImageFusion::FusionImageByBlendingGradientYUV(unsigned char* pPano, cv::Rect InferRc, unsigned char* pInfer, int nInferWidth, int nInferHeight, int nInferPitch, cv::Rect CurCoordinateRc, unsigned char* pTarget, int nTargetWidth, int nTargetHeight, int nTargetPitch, cv::Rect TargetRc, bool bUseSSE2)
{
    if (CurCoordinateRc.width == 0 || CurCoordinateRc.height == 0 ||
        TargetRc.width == 0 || TargetRc.height == 0)
        return;
    // reject rects that fall outside their source images (the original
    // mistakenly checked the target height against nInferHeight)
    if (CurCoordinateRc.width + CurCoordinateRc.x > nInferWidth ||
        CurCoordinateRc.height + CurCoordinateRc.y > nInferHeight ||
        InferRc.height + InferRc.y > nPanoHeight ||
        TargetRc.width + TargetRc.x > nTargetWidth ||
        TargetRc.height + TargetRc.y > nTargetHeight ||
        CurCoordinateRc.x < 0 || CurCoordinateRc.y < 0 ||
        TargetRc.x < 0 || TargetRc.y < 0)
        return;

    cv::Rect WidenPanoRect = InferRc;
    if (WidenPanoRect.width % 2 == 1)
        TargetRc.width -= 1;
    cv::Rect WidenInferRect = CurCoordinateRc;

    // allocate a buffer for the cropped infer image (I420)
    unsigned char* pInferCorp = new unsigned char[ALIGN_4(CurCoordinateRc.width) * CurCoordinateRc.height * 3 / 2];
    if (WidenInferRect.width != 0)
    {
        libyuv::ConvertToI420(
            pInfer, nInferPitch * nInferHeight * 3 / 2,
            pInferCorp, ALIGN_4(CurCoordinateRc.width),
            pInferCorp + ALIGN_4(CurCoordinateRc.width) * CurCoordinateRc.height, ALIGN_4(CurCoordinateRc.width) / 2,
            pInferCorp + ALIGN_4(CurCoordinateRc.width) * CurCoordinateRc.height * 5 / 4, ALIGN_4(CurCoordinateRc.width) / 2,
            CurCoordinateRc.x, CurCoordinateRc.y,
            nInferWidth, nInferHeight,
            ALIGN_4(CurCoordinateRc.width), CurCoordinateRc.height,
            libyuv::kRotate0, libyuv::FOURCC_I420);
    }
    else
    {
        delete[] pInferCorp;
        return;
    }

    cv::Rect WidenTargetRect = TargetRc;
    // allocate a buffer for the cropped target image (I420)
    unsigned char* pTargetCorp = new unsigned char[ALIGN_4(TargetRc.width) * TargetRc.height * 3 / 2];
    if (TargetRc.width != 0)
    {
        libyuv::ConvertToI420(
            pTarget, nTargetPitch * nTargetHeight * 3 / 2,
            pTargetCorp, ALIGN_4(TargetRc.width),
            pTargetCorp + ALIGN_4(TargetRc.width) * TargetRc.height, ALIGN_4(TargetRc.width) / 2,
            pTargetCorp + ALIGN_4(TargetRc.width) * TargetRc.height * 5 / 4, ALIGN_4(TargetRc.width) / 2,
            TargetRc.x, TargetRc.y,
            nTargetWidth, nTargetHeight,
            ALIGN_4(TargetRc.width), TargetRc.height,
            libyuv::kRotate0, libyuv::FOURCC_I420);
    }
    else
    {
        delete[] pInferCorp;
        delete[] pTargetCorp;
        return;
    }

    // build the gradient weights for the two crops
    cv::Mat InferWeight, TargetWeight;
    std::vector<bool> Useless;
    GetGradientMask(CurCoordinateRc, TargetRc, InferWeight, TargetWeight, Useless);
    unsigned char* pBlenderPart = NULL;

    TimerCounter Timer;
    Timer.Start();
    BlendingLinearYUV(
        pInferCorp, CurCoordinateRc.width, CurCoordinateRc.height, ALIGN_4(CurCoordinateRc.width),
        pTargetCorp, TargetRc.width, TargetRc.height, ALIGN_4(TargetRc.width),
        InferWeight, TargetWeight,
        pBlenderPart, true);
    Timer.Stop();

    PastePartInWholeImageYUV(
        pPano, nPanoWidth, nPanoHeight, nPanoPitch,
        pBlenderPart, WidenPanoRect.width, WidenPanoRect.height, ALIGN_4(WidenPanoRect.width),
        WidenPanoRect.x, WidenPanoRect.y, false);

    // release the crop and blend buffers (the original leaked all three)
    delete[] pBlenderPart;
    delete[] pTargetCorp;
    delete[] pInferCorp;
}
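// Illustrative helper (not part of the original file): every YUV routine in
// this file assumes contiguous I420, a full-resolution Y plane followed by
// quarter-resolution U and V planes. A small struct that captures the
// plane/stride arithmetic repeated above:
struct I420View
{
    unsigned char* pY;
    unsigned char* pU;
    unsigned char* pV;
    int nStrideY;
    int nStrideUV;
};

static I420View MakeI420View(unsigned char* pBuffer, int nHeight, int nPitch)
{
    I420View View;
    View.pY = pBuffer;
    View.pU = pBuffer + nPitch * nHeight;           // U starts after Y
    View.pV = pBuffer + nPitch * nHeight * 5 / 4;   // V starts after U (1/4 of Y)
    View.nStrideY = nPitch;
    View.nStrideUV = nPitch / 2;
    return View;
}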
void ImageFusion::FusionImageByBlendingGradientYUVByGpu(unsigned char* pPano, cv::Rect InferRc, unsigned char* pInfer, int nInferWidth, int nInferHeight, int nInferPitch, cv::Rect CurCoordinateRc, unsigned char* pTarget, int nTargetWidth, int nTargetHeight, int nTargetPitch, cv::Rect TargetRc)
{
    if (CurCoordinateRc.width == 0 || CurCoordinateRc.height == 0 ||
        TargetRc.width == 0 || TargetRc.height == 0)
        return;
    // reject rects that fall outside their source images (the original
    // mistakenly checked the target height against nInferHeight)
    if (CurCoordinateRc.width + CurCoordinateRc.x > nInferWidth ||
        CurCoordinateRc.height + CurCoordinateRc.y > nInferHeight ||
        InferRc.height + InferRc.y > nPanoHeight ||
        TargetRc.width + TargetRc.x > nTargetWidth ||
        TargetRc.height + TargetRc.y > nTargetHeight ||
        CurCoordinateRc.x < 0 || CurCoordinateRc.y < 0 ||
        TargetRc.x < 0 || TargetRc.y < 0)
        return;

    cv::Rect WidenPanoRect = InferRc;
    if (WidenPanoRect.width % 2 == 1)
        TargetRc.width -= 1;
    cv::Rect WidenInferRect = CurCoordinateRc;

    // allocate device buffers for the cropped infer and target images (I420)
    unsigned char* pInferCorp = nullptr;
    cudaMalloc((void**)&pInferCorp, ALIGN_4(WidenInferRect.width) * WidenInferRect.height * 3 / 2);
    cv::Rect WidenTargetRect = TargetRc;
    unsigned char* pTargetCorp = nullptr;
    cudaMalloc((void**)&pTargetCorp, ALIGN_4(TargetRc.width) * TargetRc.height * 3 / 2);
    cudaError Err = cudaDeviceSynchronize();

    if (WidenInferRect.width != 0)
    {
        YUVTailorAndBlender::CropI420(
            pInfer, nInferPitch,
            pInfer + nInferPitch * nInferHeight, nInferPitch / 2,
            pInfer + nInferPitch * nInferHeight * 5 / 4, nInferPitch / 2,
            pInferCorp, ALIGN_4(CurCoordinateRc.width),
            pInferCorp + ALIGN_4(CurCoordinateRc.width) * CurCoordinateRc.height, ALIGN_4(CurCoordinateRc.width) / 2,
            pInferCorp + ALIGN_4(CurCoordinateRc.width) * CurCoordinateRc.height * 5 / 4, ALIGN_4(CurCoordinateRc.width) / 2,
            CurCoordinateRc.width, CurCoordinateRc.height,
            CurCoordinateRc.x, CurCoordinateRc.y);
    }
    else
    {
        cudaFree(pTargetCorp);
        cudaFree(pInferCorp);
        return;
    }

    if (TargetRc.width != 0)
    {
        YUVTailorAndBlender::CropI420(
            pTarget, nTargetPitch,
            pTarget + nTargetPitch * nTargetHeight, nTargetPitch / 2,
            pTarget + nTargetPitch * nTargetHeight * 5 / 4, nTargetPitch / 2,
            pTargetCorp, ALIGN_4(WidenTargetRect.width),
            pTargetCorp + ALIGN_4(WidenTargetRect.width) * WidenTargetRect.height, ALIGN_4(WidenTargetRect.width) / 2,
            pTargetCorp + ALIGN_4(WidenTargetRect.width) * WidenTargetRect.height * 5 / 4, ALIGN_4(WidenTargetRect.width) / 2,
            WidenTargetRect.width, WidenTargetRect.height,
            WidenTargetRect.x, WidenTargetRect.y);
    }
    else
    {
        cudaFree(pTargetCorp);
        cudaFree(pInferCorp);
        return;
    }
    Err = cudaDeviceSynchronize();

    // build the gradient weights for the two crops
    cv::Mat InferWeight, TargetWeight;
    std::vector<bool> Useless;
    GetGradientMask(CurCoordinateRc, TargetRc, InferWeight, TargetWeight, Useless);
    unsigned char* pBlenderPart = NULL;

    TimerCounter Timer;
    Timer.Start();
    cudaMalloc((void**)&pBlenderPart, ALIGN_4(TargetRc.width) * TargetRc.height * 3 / 2);
    BlendingLinearYUVByGpu(
        pInferCorp, CurCoordinateRc.width, CurCoordinateRc.height, ALIGN_4(CurCoordinateRc.width),
        pTargetCorp, TargetRc.width, TargetRc.height, ALIGN_4(TargetRc.width),
        InferWeight, TargetWeight,
        pBlenderPart);
    Timer.Stop();
    // error 700 (cudaErrorIllegalAddress) here indicates a kernel fault
    Err = cudaDeviceSynchronize();

    PastePartInWholeImageYUVByGpu(
        pPano, nPanoWidth, nPanoHeight, nPanoPitch,
        pBlenderPart, WidenPanoRect.width, WidenPanoRect.height, ALIGN_4(WidenPanoRect.width),
        WidenPanoRect.x, WidenPanoRect.y, NULL);
    Err = cudaDeviceSynchronize();

    cudaFree(pBlenderPart);
    cudaFree(pTargetCorp);
    cudaFree(pInferCorp);
}
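// Suggestion sketch (not project policy): the GPU path above calls
// cudaMalloc / cudaDeviceSynchronize without acting on failures. A
// conventional checking macro that logs instead of silently continuing:
#include <cstdio>
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t _err = (call);                                            \
        if (_err != cudaSuccess)                                              \
            fprintf(stderr, "CUDA error %d (%s) at %s:%d\n", (int)_err,       \
                    cudaGetErrorString(_err), __FILE__, __LINE__);            \
    } while (0)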
void ImageFusion::GetGradientMask(cv::Rect Infer, cv::Rect Target, cv::Mat& MaskTarget, cv::Mat& MaskInfer, std::vector<bool>& bAddOrSub)
{
    // (the element type of bAddOrSub was lost in the original; bool is
    // reconstructed from the naming convention)
    bAddOrSub.resize(3);
    // build two full-weight maps over the overlap; each gets one zero column
    // so the distance transform produces a ramp across the seam
    cv::Mat WeightMapTarget = cv::Mat(cv::Size(Infer.width, Infer.height), CV_8U, cv::Scalar(255));
    cv::Mat WeightMapInfer = cv::Mat(cv::Size(Infer.width, Infer.height), CV_8U, cv::Scalar(255));
    cv::Mat DistanceMaskTarget, DistanceMaskInfer;
    // MaskTarget falls to zero at the right edge, MaskInfer at the left edge;
    // the callers pass (InferWeight, TargetWeight) into (MaskTarget,
    // MaskInfer), which yields the intended orientation
    WeightMapTarget(cv::Rect(Infer.width - 1, 0, 1, Infer.height)).setTo(cv::Scalar(0));
    WeightMapInfer(cv::Rect(0, 0, 1, Infer.height)).setTo(cv::Scalar(0));
    DistanceMaskTarget.create(WeightMapTarget.size(), WeightMapTarget.type());
    DistanceMaskInfer.create(WeightMapInfer.size(), WeightMapInfer.type());
    // the L1 distance transform turns the zero column into a linear ramp
    distanceATS_L1_8u(WeightMapTarget.data, DistanceMaskTarget.cols, DistanceMaskTarget.rows, DistanceMaskTarget.step, DistanceMaskTarget.data);
    distanceATS_L1_8u(WeightMapInfer.data, DistanceMaskInfer.cols, DistanceMaskInfer.rows, DistanceMaskInfer.step, DistanceMaskInfer.data);

    DistanceMaskTarget.convertTo(DistanceMaskTarget, CV_32F);
    DistanceMaskInfer.convertTo(DistanceMaskInfer, CV_32F);
    // the weight step is the reciprocal of the overlap width
    float fStep = abs(1.f / Infer.width);
    MaskTarget = DistanceMaskTarget * fStep;
    MaskInfer = DistanceMaskInfer * fStep;
}
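// Reference sketch (not part of the original file): GetGradientMask above
// derives feathering weights from an L1 distance transform via the
// project-internal distanceATS_L1_8u. An equivalent standalone version using
// cv::distanceTransform, with the infer image assumed to cover the left half:
static void DemoGradientMasks(int nWidth, int nHeight,
                              cv::Mat& InferWeight, cv::Mat& TargetWeight)
{
    cv::Mat MaskInfer(nHeight, nWidth, CV_8U, cv::Scalar(255));
    cv::Mat MaskTarget(nHeight, nWidth, CV_8U, cv::Scalar(255));
    MaskInfer.col(nWidth - 1).setTo(0);    // infer weight falls to 0 at the right edge
    MaskTarget.col(0).setTo(0);            // target weight falls to 0 at the left edge
    cv::distanceTransform(MaskInfer, InferWeight, cv::DIST_L1, 3);
    cv::distanceTransform(MaskTarget, TargetWeight, cv::DIST_L1, 3);
    InferWeight *= 1.f / nWidth;           // same 1/width step as above
    TargetWeight *= 1.f / nWidth;
}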
void ImageFusion::BlendingLinearYUV(unsigned char* pInfer, int nInferWidth, int nInferHeight, int nInferPitch, unsigned char* pTarget, int nTargetWidth, int nTargetHeight, int nTargetPitch, cv::Mat& cvInferWeight, cv::Mat& cvTargetWeight, unsigned char*& pResult, bool bUseAVX)
{
    // plane pointers of the two I420 inputs
    unsigned char* pInferY = pInfer;
    unsigned char* pInferU = pInfer + nInferPitch * nInferHeight;
    unsigned char* pInferV = pInfer + nInferPitch * nInferHeight * 5 / 4;
    unsigned char* pTargetY = pTarget;
    unsigned char* pTargetU = pTarget + nTargetPitch * nTargetHeight;
    unsigned char* pTargetV = pTarget + nTargetPitch * nTargetHeight * 5 / 4;

    // infer and target are assumed to share the same even-sized shape and to
    // match the weight maps (the original asserts used ||, letting
    // mismatches through)
    assert(nInferHeight == nTargetHeight && nInferWidth == nTargetWidth);
    assert(nInferHeight == cvInferWeight.rows && nInferWidth == cvInferWeight.cols &&
           nTargetHeight == cvTargetWeight.rows && nTargetWidth == cvTargetWeight.cols);
    assert(nInferWidth % 2 == 0);
    assert(nInferHeight % 2 == 0);

    // allocate the destination I420 buffer and its plane pointers
    if (pResult != NULL)
        delete[] pResult;
    pResult = new unsigned char[nInferPitch * cvInferWeight.rows * 3 / 2];
    unsigned char* pDstY = pResult;
    unsigned char* pDstU = pResult + nInferPitch * cvInferWeight.rows;
    unsigned char* pDstV = pResult + nInferPitch * cvInferWeight.rows * 5 / 4;
    int nDstPitch = nInferPitch;
    int nDstUVPitch = nInferPitch / 2;

    if (bUseAVX)
    {
        // Y plane, 8 pixels per step
        for (int i = 0; i < cvInferWeight.rows; i++)
        {
            for (int j = 0; j < cvInferWeight.cols; )
            {
                __m256 mInferWeight = _mm256_setzero_ps(), mTargetWeight = _mm256_setzero_ps();
                int nOffsetMax = 0;
                if (j + 8 < cvInferWeight.cols)
                {
                    mInferWeight = _mm256_loadu_ps(cvInferWeight.ptr<float>(i) + j);
                    mTargetWeight = _mm256_loadu_ps(cvTargetWeight.ptr<float>(i) + j);
                    nOffsetMax = 8;
                }
                else
                {
                    // partial tail: fill only the valid lanes
                    nOffsetMax = cvInferWeight.cols - j;
                    alignas(32) float fIW[8] = { 0 }, fTW[8] = { 0 };
                    for (int offset = 0; offset < nOffsetMax; offset++)
                    {
                        fIW[offset] = cvInferWeight.ptr<float>(i)[j + offset];
                        fTW[offset] = cvTargetWeight.ptr<float>(i)[j + offset];
                    }
                    mInferWeight = _mm256_load_ps(fIW);
                    mTargetWeight = _mm256_load_ps(fTW);
                }
                unsigned char* pInferPointer = pInferY + i * nInferPitch + j;
                unsigned char* pTargetPointer = pTargetY + i * nTargetPitch + j;
                unsigned char* pResultPointer = pDstY + nDstPitch * i + j;
                // widen the 8-bit samples to float lanes
                alignas(32) float fInferData[8] = { 0 }, fTargetData[8] = { 0 };
                for (int offset = 0; offset < nOffsetMax; offset++)
                {
                    fInferData[offset] = float(pInferPointer[offset]);
                    fTargetData[offset] = float(pTargetPointer[offset]);
                }
                // (I*wI + T*wT) / (wI + wT)
                __m256 Res = _mm256_add_ps(_mm256_mul_ps(mInferWeight, _mm256_load_ps(fInferData)),
                                           _mm256_mul_ps(mTargetWeight, _mm256_load_ps(fTargetData)));
                __m256 WeightAdd = _mm256_add_ps(mInferWeight, mTargetWeight);
                alignas(32) float fRes[8], fWeightAdd[8];
                _mm256_store_ps(fRes, Res);
                _mm256_store_ps(fWeightAdd, WeightAdd);
                for (int offset = 0; offset < nOffsetMax; offset++)
                {
                    pResultPointer[offset] = (unsigned char)(fRes[offset] / fWeightAdd[offset]);
                }
                j += 8;
            }
        }
        // U and V planes: one chroma sample per 2x2 block, 8 samples per step
        for (int i = 0; i < cvInferWeight.rows; i += 2)
        {
            for (int j = 0; j < cvInferWeight.cols; )
            {
                int X = j / 2;
                int Y = i / 2;
                int nOffsetMax = (j + 16 < cvInferWeight.cols) ? 16 : cvInferWeight.cols - j;
                // take every second weight of this row for the subsampled plane
                alignas(32) float fIW[8] = { 0 }, fTW[8] = { 0 };
                for (int offset = 0; offset < nOffsetMax; offset += 2)
                {
                    fIW[offset / 2] = cvInferWeight.ptr<float>(i)[j + offset];
                    fTW[offset / 2] = cvTargetWeight.ptr<float>(i)[j + offset];
                }
                __m256 mInferWeight = _mm256_load_ps(fIW);
                __m256 mTargetWeight = _mm256_load_ps(fTW);
                unsigned char* pInferPointerU = pInferU + Y * nInferPitch / 2 + X;
                unsigned char* pInferPointerV = pInferV + Y * nInferPitch / 2 + X;
                unsigned char* pTargetPointerU = pTargetU + Y * nTargetPitch / 2 + X;
                unsigned char* pTargetPointerV = pTargetV + Y * nTargetPitch / 2 + X;
                unsigned char* pResultPointerU = pDstU + Y * nDstUVPitch + X;
                unsigned char* pResultPointerV = pDstV + Y * nDstUVPitch + X;
                // widen the chroma samples (the original swapped infer and
                // target here, applying each weight to the wrong image)
                alignas(32) float fIU[8] = { 0 }, fIV[8] = { 0 }, fTU[8] = { 0 }, fTV[8] = { 0 };
                for (int offset = 0; offset < nOffsetMax / 2; offset++)
                {
                    fIU[offset] = float(pInferPointerU[offset]);
                    fIV[offset] = float(pInferPointerV[offset]);
                    fTU[offset] = float(pTargetPointerU[offset]);
                    fTV[offset] = float(pTargetPointerV[offset]);
                }
                __m256 ResU = _mm256_add_ps(_mm256_mul_ps(mInferWeight, _mm256_load_ps(fIU)),
                                            _mm256_mul_ps(mTargetWeight, _mm256_load_ps(fTU)));
                __m256 ResV = _mm256_add_ps(_mm256_mul_ps(mInferWeight, _mm256_load_ps(fIV)),
                                            _mm256_mul_ps(mTargetWeight, _mm256_load_ps(fTV)));
                __m256 WeightAdd = _mm256_add_ps(mInferWeight, mTargetWeight);
                alignas(32) float fResU[8], fResV[8], fWeightAdd[8];
                _mm256_store_ps(fResU, ResU);
                _mm256_store_ps(fResV, ResV);
                _mm256_store_ps(fWeightAdd, WeightAdd);
                for (int offset = 0; offset < nOffsetMax / 2; offset++)
                {
                    pResultPointerU[offset] = (unsigned char)(fResU[offset] / fWeightAdd[offset]);
                    pResultPointerV[offset] = (unsigned char)(fResV[offset] / fWeightAdd[offset]);
                }
                j += 16;
            }
        }
    }
    else
    {
        for (int i = 0; i < cvInferWeight.rows; i++)
        {
            for (int j = 0; j < cvInferWeight.cols; j++)
            {
                // per-pixel weights
                float fInferWeight = cvInferWeight.at<float>(i, j);
                float fTargetWeight = cvTargetWeight.at<float>(i, j);
                // Y sample
                float fY = fInferWeight * pInferY[i * nInferPitch + j] + fTargetWeight * pTargetY[i * nTargetPitch + j];
                float fWeight = fInferWeight + fTargetWeight;
                pDstY[i * nDstPitch + j] = (unsigned char)(fY / fWeight);
                // U and V samples exist only at even rows and columns
                if (i % 2 == 0 && j % 2 == 0)
                {
                    int X = j / 2;
                    int Y = i / 2;
                    float fU = fInferWeight * pInferU[Y * nInferPitch / 2 + X] + fTargetWeight * pTargetU[Y * nTargetPitch / 2 + X];
                    float fV = fInferWeight * pInferV[Y * nInferPitch / 2 + X] + fTargetWeight * pTargetV[Y * nTargetPitch / 2 + X];
                    pDstU[Y * nDstUVPitch + X] = (unsigned char)(fU / fWeight);
                    pDstV[Y * nDstUVPitch + X] = (unsigned char)(fV / fWeight);
                }
            }
        }
    }
}
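// Portability sketch (not part of the original file): the AVX path above
// originally filled __m256 lanes through the MSVC-specific .m256_f32 member.
// The same weighted blend for one batch of 8 float samples using only
// standard intrinsics (assumes at least 8 valid elements at each pointer):
static void DemoBlend8(const float* pInfer, const float* pTarget,
                       const float* pWInfer, const float* pWTarget, float* pOut)
{
    __m256 mInfer   = _mm256_loadu_ps(pInfer);
    __m256 mTarget  = _mm256_loadu_ps(pTarget);
    __m256 mWInfer  = _mm256_loadu_ps(pWInfer);
    __m256 mWTarget = _mm256_loadu_ps(pWTarget);
    // (I*wI + T*wT) / (wI + wT), the same normalization as the scalar path
    __m256 mNum = _mm256_add_ps(_mm256_mul_ps(mInfer, mWInfer),
                                _mm256_mul_ps(mTarget, mWTarget));
    __m256 mDen = _mm256_add_ps(mWInfer, mWTarget);
    _mm256_storeu_ps(pOut, _mm256_div_ps(mNum, mDen));
}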
void ImageFusion::BlendingLinearYUVByGpu(unsigned char* pInfer, int nInferWidth, int nInferHeight, int nInferPitch, unsigned char* pTarget, int nTargetWidth, int nTargetHeight, int nTargetPitch, cv::Mat& cvInferWeight, cv::Mat& cvTargetWeight, unsigned char*& pResult)
{
    // infer and target are assumed to share the same even-sized shape and to
    // match the weight maps
    assert(nInferHeight == nTargetHeight && nInferWidth == nTargetWidth);
    assert(nInferHeight == cvInferWeight.rows && nInferWidth == cvInferWeight.cols &&
           nTargetHeight == cvTargetWeight.rows && nTargetWidth == cvTargetWeight.cols);
    assert(nInferWidth % 2 == 0);
    if (nInferHeight % 2 != 0)
    {
        nInferHeight -= 1;
        nTargetHeight -= 1;
    }

    // upload the weight masks to the GPU
    float* pDevInferMask = NULL, * pDevTargetMask = NULL;
    cudaMalloc((void**)&pDevTargetMask, cvTargetWeight.step * cvTargetWeight.rows);
    cudaMalloc((void**)&pDevInferMask, cvInferWeight.step * cvInferWeight.rows);
    cudaError Err = cudaMemcpy(pDevTargetMask, cvTargetWeight.data, cvTargetWeight.step * cvTargetWeight.rows, cudaMemcpyHostToDevice);
    Err = cudaMemcpy(pDevInferMask, cvInferWeight.data, cvInferWeight.step * cvInferWeight.rows, cudaMemcpyHostToDevice);

    // destination plane pointers (pResult is a device buffer allocated by the caller)
    unsigned char* pDstY = pResult;
    unsigned char* pDstU = pResult + nInferPitch * cvInferWeight.rows;
    unsigned char* pDstV = pResult + nInferPitch * cvInferWeight.rows * 5 / 4;
    int nDstPitch = nInferPitch;
    int nDstUVPitch = nInferPitch / 2;

    // source plane pointers of the two I420 inputs
    unsigned char* pInferY = pInfer;
    unsigned char* pInferU = pInfer + nInferPitch * nInferHeight;
    unsigned char* pInferV = pInfer + nInferPitch * nInferHeight * 5 / 4;
    unsigned char* pTargetY = pTarget;
    unsigned char* pTargetU = pTarget + nTargetPitch * nTargetHeight;
    unsigned char* pTargetV = pTarget + nTargetPitch * nTargetHeight * 5 / 4;

    YUVTailorAndBlender::GradientBlenderYUV(
        pInferY, nInferPitch, pInferU, nInferPitch / 2, pInferV, nInferPitch / 2,
        pTargetY, nTargetPitch, pTargetU, nTargetPitch / 2, pTargetV, nTargetPitch / 2,
        cvInferWeight.cols, cvInferWeight.rows,
        pDevInferMask, pDevTargetMask, cvInferWeight.step,
        pDstY, nDstPitch, pDstU, nDstUVPitch, pDstV, nDstUVPitch,
        cvInferWeight.cols, cvInferWeight.rows);

    // release the device masks
    cudaFree(pDevInferMask);
    cudaFree(pDevTargetMask);
}
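// Suggestion sketch (not part of the original file): the mask upload above
// copies step * rows bytes in one linear cudaMemcpy, which relies on the
// cv::Mat being continuous. A defensive variant with cudaMemcpy2D that also
// handles padded rows, assuming the device buffer pitch nDevPitch:
static cudaError_t UploadMaskPitched(const cv::Mat& Mask, float* pDevice,
                                     size_t nDevPitch)
{
    return cudaMemcpy2D(pDevice, nDevPitch,
                        Mask.ptr<float>(0), Mask.step,
                        Mask.cols * sizeof(float), Mask.rows,
                        cudaMemcpyHostToDevice);
}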
void ImageFusion::PastePartInWholeImageYUV(unsigned char* pPanoImg, int nPanoWidth, int nPanoHeight, int nPanoPitch, unsigned char* pPartImg, int nPartWidth, int nPartHeight, int nPartPitch, int nLeft, int nTop, bool bUseAVX)
{
    // destination (panorama) plane pointers and strides
    uint8_t* dst_y = pPanoImg;
    uint8_t* dst_u = pPanoImg + nPanoHeight * nPanoPitch;
    uint8_t* dst_v = pPanoImg + nPanoHeight * nPanoPitch * 5 / 4;
    int dst_stride_y = nPanoPitch;
    int dst_stride_u = nPanoPitch >> 1;
    int dst_stride_v = nPanoPitch >> 1;
    // source (part) plane pointers and strides
    uint8_t* src_y = pPartImg;
    uint8_t* src_u = pPartImg + nPartPitch * nPartHeight;
    uint8_t* src_v = pPartImg + nPartPitch * nPartHeight * 5 / 4;
    int src_stride_y = nPartPitch;
    int src_stride_u = nPartPitch >> 1;
    int src_stride_v = nPartPitch >> 1;
    int width = nPartWidth;   // copied region width
    int height = nPartHeight; // copied region height

    // offset the destination planes to the paste position
    unsigned char* dst_target_y = dst_y + nTop * nPanoPitch + nLeft;
    unsigned char* dst_target_u = dst_u + nTop * nPanoPitch / 4 + nLeft / 2;
    unsigned char* dst_target_v = dst_v + nTop * nPanoPitch / 4 + nLeft / 2;

    // copy the Y plane, then the quarter-resolution U and V planes
    libyuv::CopyPlane(src_y, src_stride_y, dst_target_y, dst_stride_y, width, height);
    libyuv::CopyPlane(src_u, src_stride_u, dst_target_u, dst_stride_u, width / 2, height / 2);
    libyuv::CopyPlane(src_v, src_stride_v, dst_target_v, dst_stride_v, width / 2, height / 2);
}

void ImageFusion::PastePartInWholeImageYUVByGpu(unsigned char* pPanoImg, int nPanoWidth, int nPanoHeight, int nPanoPitch, unsigned char* pPartImg, int nPartWidth, int nPartHeight, int nPartPitch, int nLeft, int nTop, CUstream* pStream)
{
    // destination (panorama) plane pointers
    unsigned char* pDstY = pPanoImg;
    unsigned char* pDstU = pPanoImg + nPanoPitch * nPanoHeight;
    unsigned char* pDstV = pPanoImg + nPanoPitch * nPanoHeight * 5 / 4;
    // source (part) plane pointers
    uint8_t* src_y = pPartImg;
    uint8_t* src_u = pPartImg + nPartPitch * nPartHeight;
    uint8_t* src_v = pPartImg + nPartPitch * nPartHeight * 5 / 4;

    // copy the Y plane, then the quarter-resolution U and V planes
    YUVTailorAndBlender::CopyPlane(
        src_y, nPartWidth, nPartHeight, nPartPitch,
        pDstY, nPanoWidth, nPanoHeight, nPanoPitch,
        nPartWidth, nPartHeight, nLeft, nTop, 1, NULL);
    YUVTailorAndBlender::CopyPlane(
        src_u, nPartWidth / 2, nPartHeight / 2, nPartPitch / 2,
        pDstU, nPanoWidth / 2, nPanoHeight / 2, nPanoPitch / 2,
        nPartWidth / 2, nPartHeight / 2, nLeft / 2, nTop / 2, 1, NULL);
    YUVTailorAndBlender::CopyPlane(
        src_v, nPartWidth / 2, nPartHeight / 2, nPartPitch / 2,
        pDstV, nPanoWidth / 2, nPanoHeight / 2, nPanoPitch / 2,
        nPartWidth / 2, nPartHeight / 2, nLeft / 2, nTop / 2, 1, NULL);
}

void ImageFusion::Init_Gpu(unsigned char* pImage, int nPanoWidth, int nPanoHeight, int nPanoPitch)
{
    pPanoImageBufferGpu = pImage;
    this->nPanoWidth = nPanoWidth;
    this->nPanoHeight = nPanoHeight;
    this->nPanoPitch = nPanoPitch;
}
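// End-to-end usage sketch for the GPU path (kept out of the build; all
// sizes, pitches, and rectangles below are placeholders, not values from
// this project):
#if 0
void DemoGpuBlend(unsigned char* pPanoGpu, int nW, int nH, int nPitch,
                  unsigned char* pInferGpu, unsigned char* pTargetGpu)
{
    ImageFusion Fusion;
    Fusion.Init_Gpu(pPanoGpu, nW, nH, nPitch);       // device panorama buffer
    cv::Rect OverlapInPano(nW / 2 - 64, 0, 128, nH); // hypothetical overlap
    cv::Rect OverlapInInfer(900, 0, 128, nH);
    cv::Rect OverlapInTarget(0, 0, 128, nH);
    Fusion.FusionImageByBlendingGradientYUVByGpu(
        pPanoGpu, OverlapInPano,
        pInferGpu, 1024, nH, 1024, OverlapInInfer,
        pTargetGpu, 1024, nH, 1024, OverlapInTarget);
}
#endif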