|
@@ -12,7 +12,6 @@
|
|
|
// See the License for the specific language governing permissions and
|
|
|
// limitations under the License.
|
|
|
|
|
|
-
|
|
|
#include <math.h>
|
|
|
#include <stdint.h>
|
|
|
#include <stdio.h>
|
|
@@ -85,7 +84,6 @@ static int readLines(const char *fileName, char *lines[], int max_line)
|
|
|
return i;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
|
|
|
float ymax1)
|
|
|
{
|
|
@@ -195,18 +193,22 @@ static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale) { return (
|
|
|
|
|
|
// Dequantize an unsigned 8-bit affine-quantized value to float:
// subtract the zero point `zp` from `qnt`, then multiply by `scale`.
static float deqnt_affine_u8_to_f32(uint8_t qnt, int32_t zp, float scale)
{
    const float shifted = (float)qnt - (float)zp;
    return shifted * scale;
}
|
|
|
|
|
|
-static void compute_dfl(float* tensor, int dfl_len, float* box){
|
|
|
- for (int b=0; b<4; b++){
|
|
|
+static void compute_dfl(float *tensor, int dfl_len, float *box)
|
|
|
+{
|
|
|
+ for (int b = 0; b < 4; b++)
|
|
|
+ {
|
|
|
float exp_t[dfl_len];
|
|
|
- float exp_sum=0;
|
|
|
- float acc_sum=0;
|
|
|
- for (int i=0; i< dfl_len; i++){
|
|
|
- exp_t[i] = exp(tensor[i+b*dfl_len]);
|
|
|
+ float exp_sum = 0;
|
|
|
+ float acc_sum = 0;
|
|
|
+ for (int i = 0; i < dfl_len; i++)
|
|
|
+ {
|
|
|
+ exp_t[i] = exp(tensor[i + b * dfl_len]);
|
|
|
exp_sum += exp_t[i];
|
|
|
}
|
|
|
-
|
|
|
- for (int i=0; i< dfl_len; i++){
|
|
|
- acc_sum += exp_t[i]/exp_sum *i;
|
|
|
+
|
|
|
+ for (int i = 0; i < dfl_len; i++)
|
|
|
+ {
|
|
|
+ acc_sum += exp_t[i] / exp_sum * i;
|
|
|
}
|
|
|
box[b] = acc_sum;
|
|
|
}
|
|
@@ -219,7 +221,7 @@ static int process_u8(uint8_t *box_tensor, int32_t box_zp, float box_scale,
|
|
|
std::vector<float> &boxes,
|
|
|
std::vector<float> &objProbs,
|
|
|
std::vector<int> &classId,
|
|
|
- float threshold,int OBJ_CLASS_NUM)
|
|
|
+ float threshold, int OBJ_CLASS_NUM)
|
|
|
{
|
|
|
int validCount = 0;
|
|
|
int grid_len = grid_h * grid_w;
|
|
@@ -291,10 +293,10 @@ static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
|
|
|
int8_t *score_tensor, int32_t score_zp, float score_scale,
|
|
|
int8_t *score_sum_tensor, int32_t score_sum_zp, float score_sum_scale,
|
|
|
int grid_h, int grid_w, int stride, int dfl_len,
|
|
|
- std::vector<float> &boxes,
|
|
|
- std::vector<float> &objProbs,
|
|
|
- std::vector<int> &classId,
|
|
|
- float threshold,int OBJ_CLASS_NUM)
|
|
|
+ std::vector<float> &boxes,
|
|
|
+ std::vector<float> &objProbs,
|
|
|
+ std::vector<int> &classId,
|
|
|
+ float threshold, int OBJ_CLASS_NUM)
|
|
|
{
|
|
|
int validCount = 0;
|
|
|
int grid_len = grid_h * grid_w;
|
|
@@ -305,18 +307,21 @@ static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
|
|
|
{
|
|
|
for (int j = 0; j < grid_w; j++)
|
|
|
{
|
|
|
- int offset = i* grid_w + j;
|
|
|
+ int offset = i * grid_w + j;
|
|
|
int max_class_id = -1;
|
|
|
|
|
|
// Use the score sum as a fast pre-filter
|
|
|
- if (score_sum_tensor != nullptr){
|
|
|
- if (score_sum_tensor[offset] < score_sum_thres_i8){
|
|
|
+ if (score_sum_tensor != nullptr)
|
|
|
+ {
|
|
|
+ if (score_sum_tensor[offset] < score_sum_thres_i8)
|
|
|
+ {
|
|
|
continue;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
int8_t max_score = -score_zp;
|
|
|
- for (int c= 0; c< OBJ_CLASS_NUM; c++){
|
|
|
+ for (int c = 0; c < OBJ_CLASS_NUM; c++)
|
|
|
+ {
|
|
|
if ((score_tensor[offset] > score_thres_i8) && (score_tensor[offset] > max_score))
|
|
|
{
|
|
|
max_score = score_tensor[offset];
|
|
@@ -326,21 +331,23 @@ static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
|
|
|
}
|
|
|
|
|
|
// compute box
|
|
|
- if (max_score> score_thres_i8){
|
|
|
- offset = i* grid_w + j;
|
|
|
+ if (max_score > score_thres_i8)
|
|
|
+ {
|
|
|
+ offset = i * grid_w + j;
|
|
|
float box[4];
|
|
|
- float before_dfl[dfl_len*4];
|
|
|
- for (int k=0; k< dfl_len*4; k++){
|
|
|
+ float before_dfl[dfl_len * 4];
|
|
|
+ for (int k = 0; k < dfl_len * 4; k++)
|
|
|
+ {
|
|
|
before_dfl[k] = deqnt_affine_to_f32(box_tensor[offset], box_zp, box_scale);
|
|
|
offset += grid_len;
|
|
|
}
|
|
|
compute_dfl(before_dfl, dfl_len, box);
|
|
|
|
|
|
- float x1,y1,x2,y2,w,h;
|
|
|
- x1 = (-box[0] + j + 0.5)*stride;
|
|
|
- y1 = (-box[1] + i + 0.5)*stride;
|
|
|
- x2 = (box[2] + j + 0.5)*stride;
|
|
|
- y2 = (box[3] + i + 0.5)*stride;
|
|
|
+ float x1, y1, x2, y2, w, h;
|
|
|
+ x1 = (-box[0] + j + 0.5) * stride;
|
|
|
+ y1 = (-box[1] + i + 0.5) * stride;
|
|
|
+ x2 = (box[2] + j + 0.5) * stride;
|
|
|
+ y2 = (box[3] + i + 0.5) * stride;
|
|
|
w = x2 - x1;
|
|
|
h = y2 - y1;
|
|
|
boxes.push_back(x1);
|
|
@@ -350,19 +357,19 @@ static int process_i8(int8_t *box_tensor, int32_t box_zp, float box_scale,
|
|
|
|
|
|
objProbs.push_back(deqnt_affine_to_f32(max_score, score_zp, score_scale));
|
|
|
classId.push_back(max_class_id);
|
|
|
- validCount ++;
|
|
|
+ validCount++;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
return validCount;
|
|
|
}
|
|
|
|
|
|
-static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum_tensor,
|
|
|
+static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum_tensor,
|
|
|
int grid_h, int grid_w, int stride, int dfl_len,
|
|
|
- std::vector<float> &boxes,
|
|
|
- std::vector<float> &objProbs,
|
|
|
- std::vector<int> &classId,
|
|
|
- float threshold,int OBJ_CLASS_NUM)
|
|
|
+ std::vector<float> &boxes,
|
|
|
+ std::vector<float> &objProbs,
|
|
|
+ std::vector<int> &classId,
|
|
|
+ float threshold, int OBJ_CLASS_NUM)
|
|
|
{
|
|
|
int validCount = 0;
|
|
|
int grid_len = grid_h * grid_w;
|
|
@@ -370,18 +377,21 @@ static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum
|
|
|
{
|
|
|
for (int j = 0; j < grid_w; j++)
|
|
|
{
|
|
|
- int offset = i* grid_w + j;
|
|
|
+ int offset = i * grid_w + j;
|
|
|
int max_class_id = -1;
|
|
|
|
|
|
// Use the score sum as a fast pre-filter
|
|
|
- if (score_sum_tensor != nullptr){
|
|
|
- if (score_sum_tensor[offset] < threshold){
|
|
|
+ if (score_sum_tensor != nullptr)
|
|
|
+ {
|
|
|
+ if (score_sum_tensor[offset] < threshold)
|
|
|
+ {
|
|
|
continue;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
float max_score = 0;
|
|
|
- for (int c= 0; c< OBJ_CLASS_NUM; c++){
|
|
|
+ for (int c = 0; c < OBJ_CLASS_NUM; c++)
|
|
|
+ {
|
|
|
if ((score_tensor[offset] > threshold) && (score_tensor[offset] > max_score))
|
|
|
{
|
|
|
max_score = score_tensor[offset];
|
|
@@ -391,21 +401,23 @@ static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum
|
|
|
}
|
|
|
|
|
|
// compute box
|
|
|
- if (max_score> threshold){
|
|
|
- offset = i* grid_w + j;
|
|
|
+ if (max_score > threshold)
|
|
|
+ {
|
|
|
+ offset = i * grid_w + j;
|
|
|
float box[4];
|
|
|
- float before_dfl[dfl_len*4];
|
|
|
- for (int k=0; k< dfl_len*4; k++){
|
|
|
+ float before_dfl[dfl_len * 4];
|
|
|
+ for (int k = 0; k < dfl_len * 4; k++)
|
|
|
+ {
|
|
|
before_dfl[k] = box_tensor[offset];
|
|
|
offset += grid_len;
|
|
|
}
|
|
|
compute_dfl(before_dfl, dfl_len, box);
|
|
|
|
|
|
- float x1,y1,x2,y2,w,h;
|
|
|
- x1 = (-box[0] + j + 0.5)*stride;
|
|
|
- y1 = (-box[1] + i + 0.5)*stride;
|
|
|
- x2 = (box[2] + j + 0.5)*stride;
|
|
|
- y2 = (box[3] + i + 0.5)*stride;
|
|
|
+ float x1, y1, x2, y2, w, h;
|
|
|
+ x1 = (-box[0] + j + 0.5) * stride;
|
|
|
+ y1 = (-box[1] + i + 0.5) * stride;
|
|
|
+ x2 = (box[2] + j + 0.5) * stride;
|
|
|
+ y2 = (box[3] + i + 0.5) * stride;
|
|
|
w = x2 - x1;
|
|
|
h = y2 - y1;
|
|
|
boxes.push_back(x1);
|
|
@@ -415,17 +427,16 @@ static int process_fp32(float *box_tensor, float *score_tensor, float *score_sum
|
|
|
|
|
|
objProbs.push_back(max_score);
|
|
|
classId.push_back(max_class_id);
|
|
|
- validCount ++;
|
|
|
+ validCount++;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
return validCount;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-int post_process(PPYOLOE *app_ctx, rknn_output *outputs, float conf_threshold, float nms_threshold, object_detect_result_list *od_results,int OBJ_CLASS_NUM)
|
|
|
+int post_process(PPYOLOE *app_ctx, rknn_output *outputs, float conf_threshold, float nms_threshold, object_detect_result_list *od_results, int OBJ_CLASS_NUM)
|
|
|
{
|
|
|
-
|
|
|
+
|
|
|
std::vector<float> filterBoxes;
|
|
|
std::vector<float> objProbs;
|
|
|
std::vector<int> classId;
|
|
@@ -442,7 +453,7 @@ int post_process(PPYOLOE *app_ctx, rknn_output *outputs, float conf_threshold, f
|
|
|
#ifdef RKNPU1
|
|
|
int dfl_len = app_ctx->output_attrs[0].dims[2] / 4;
|
|
|
#else
|
|
|
- int dfl_len = app_ctx->output_attrs[0].dims[1] /4;
|
|
|
+ int dfl_len = app_ctx->output_attrs[0].dims[1] / 4;
|
|
|
#endif
|
|
|
int output_per_branch = app_ctx->io_num.n_output / 3;
|
|
|
for (int i = 0; i < 3; i++)
|
|
@@ -451,13 +462,14 @@ int post_process(PPYOLOE *app_ctx, rknn_output *outputs, float conf_threshold, f
|
|
|
void *score_sum = nullptr;
|
|
|
int32_t score_sum_zp = 0;
|
|
|
float score_sum_scale = 1.0;
|
|
|
- if (output_per_branch == 3){
|
|
|
- score_sum = outputs[i*output_per_branch + 2].buf;
|
|
|
- score_sum_zp = app_ctx->output_attrs[i*output_per_branch + 2].zp;
|
|
|
- score_sum_scale = app_ctx->output_attrs[i*output_per_branch + 2].scale;
|
|
|
+ if (output_per_branch == 3)
|
|
|
+ {
|
|
|
+ score_sum = outputs[i * output_per_branch + 2].buf;
|
|
|
+ score_sum_zp = app_ctx->output_attrs[i * output_per_branch + 2].zp;
|
|
|
+ score_sum_scale = app_ctx->output_attrs[i * output_per_branch + 2].scale;
|
|
|
}
|
|
|
- int box_idx = i*output_per_branch;
|
|
|
- int score_idx = i*output_per_branch + 1;
|
|
|
+ int box_idx = i * output_per_branch;
|
|
|
+ int score_idx = i * output_per_branch + 1;
|
|
|
|
|
|
#ifdef RKNPU1
|
|
|
grid_h = app_ctx->output_attrs[box_idx].dims[1];
|
|
@@ -480,17 +492,16 @@ int post_process(PPYOLOE *app_ctx, rknn_output *outputs, float conf_threshold, f
|
|
|
validCount += process_i8((int8_t *)outputs[box_idx].buf, app_ctx->output_attrs[box_idx].zp, app_ctx->output_attrs[box_idx].scale,
|
|
|
(int8_t *)outputs[score_idx].buf, app_ctx->output_attrs[score_idx].zp, app_ctx->output_attrs[score_idx].scale,
|
|
|
(int8_t *)score_sum, score_sum_zp, score_sum_scale,
|
|
|
- grid_h, grid_w, stride, dfl_len,
|
|
|
- filterBoxes, objProbs, classId, conf_threshold,OBJ_CLASS_NUM);
|
|
|
+ grid_h, grid_w, stride, dfl_len,
|
|
|
+ filterBoxes, objProbs, classId, conf_threshold, OBJ_CLASS_NUM);
|
|
|
#endif
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
validCount += process_fp32((float *)outputs[box_idx].buf, (float *)outputs[score_idx].buf, (float *)score_sum,
|
|
|
- grid_h, grid_w, stride, dfl_len,
|
|
|
- filterBoxes, objProbs, classId, conf_threshold,OBJ_CLASS_NUM);
|
|
|
+ grid_h, grid_w, stride, dfl_len,
|
|
|
+ filterBoxes, objProbs, classId, conf_threshold, OBJ_CLASS_NUM);
|
|
|
}
|
|
|
-
|
|
|
}
|
|
|
|
|
|
// no object detect
|
|
@@ -515,7 +526,6 @@ int post_process(PPYOLOE *app_ctx, rknn_output *outputs, float conf_threshold, f
|
|
|
int last_count = 0;
|
|
|
od_results->count = 0;
|
|
|
|
|
|
-
|
|
|
/* box valid detect target */
|
|
|
for (int i = 0; i < validCount; ++i)
|
|
|
{
|
|
@@ -532,7 +542,6 @@ int post_process(PPYOLOE *app_ctx, rknn_output *outputs, float conf_threshold, f
|
|
|
int id = classId[n];
|
|
|
float obj_conf = objProbs[i];
|
|
|
|
|
|
-
|
|
|
od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w));
|
|
|
od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h));
|
|
|
od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w));
|