update

2024-08-06 18:43:30 +00:00 · 2024-08-06 18:43:30 +00:00 · 611e9b1637
commit 611e9b1637
parent 4349005f83
12 changed files with 387 additions and 496 deletions
--- a/letterbox_input.jpg
+++ b/letterbox_input.jpg
--- a/output.jpg
+++ b/output.jpg
--- a/src/guangpo_yolo/CMakeLists.txt
+++ b/src/guangpo_yolo/CMakeLists.txt
@ -5,6 +5,7 @@ project(guangpo_yolo)
 # add_compile_options(-std=c++11)

 set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)

 ## Find catkin macros and libraries
@ -152,6 +153,7 @@ include/rga/include
 ## With catkin_make all packages are built within a single CMake context
 ## The recommended prefix ensures that target names across packages don't collide
 add_executable(${PROJECT_NAME}_node src/main.cpp  src/postprocess.cc src/preprocess.cc yaml_config/yaml_config.cpp)
+add_executable(${PROJECT_NAME}_test_node src/main_test.cc  src/postprocess.cc src/preprocess.cc yaml_config/yaml_config.cpp)

 ## Rename C++ executable without prefix
 ## The above recommended prefix causes long target names, the following renames the
@ -170,6 +172,12 @@ target_link_libraries(${PROJECT_NAME}_node
  ${LIB_FILES}
  yaml-cpp
 )
+target_link_libraries(${PROJECT_NAME}_test_node
+  ${catkin_LIBRARIES}
+  ${OpenCV_LIBS}
+  ${LIB_FILES}
+  yaml-cpp
+)

 #############
 ## Install ##
--- a/src/guangpo_yolo/include/dianti.h
+++ b/src/guangpo_yolo/include/dianti.h
@ -1,3 +1,4 @@
+#pragma once
 #include <Eigen/Core>
 #include <cmath>
 #include <iostream>
--- a/src/guangpo_yolo/include/postprocess.h
+++ b/src/guangpo_yolo/include/postprocess.h
@ -3,6 +3,7 @@

 #include <stdint.h>
 #include <vector>
+#include <string>

 #define OBJ_NAME_MAX_SIZE 16
 #define OBJ_NUMB_MAX_SIZE 64
@ -36,7 +37,7 @@ typedef struct _detect_result_group_t
 int post_process(int8_t *input0, int8_t *input1, int8_t *input2, int model_in_h, int model_in_w,
                 float conf_threshold, float nms_threshold, BOX_RECT pads, float scale_w, float scale_h,
                 std::vector<int32_t> &qnt_zps, std::vector<float> &qnt_scales,
-                 detect_result_group_t *group);
+                 detect_result_group_t *group, const std::vector<std::string>& labels);

-void deinitPostProcess();
+// void deinitPostProcess();
 #endif //_RKNN_YOLOV5_DEMO_POSTPROCESS_H_
--- a/src/guangpo_yolo/include/yolov5.hpp
+++ b/src/guangpo_yolo/include/yolov5.hpp
@ -4,6 +4,7 @@
                Includes
 -------------------------------------------*/
 #include <dlfcn.h>
+#include <exception>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@ -12,13 +13,18 @@
 #define _BASETSD_H

 #include "RgaUtils.h"
-#include "debugstream.hpp"
-#include "dianti.h"
 #include "postprocess.h"
+
 #include "preprocess.h"
 #include "rknn_api.h"

+#include "debugstream.hpp"
+#include "dianti.h"
+
 #define PERF_WITH_POST 1
+/*-------------------------------------------
+                  Functions
+-------------------------------------------*/

 static void dump_tensor_attr(rknn_tensor_attr *attr) {
  std::string shape_str = attr->n_dims < 1 ? "" : std::to_string(attr->dims[0]);
@ -26,20 +32,17 @@ static void dump_tensor_attr(rknn_tensor_attr *attr) {
    shape_str += ", " + std::to_string(attr->dims[i]);
  }

-  printf(
-      "  index=%d, name=%s, n_dims=%d, dims=[%s], n_elems=%d, size=%d, "
+  printf("  index=%d, name=%s, n_dims=%d, dims=[%s], n_elems=%d, size=%d, "
         "w_stride = %d, size_with_stride=%d, fmt=%s, "
         "type=%s, qnt_type=%s, "
         "zp=%d, scale=%f\n",
-      attr->index, attr->name, attr->n_dims, shape_str.c_str(), attr->n_elems,
-      attr->size, attr->w_stride, attr->size_with_stride,
+         attr->index, attr->name, attr->n_dims, shape_str.c_str(),
+         attr->n_elems, attr->size, attr->w_stride, attr->size_with_stride,
         get_format_string(attr->fmt), get_type_string(attr->type),
         get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
 }

-static double __get_us(struct timeval t) {
-  return (t.tv_sec * 1000000 + t.tv_usec);
-}
+double __get_us(struct timeval t) { return (t.tv_sec * 1000000 + t.tv_usec); }

 static unsigned char *load_data(FILE *fp, size_t ofst, size_t sz) {
  unsigned char *data;
@ -99,15 +102,27 @@ static int saveFloat(const char *file_name, float *output, int element_size) {

 class Yolov5 {
 public:
-  /*-------------------------------------------
-                    Functions
-  -------------------------------------------*/
-
-  std::string model_path_;
+  rknn_context ctx;
+  rknn_input inputs[1];
+  int img_width = 0;
+  int img_height = 0;
+  int img_channel = 0;
+  int channel = 3;
+  int width = 0;
+  int height = 0;
+  rga_buffer_t src;
+  rga_buffer_t dst;
+  std::string option = "letterbox";
+  rknn_input_output_num io_num;
+  struct timeval start_time, stop_time;
+  rknn_tensor_attr *output_attrs;
+  const float nms_threshold = NMS_THRESH;      // 默认的NMS阈值
+  const float box_conf_threshold = BOX_THRESH; // 默认的置信度阈值
+  unsigned char *model_data;
+  std::vector<std::string> labels;
  Yolov5() {}
-  void Init(const std::string &model_path) {}
  ~Yolov5() {
-    deinitPostProcess();
+    // deinitPostProcess();

    // release
    rknn_destroy(ctx);
@ -116,37 +131,17 @@ class Yolov5 {
      free(model_data);
    }
  }
-
-  rknn_context ctx;
-  unsigned char *model_data;
+  void SetLabels(const std::vector<std::string> &labelss) { labels = labelss; }
+  void LoadModel(const std::string &model_name_str) {
    int ret;
-
-  void LoadModel() {
-    int img_width = 0;
-    int img_height = 0;
-    int img_channel = 0;
-    const float nms_threshold = NMS_THRESH;       // 默认的NMS阈值
-    const float box_conf_threshold = BOX_THRESH;  // 默认的置信度阈值
-    struct timeval start_time, stop_time;
-    // char *model_name = (char *)argv[1];
-    // char *input_path = argv[2];
-    std::string option = "letterbox";
-    std::string out_path = "./out.jpg";
-    if (argc >= 4) {
-      option = argv[3];
-    }
-    if (argc >= 5) {
-      out_path = argv[4];
-    }
+    const char *model_name = model_name_str.c_str();
+    // std::string out_path = "./out.jpg";

    // init rga context
-    rga_buffer_t src;
-    rga_buffer_t dst;
    memset(&src, 0, sizeof(src));
    memset(&dst, 0, sizeof(dst));

-    printf(
-        "post process config: box_conf_threshold = %.2f, nms_threshold = "
+    printf("post process config: box_conf_threshold = %.2f, nms_threshold = "
           "%.2f\n",
           box_conf_threshold, nms_threshold);

@ -157,7 +152,7 @@ class Yolov5 {
    ret = rknn_init(&ctx, model_data, model_data_size, 0, NULL);
    if (ret < 0) {
      printf("rknn_init error ret=%d\n", ret);
-      return -1;
+      std::terminate();
    }

    rknn_sdk_version version;
@ -165,16 +160,15 @@ class Yolov5 {
                     sizeof(rknn_sdk_version));
    if (ret < 0) {
      printf("rknn_init error ret=%d\n", ret);
-      return -1;
+      std::terminate();
    }
    printf("sdk version: %s driver version: %s\n", version.api_version,
           version.drv_version);

-    rknn_input_output_num io_num;
    ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
    if (ret < 0) {
      printf("rknn_init error ret=%d\n", ret);
-      return -1;
+      std::terminate();
    }
    printf("model input num: %d, output num: %d\n", io_num.n_input,
           io_num.n_output);
@ -187,13 +181,14 @@ class Yolov5 {
                       sizeof(rknn_tensor_attr));
      if (ret < 0) {
        printf("rknn_init error ret=%d\n", ret);
-        return -1;
+        std::terminate();
      }
      dump_tensor_attr(&(input_attrs[i]));
    }

-    rknn_tensor_attr output_attrs[io_num.n_output];
-    memset(output_attrs, 0, sizeof(output_attrs));
+    //   rknn_tensor_attr output_attrs[io_num.n_output];
+    output_attrs = new rknn_tensor_attr[io_num.n_output];
+    memset(output_attrs, 0, sizeof(*output_attrs));
    for (int i = 0; i < io_num.n_output; i++) {
      output_attrs[i].index = i;
      ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]),
@ -201,9 +196,6 @@ class Yolov5 {
      dump_tensor_attr(&(output_attrs[i]));
    }

-    int channel = 3;
-    int width = 0;
-    int height = 0;
    if (input_attrs[0].fmt == RKNN_TENSOR_NCHW) {
      printf("model is NCHW input fmt\n");
      channel = input_attrs[0].dims[1];
@ -218,23 +210,18 @@ class Yolov5 {

    printf("model input height=%d, width=%d, channel=%d\n", height, width,
           channel);
-  }
-  void Infer() {
-    rknn_input inputs[1];
+
    memset(inputs, 0, sizeof(inputs));
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_UINT8;
    inputs[0].size = width * height * channel;
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].pass_through = 0;
-
-    // 读取图片
-    printf("Read %s ...\n", input_path);
-    cv::Mat orig_img = cv::imread(input_path, 1);
-    if (!orig_img.data) {
-      printf("cv::imread %s fail!\n", input_path);
-      return -1;
  }
+  detect_result_group_t Infer(const cv::Mat &orig_img,
+                              const std::string &out_path = "output.jpg",
+                              bool debug = false) {
+    int ret;
    cv::Mat img;
    cv::cvtColor(orig_img, img, cv::COLOR_BGR2RGB);
    img_width = img.cols;
@ -257,9 +244,10 @@ class Yolov5 {
        ret = resize_rga(src, dst, img, resized_img, target_size);
        if (ret != 0) {
          fprintf(stderr, "resize with rga error\n");
-          return -1;
+          std::terminate();
        }
        // 保存预处理图片
+        if (debug)
          cv::imwrite("resize_input.jpg", resized_img);
      } else if (option == "letterbox") {
        printf("resize image with letterbox\n");
@ -268,11 +256,12 @@ class Yolov5 {
        scale_h = min_scale;
        letterbox(img, resized_img, pads, min_scale, target_size);
        // 保存预处理图片
+        if (debug)
          cv::imwrite("letterbox_input.jpg", resized_img);
      } else {
        fprintf(stderr,
                "Invalid resize option. Use 'resize' or 'letterbox'.\n");
-        return -1;
+        std::terminate();
      }
      inputs[0].buf = resized_img.data;
    } else {
@ -292,9 +281,6 @@ class Yolov5 {
    // 执行推理
    ret = rknn_run(ctx, NULL);
    ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);
-    gettimeofday(&stop_time, NULL);
-    printf("once run use %f ms\n",
-           (__get_us(stop_time) - __get_us(start_time)) / 1000);

    // 后处理
    detect_result_group_t detect_result_group;
@ -307,7 +293,11 @@ class Yolov5 {
    post_process((int8_t *)outputs[0].buf, (int8_t *)outputs[1].buf,
                 (int8_t *)outputs[2].buf, height, width, box_conf_threshold,
                 nms_threshold, pads, scale_w, scale_h, out_zps, out_scales,
-                 &detect_result_group);
+                 &detect_result_group, labels);
+
+    gettimeofday(&stop_time, NULL);
+    printf("once run use %f ms\n",
+           (__get_us(stop_time) - __get_us(start_time)) / 1000);

    // 画框和概率
    char text[256];
@ -326,72 +316,11 @@ class Yolov5 {
      putText(orig_img, text, cv::Point(x1, y1 + 12), cv::FONT_HERSHEY_SIMPLEX,
              0.4, cv::Scalar(255, 255, 255));
    }
-
-    // gxt: add my process here ==============begin
-    // 相机内参
-    Eigen::Matrix3d camera_matrix;
-    camera_matrix << 787.22316869, 0.0, 628.91534144, 0.0, 793.45182,
-        313.46301416, 0.0, 0.0, 1.0;
-    //   int my_width=orig_img.cols;
-    //   int my_height=orig_img.rows;
-    DistanceEstimator estimator(camera_matrix, img_width, img_height);
-    std::vector<Box> rens;
-    std::vector<Box> diantis;
-    gDebug(detect_result_group.count);
-    for (int i = 0; i < detect_result_group.count; i++) {
-      detect_result_t *det_result = &(detect_result_group.results[i]);
-      // sprintf(text, "%s %.1f%%", det_result->name, det_result->prop * 100);
-      // printf("%s @ (%d %d %d %d) %f\n", det_result->name,
-      // det_result->box.left, det_result->box.top,
-      //        det_result->box.right, det_result->box.bottom,
-      //        det_result->prop);
-      Box box;
-      box.x = (double)(det_result->box.left + det_result->box.right) / 2.0 /
-              img_width;
-      box.y = (double)(det_result->box.top + det_result->box.bottom) / 2.0 /
-              img_height;
-      box.w = (double)std::abs(det_result->box.right - det_result->box.left) /
-              img_width;
-      box.h = (double)std::abs(det_result->box.bottom - det_result->box.top) /
-              img_height;
-      std::string class_name = det_result->name;
-      if (class_name == "Dianti") {
-        diantis.push_back(box);
-      } else if (class_name == "Ren") {
-        rens.push_back(box);
-      }
-      // int x1 = det_result->box.left;
-      // int y1 = det_result->box.top;
-      // int x2 = det_result->box.right;
-      // int y2 = det_result->box.bottom;
-      // rectangle(orig_img, cv::Point(x1, y1), cv::Point(x2, y2),
-      // cv::Scalar(256, 0, 0, 256), 3); putText(orig_img, text, cv::Point(x1,
-      // y1 + 12), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
-    }
-    DealImage(estimator, orig_img, rens, diantis);
-    // gxt: add my process here ==============end
-
+    if (!out_path.empty()) {
      printf("save detect result to %s\n", out_path.c_str());
      imwrite(out_path, orig_img);
-    ret = rknn_outputs_release(ctx, io_num.n_output, outputs);
-
-    // 耗时统计
-    int test_count = 10;
-    gettimeofday(&start_time, NULL);
-    for (int i = 0; i < test_count; ++i) {
-      rknn_inputs_set(ctx, io_num.n_input, inputs);
-      ret = rknn_run(ctx, NULL);
-      ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);
-#if PERF_WITH_POST
-      post_process((int8_t *)outputs[0].buf, (int8_t *)outputs[1].buf,
-                   (int8_t *)outputs[2].buf, height, width, box_conf_threshold,
-                   nms_threshold, pads, scale_w, scale_h, out_zps, out_scales,
-                   &detect_result_group);
-#endif
-      ret = rknn_outputs_release(ctx, io_num.n_output, outputs);
    }
-    gettimeofday(&stop_time, NULL);
-    printf("loop count = %d , average run  %f ms\n", test_count,
-           (__get_us(stop_time) - __get_us(start_time)) / 1000.0 / test_count);
+    rknn_outputs_release(ctx, io_num.n_output, outputs);
+    return detect_result_group;
  }
 };
--- a/src/guangpo_yolo/launch/run.launch
+++ b/src/guangpo_yolo/launch/run.launch
@ -0,0 +1,4 @@
+<launch>
+<node pkg="rviz"  type="rviz"  name="rviz"  args="-d $(find guangpo_yolo)/rviz/guangpo.rviz"></node>
+<node pkg="guangpo_yolo"  type="guangpo_yolo_node"  name="guangpo_yolo_node"  output="screen" ></node>
+</launch>
--- a/src/guangpo_yolo/rviz/guangpo.rviz
+++ b/src/guangpo_yolo/rviz/guangpo.rviz
@ -0,0 +1,208 @@
+Panels:
+  - Class: rviz/Displays
+    Help Height: 78
+    Name: Displays
+    Property Tree Widget:
+      Expanded:
+        - /Global Options1
+        - /Status1
+        - /MarkerArray1/Status1
+        - /PointCloud22
+      Splitter Ratio: 0.5
+    Tree Height: 257
+  - Class: rviz/Selection
+    Name: Selection
+  - Class: rviz/Tool Properties
+    Expanded:
+      - /2D Pose Estimate1
+      - /2D Nav Goal1
+      - /Publish Point1
+    Name: Tool Properties
+    Splitter Ratio: 0.5886790156364441
+  - Class: rviz/Views
+    Expanded:
+      - /Current View1
+    Name: Views
+    Splitter Ratio: 0.5
+  - Class: rviz/Time
+    Name: Time
+    SyncMode: 0
+    SyncSource: Image
+Preferences:
+  PromptSaveOnExit: true
+Toolbars:
+  toolButtonStyle: 2
+Visualization Manager:
+  Class: ""
+  Displays:
+    - Alpha: 0.5
+      Cell Size: 1
+      Class: rviz/Grid
+      Color: 160; 160; 164
+      Enabled: true
+      Line Style:
+        Line Width: 0.029999999329447746
+        Value: Lines
+      Name: Grid
+      Normal Cell Count: 0
+      Offset:
+        X: 0
+        Y: 0
+        Z: 0
+      Plane: XY
+      Plane Cell Count: 10
+      Reference Frame: <Fixed Frame>
+      Value: true
+    - Class: rviz/Image
+      Enabled: true
+      Image Topic: /usb_cam/image_raw
+      Max Value: 1
+      Median window: 5
+      Min Value: 0
+      Name: Image
+      Normalize Range: true
+      Queue Size: 2
+      Transport Hint: raw
+      Unreliable: false
+      Value: true
+    - Alpha: 1
+      Class: rviz/Axes
+      Enabled: true
+      Length: 1
+      Name: Axes
+      Radius: 0.10000000149011612
+      Reference Frame: <Fixed Frame>
+      Show Trail: false
+      Value: true
+    - Class: rviz/MarkerArray
+      Enabled: true
+      Marker Topic: /obstacles_topic
+      Name: MarkerArray
+      Namespaces:
+        "": true
+      Queue Size: 100
+      Value: true
+    - Alpha: 1
+      Autocompute Intensity Bounds: true
+      Autocompute Value Bounds:
+        Max Value: 10
+        Min Value: -10
+        Value: true
+      Axis: Z
+      Channel Name: intensity
+      Class: rviz/PointCloud2
+      Color: 255; 255; 255
+      Color Transformer: Intensity
+      Decay Time: 0
+      Enabled: true
+      Invert Rainbow: false
+      Max Color: 255; 255; 255
+      Min Color: 0; 0; 0
+      Name: PointCloud2
+      Position Transformer: XYZ
+      Queue Size: 10
+      Selectable: true
+      Size (Pixels): 3
+      Size (m): 0.009999999776482582
+      Style: Flat Squares
+      Topic: /cloud_ground
+      Unreliable: false
+      Use Fixed Frame: true
+      Use rainbow: true
+      Value: true
+    - Alpha: 1
+      Autocompute Intensity Bounds: true
+      Autocompute Value Bounds:
+        Max Value: 2.202476739883423
+        Min Value: 0.08952760696411133
+        Value: true
+      Axis: Z
+      Channel Name: intensity
+      Class: rviz/PointCloud2
+      Color: 255; 255; 255
+      Color Transformer: Intensity
+      Decay Time: 0
+      Enabled: true
+      Invert Rainbow: false
+      Max Color: 255; 255; 255
+      Min Color: 0; 0; 0
+      Name: PointCloud2
+      Position Transformer: XYZ
+      Queue Size: 10
+      Selectable: true
+      Size (Pixels): 3
+      Size (m): 0.009999999776482582
+      Style: Flat Squares
+      Topic: /cloud_noground
+      Unreliable: false
+      Use Fixed Frame: true
+      Use rainbow: true
+      Value: true
+  Enabled: true
+  Global Options:
+    Background Color: 48; 48; 48
+    Default Light: true
+    Fixed Frame: world
+    Frame Rate: 30
+  Name: root
+  Tools:
+    - Class: rviz/Interact
+      Hide Inactive Objects: true
+    - Class: rviz/MoveCamera
+    - Class: rviz/Select
+    - Class: rviz/FocusCamera
+    - Class: rviz/Measure
+    - Class: rviz/SetInitialPose
+      Theta std deviation: 0.2617993950843811
+      Topic: /initialpose
+      X std deviation: 0.5
+      Y std deviation: 0.5
+    - Class: rviz/SetGoal
+      Topic: /move_base_simple/goal
+    - Class: rviz/PublishPoint
+      Single click: true
+      Topic: /clicked_point
+  Value: true
+  Views:
+    Current:
+      Class: rviz/Orbit
+      Distance: 19.902732849121094
+      Enable Stereo Rendering:
+        Stereo Eye Separation: 0.05999999865889549
+        Stereo Focal Distance: 1
+        Swap Stereo Eyes: false
+        Value: false
+      Field of View: 0.7853981852531433
+      Focal Point:
+        X: 0
+        Y: 0
+        Z: 0
+      Focal Shape Fixed Size: true
+      Focal Shape Size: 0.05000000074505806
+      Invert Z Axis: false
+      Name: Current View
+      Near Clip Distance: 0.009999999776482582
+      Pitch: 1.344796061515808
+      Target Frame: <Fixed Frame>
+      Yaw: 3.2617619037628174
+    Saved: ~
+Window Geometry:
+  Displays:
+    collapsed: false
+  Height: 740
+  Hide Left Dock: false
+  Hide Right Dock: false
+  Image:
+    collapsed: false
+  QMainWindow State: 000000ff00000000fd00000004000000000000015600000245fc0200000009fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000005d00fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c006100790073010000003e0000018c000000cb00fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261fb0000000a0049006d00610067006501000001d0000000b30000001600ffffff000000010000010f00000245fc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a00560069006500770073010000003e00000245000000a500fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000005e80000003efc0100000002fb0000000800540069006d00650100000000000005e80000033700fffffffb0000000800540069006d00650100000000000004500000000000000000000003770000024500000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000
+  Selection:
+    collapsed: false
+  Time:
+    collapsed: false
+  Tool Properties:
+    collapsed: false
+  Views:
+    collapsed: false
+  Width: 1512
+  X: 0
+  Y: 0
--- a/src/guangpo_yolo/src/main.cpp
+++ b/src/guangpo_yolo/src/main.cpp
@ -6,8 +6,6 @@
 #include <ros/ros.h>
 #include <sensor_msgs/Image.h>

-#define DEBUG 0
-
 std::string model_name = []() -> std::string {
  std::filesystem::path current_dir =
      std::filesystem::path(__FILE__).parent_path();
@ -22,145 +20,28 @@ std::string input_path = []() -> std::string {
  return img_path;
 }();

-#include <dlfcn.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-
-#define _BASETSD_H
-
-#include "RgaUtils.h"
-
-#include "postprocess.h"
-
-#include "preprocess.h"
-#include "rknn_api.h"
+std::vector<std::string> labels={"Dianti","Ren"};

 #include "debugstream.hpp"
 #include "dianti.h"
-
-#define PERF_WITH_POST 1
-/*-------------------------------------------
-                  Functions
-------------------------------------------*/
-
-int ret;
-rknn_context ctx;
-
-static void dump_tensor_attr(rknn_tensor_attr *attr) {
-  std::string shape_str = attr->n_dims < 1 ? "" : std::to_string(attr->dims[0]);
-  for (int i = 1; i < attr->n_dims; ++i) {
-    shape_str += ", " + std::to_string(attr->dims[i]);
-  }
-
-  printf("  index=%d, name=%s, n_dims=%d, dims=[%s], n_elems=%d, size=%d, "
-         "w_stride = %d, size_with_stride=%d, fmt=%s, "
-         "type=%s, qnt_type=%s, "
-         "zp=%d, scale=%f\n",
-         attr->index, attr->name, attr->n_dims, shape_str.c_str(),
-         attr->n_elems, attr->size, attr->w_stride, attr->size_with_stride,
-         get_format_string(attr->fmt), get_type_string(attr->type),
-         get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
-}
-
-double __get_us(struct timeval t) { return (t.tv_sec * 1000000 + t.tv_usec); }
-
-static unsigned char *load_data(FILE *fp, size_t ofst, size_t sz) {
-  unsigned char *data;
-  int ret;
-
-  data = NULL;
-
-  if (NULL == fp) {
-    return NULL;
-  }
-
-  ret = fseek(fp, ofst, SEEK_SET);
-  if (ret != 0) {
-    printf("blob seek failure.\n");
-    return NULL;
-  }
-
-  data = (unsigned char *)malloc(sz);
-  if (data == NULL) {
-    printf("buffer malloc failure.\n");
-    return NULL;
-  }
-  ret = fread(data, 1, sz, fp);
-  return data;
-}
-
-static unsigned char *load_model(const std::string &filename, int *model_size) {
-  FILE *fp;
-  unsigned char *data;
-
-  fp = fopen(filename.c_str(), "rb");
-  if (NULL == fp) {
-    printf("Open file %s failed.\n", filename);
-    return NULL;
-  }
-
-  fseek(fp, 0, SEEK_END);
-  int size = ftell(fp);
-
-  data = load_data(fp, 0, size);
-
-  fclose(fp);
-
-  *model_size = size;
-  return data;
-}
-
-static int saveFloat(const char *file_name, float *output, int element_size) {
-  FILE *fp;
-  fp = fopen(file_name, "w");
-  for (int i = 0; i < element_size; i++) {
-    fprintf(fp, "%.6f\n", output[i]);
-  }
-  fclose(fp);
-  return 0;
-}
+#include "yolov5.hpp"

 class ImageReceiver {
 public:
-  unsigned char *model_data;
-  std::string option = "letterbox";
-  std::string out_path = "./out.jpg";
-  int img_channel = 0;
-  int channel = 3;
-  int width = 0;
-  int height = 0;
-  rknn_input_output_num io_num;
-  rknn_input inputs[1];
-  rknn_tensor_attr *output_attrs;
-  // init rga context
-  rga_buffer_t src;
-  rga_buffer_t dst;
-  size_t actual_size = 0;
-  const float nms_threshold = NMS_THRESH;      // 默认的NMS阈值
-  const float box_conf_threshold = BOX_THRESH; // 默认的置信度阈值
-  std::string sub_topic;
-  std::string send_topic;
-  DistanceEstimator *estimator;
+  Yolov5 yolo;
+  std::shared_ptr<DistanceEstimator> estimator;
  ImageReceiver(ros::NodeHandle &nh) {
-    sub_topic = yaml_config["sub_image_topic"].as<std::string>();
-    send_topic = yaml_config["pub_image_topic"].as<std::string>();
+    std::string sub_topic = yaml_config["sub_image_topic"].as<std::string>();
+    std::string send_topic = yaml_config["pub_image_topic"].as<std::string>();
    image_sub_ =
        nh.subscribe(sub_topic, 1, &ImageReceiver::imageCallback, this);
    image_pub_ = nh.advertise<sensor_msgs::Image>(send_topic, 1);

    Init();
+    yolo.SetLabels(labels);
+  yolo.LoadModel(model_name);
  }
-  ~ImageReceiver() {
-    deinitPostProcess();
-    // release
-    ret = rknn_destroy(ctx);
-
-    if (model_data) {
-      free(model_data);
-    }
-  }
+  ~ImageReceiver() {}
  void SendImage(const cv::Mat &image) {
    // 将cv::Mat图像转换为sensor_msgs::Image消息
    sensor_msgs::ImagePtr msg =
@ -180,7 +61,6 @@ public:
      cv_bridge::CvImagePtr cv_ptr =
          cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
      image_ = cv_ptr->image;
-      TIME_BEGIN_MS(detect_image);
      DetectImage(image_);
    } catch (cv_bridge::Exception &e) {
      ROS_ERROR("cv_bridge exception: %s", e.what());
@ -189,87 +69,6 @@ public:

  void Init() {
    gDebug(model_name);
-    gDebug(input_path);
-
-    memset(&src, 0, sizeof(src));
-    memset(&dst, 0, sizeof(dst));
-
-    printf("post process config: box_conf_threshold = %.2f, nms_threshold = "
-           "%.2f\n",
-           box_conf_threshold, nms_threshold);
-
-    /* Create the neural network */
-    printf("Loading mode...\n");
-    int model_data_size = 0;
-    model_data = load_model(model_name, &model_data_size);
-    ret = rknn_init(&ctx, model_data, model_data_size, 0, NULL);
-    if (ret < 0) {
-      printf("rknn_init error ret=%d\n", ret);
-      return;
-    }
-
-    rknn_sdk_version version;
-    ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &version,
-                     sizeof(rknn_sdk_version));
-    if (ret < 0) {
-      printf("rknn_init error ret=%d\n", ret);
-      return;
-    }
-    printf("sdk version: %s driver version: %s\n", version.api_version,
-           version.drv_version);
-
-    ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
-    if (ret < 0) {
-      printf("rknn_init error ret=%d\n", ret);
-      return;
-    }
-    printf("model input num: %d, output num: %d\n", io_num.n_input,
-           io_num.n_output);
-
-    rknn_tensor_attr input_attrs[io_num.n_input];
-    memset(input_attrs, 0, sizeof(input_attrs));
-    for (int i = 0; i < io_num.n_input; i++) {
-      input_attrs[i].index = i;
-      ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]),
-                       sizeof(rknn_tensor_attr));
-      if (ret < 0) {
-        printf("rknn_init error ret=%d\n", ret);
-        return;
-      }
-      dump_tensor_attr(&(input_attrs[i]));
-    }
-
-    // rknn_tensor_attr output_attrs[io_num.n_output];
-    output_attrs = new rknn_tensor_attr[io_num.n_output];
-    memset(output_attrs, 0, sizeof(output_attrs));
-    for (int i = 0; i < io_num.n_output; i++) {
-      output_attrs[i].index = i;
-      ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]),
-                       sizeof(rknn_tensor_attr));
-      dump_tensor_attr(&(output_attrs[i]));
-    }
-
-    if (input_attrs[0].fmt == RKNN_TENSOR_NCHW) {
-      printf("model is NCHW input fmt\n");
-      channel = input_attrs[0].dims[1];
-      height = input_attrs[0].dims[2];
-      width = input_attrs[0].dims[3];
-    } else {
-      printf("model is NHWC input fmt\n");
-      height = input_attrs[0].dims[1];
-      width = input_attrs[0].dims[2];
-      channel = input_attrs[0].dims[3];
-    }
-
-    printf("model input height=%d, width=%d, channel=%d\n", height, width,
-           channel);
-
-    memset(inputs, 0, sizeof(inputs));
-    inputs[0].index = 0;
-    inputs[0].type = RKNN_TENSOR_UINT8;
-    inputs[0].size = width * height * channel;
-    inputs[0].fmt = RKNN_TENSOR_NHWC;
-    inputs[0].pass_through = 0;

    Eigen::Matrix3d camera_matrix;
    camera_matrix << yaml_config["camera_in_param"]["dx"].as<double>(), 0.0,
@ -277,119 +76,22 @@ public:
        yaml_config["camera_in_param"]["dy"].as<double>(),
        yaml_config["camera_in_param"]["v0"].as<double>(), 0.0, 0.0, 1.0;
    gDebug(camera_matrix);
-    estimator = new DistanceEstimator(
+    estimator = std::make_shared<DistanceEstimator>(
        camera_matrix, yaml_config["image"]["width"].as<double>(),
        yaml_config["image"]["height"].as<double>());
  }

  void DetectImage(cv::Mat &img) {
    TIME_BEGIN_MS(DetectImage);
-    // 读取图片
-    // printf("Read %s ...\n", input_path);
-    cv::Mat &orig_img = img;
-    if (!orig_img.data) {
-      printf("cv::imread %s fail!\n", input_path);
-      return;
-    }
-    cv::cvtColor(orig_img, img, cv::COLOR_BGR2RGB);
+    detect_result_group_t detect_result_group;
+    detect_result_group = yolo.Infer(img, "");
+
+    TIME_END(DetectImage);
    int img_width = img.cols;
    int img_height = img.rows;
-    printf("img width = %d, img height = %d\n", img_width, img_height);
-
-    // 指定目标大小和预处理方式,默认使用LetterBox的预处理
-    BOX_RECT pads;
-    memset(&pads, 0, sizeof(BOX_RECT));
-    cv::Size target_size(width, height);
-    cv::Mat resized_img(target_size.height, target_size.width, CV_8UC3);
-    // 计算缩放比例
-    float scale_w = (float)target_size.width / img.cols;
-    float scale_h = (float)target_size.height / img.rows;
-
-    if (img_width != width || img_height != height) {
-      // 直接缩放采用RGA加速
-      if (option == "resize") {
-        printf("resize image by rga\n");
-        ret = resize_rga(src, dst, img, resized_img, target_size);
-        if (ret != 0) {
-          fprintf(stderr, "resize with rga error\n");
-          return;
-        }
-// 保存预处理图片
-#if DEBUG
-        cv::imwrite("resize_input.jpg", resized_img);
-#endif
-      } else if (option == "letterbox") {
-        printf("resize image with letterbox\n");
-        float min_scale = std::min(scale_w, scale_h);
-        scale_w = min_scale;
-        scale_h = min_scale;
-        letterbox(img, resized_img, pads, min_scale, target_size);
-// 保存预处理图片
-#if DEBUG
-        cv::imwrite("letterbox_input.jpg", resized_img);
-#endif
-      } else {
-        fprintf(stderr,
-                "Invalid resize option. Use 'resize' or 'letterbox'.\n");
-        return;
-      }
-      inputs[0].buf = resized_img.data;
-    } else {
-      inputs[0].buf = img.data;
-    }
-
-    rknn_inputs_set(ctx, io_num.n_input, inputs);
-
-    rknn_output outputs[io_num.n_output];
-    memset(outputs, 0, sizeof(outputs));
-    for (int i = 0; i < io_num.n_output; i++) {
-      outputs[i].index = i;
-      outputs[i].want_float = 0;
-    }
-
-    // 执行推理
-    ret = rknn_run(ctx, NULL);
-    ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);
-
-    // 后处理
-    detect_result_group_t detect_result_group;
-    std::vector<float> out_scales;
-    std::vector<int32_t> out_zps;
-    for (int i = 0; i < io_num.n_output; ++i) {
-      out_scales.push_back(output_attrs[i].scale);
-      out_zps.push_back(output_attrs[i].zp);
-    }
-    post_process((int8_t *)outputs[0].buf, (int8_t *)outputs[1].buf,
-                 (int8_t *)outputs[2].buf, height, width, box_conf_threshold,
-                 nms_threshold, pads, scale_w, scale_h, out_zps, out_scales,
-                 &detect_result_group);
-
-    // 画框和概率
-    char text[256];
-    for (int i = 0; i < detect_result_group.count; i++) {
-      detect_result_t *det_result = &(detect_result_group.results[i]);
-      sprintf(text, "%s %.1f%%", det_result->name, det_result->prop * 100);
-      printf("%s @ (%d %d %d %d) %f\n", det_result->name, det_result->box.left,
-             det_result->box.top, det_result->box.right, det_result->box.bottom,
-             det_result->prop);
-      int x1 = det_result->box.left;
-      int y1 = det_result->box.top;
-      int x2 = det_result->box.right;
-      int y2 = det_result->box.bottom;
-      rectangle(orig_img, cv::Point(x1, y1), cv::Point(x2, y2),
-                cv::Scalar(256, 0, 0, 256), 3);
-      putText(orig_img, text, cv::Point(x1, y1 + 12), cv::FONT_HERSHEY_SIMPLEX,
-              0.4, cv::Scalar(255, 255, 255));
-    }

    // gxt: add my process here ==============begin
    // 相机内参
-    // Eigen::Matrix3d camera_matrix;
-    // camera_matrix << 787.22316869, 0.0, 628.91534144, 0.0, 793.45182,
-    //     313.46301416, 0.0, 0.0, 1.0;
-    // //   int my_width=orig_img.cols;
-    // //   int my_height=orig_img.rows;
-    // DistanceEstimator estimator(camera_matrix, img_width, img_height);
    std::vector<Box> rens;
    std::vector<Box> diantis;
    gDebug(detect_result_group.count);
@ -411,15 +113,16 @@ public:
        rens.push_back(box);
      }
    }
-    DealImage(*estimator, orig_img, rens, diantis);
+    TIME_END(DetectImage);
+    DealImage(*estimator, img, rens, diantis);
+    TIME_END(DetectImage);
    // gxt: add my process here ==============end
-    SendImage(orig_img);
+    SendImage(img);

 #if DEBUG
    printf("save detect result to %s\n", out_path.c_str());
-    imwrite(out_path, orig_img);
+    imwrite(out_path, img);
 #endif
-    ret = rknn_outputs_release(ctx, io_num.n_output, outputs);
  }

 private:
--- a/src/guangpo_yolo/src/main_guangpo.cc
+++ b/src/guangpo_yolo/src/main_guangpo.cc
--- a/src/guangpo_yolo/src/main_test.cc
+++ b/src/guangpo_yolo/src/main_test.cc
@ -0,0 +1,37 @@
+#include "yolov5.hpp"
+#include <cv_bridge/cv_bridge.h>
+#include <filesystem>
+#include <opencv2/opencv.hpp>
+#include <ros/ros.h>
+#include <sensor_msgs/Image.h>
+
+std::string model_name = []() -> std::string {
+  std::filesystem::path current_dir =
+      std::filesystem::path(__FILE__).parent_path();
+  std::filesystem::path model_path =
+      current_dir / "../model/RK3588/guangpo.rknn";
+  return model_path;
+}();
+std::string input_path = []() -> std::string {
+  std::filesystem::path current_dir =
+      std::filesystem::path(__FILE__).parent_path();
+  std::filesystem::path img_path = current_dir / "../model/RK3588/889.jpg";
+  return img_path;
+}();
+
+int main(int argc, char **argv) {
+  ros::init(argc, argv, "main_test");
+  ros::NodeHandle nh;
+
+  Yolov5 yolo;
+  std::cout << "yolo begin" << std::endl;
+  yolo.SetLabels({"Dianti", "Ren"});
+  yolo.LoadModel(model_name);
+
+  cv::Mat image = cv::imread(input_path, 1);
+  yolo.Infer(image, "output.jpg");
+
+  ros::spin();
+
+  return 0;
+}
--- a/src/guangpo_yolo/src/postprocess.cc
+++ b/src/guangpo_yolo/src/postprocess.cc
@ -28,9 +28,9 @@

 // not use
 // #define LABEL_NALE_TXT_PATH ""
-std::vector<std::string> mylabels ={"Dianti","Ren"};
+// std::vector<std::string> mylabels ={"Dianti","Ren"};

-static char *labels[OBJ_CLASS_NUM];
+// static char *labels[OBJ_CLASS_NUM];

 const int anchor0[6] = {10, 13, 16, 30, 33, 23};
 const int anchor1[6] = {30, 61, 62, 45, 59, 119};
@ -264,26 +264,26 @@ static int process(int8_t *input, int *anchor, int grid_h, int grid_w, int heigh

 int post_process(int8_t *input0, int8_t *input1, int8_t *input2, int model_in_h, int model_in_w, float conf_threshold,
                 float nms_threshold, BOX_RECT pads, float scale_w, float scale_h, std::vector<int32_t> &qnt_zps,
-                 std::vector<float> &qnt_scales, detect_result_group_t *group)
+                 std::vector<float> &qnt_scales, detect_result_group_t *group, const std::vector<std::string>& labels)
 {
-  static int init = -1;
-  if (init == -1)
-  {
-    int ret = 0;
-    // ret = loadLabelName(LABEL_NALE_TXT_PATH, labels);
-    for(int i=0;i<mylabels.size();i++) {
-      labels[i] = new char[mylabels[i].size() + 1];  
-      std::strcpy((labels[i]),mylabels[i].c_str());
-      // gDebug(mylabels);
-    }
-    ret = 0;
-    if (ret < 0)
-    {
-      return -1;
-    }
+  // static int init = -1;
+  // if (init == -1)
+  // {
+  //   int ret = 0;
+  //   // ret = loadLabelName(LABEL_NALE_TXT_PATH, labels);
+  //   for(int i=0;i<mylabels.size();i++) {
+  //     labels[i] = new char[mylabels[i].size() + 1];  
+  //     std::strcpy((labels[i]),mylabels[i].c_str());
+  //     // gDebug(mylabels);
+  //   }
+  //   ret = 0;
+  //   if (ret < 0)
+  //   {
+  //     return -1;
+  //   }

-    init = 0;
-  }
+  //   init = 0;
+  // }
  memset(group, 0, sizeof(detect_result_group_t));

  std::vector<float> filterBoxes;
@ -359,7 +359,7 @@ int post_process(int8_t *input0, int8_t *input1, int8_t *input2, int model_in_h,
    group->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w) / scale_w);
    group->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h) / scale_h);
    group->results[last_count].prop = obj_conf;
-    char *label = labels[id];
+    const char *label = labels.at(id).c_str();
    strncpy(group->results[last_count].name, label, OBJ_NAME_MAX_SIZE);

    // printf("result %2d: (%4d, %4d, %4d, %4d), %s\n", i, group->results[last_count].box.left,
@ -372,14 +372,14 @@ int post_process(int8_t *input0, int8_t *input1, int8_t *input2, int model_in_h,
  return 0;
 }

-void deinitPostProcess()
-{
-  for (int i = 0; i < OBJ_CLASS_NUM; i++)
-  {
-    if (labels[i] != nullptr)
-    {
-      free(labels[i]);
-      labels[i] = nullptr;
-    }
-  }
-}
+// void deinitPostProcess()
+// {
+//   for (int i = 0; i < OBJ_CLASS_NUM; i++)
+//   {
+//     if (labels[i] != nullptr)
+//     {
+//       free(labels[i]);
+//       labels[i] = nullptr;
+//     }
+//   }
+// }