@@ -194,6 +194,7 @@ EI_IMPULSE_ERROR run_nn_inference_tflite_full(
 #include <string>
 #include <filesystem>
 #include <stdlib.h>
+#include <map>
 #include "tflite/linux-jetson-nano/libeitrt.h"
 
 #if __APPLE__
@@ -202,7 +203,8 @@ EI_IMPULSE_ERROR run_nn_inference_tflite_full(
 #include <linux/limits.h>
 #endif
 
-EiTrt *ei_trt_handle = NULL;
+EiTrt* ei_trt_handle;
+std::map<int, bool> ei_trt_models_init;
 
 inline bool file_exists(char *model_file_name)
 {
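These two globals replace the old one-engine-per-process handle: a single shared `EiTrt` context plus a map recording which learning-block indices already have a built engine, so an impulse with several learning blocks can run them all through one context. A runnable sketch of the pattern, with a hypothetical `EiTrtStub` in place of the real libeitrt types:

```cpp
#include <cstdio>
#include <map>

// Hypothetical stand-in for the libeitrt context type.
struct EiTrtStub { };

static EiTrtStub* handle = nullptr;
static std::map<int, bool> models_init; // which block indices are built

static void ensure_block_ready(int block_index, const char *engine_path) {
    if (handle == nullptr) {
        handle = new EiTrtStub(); // shared context, created on first use
    }
    if (models_init.count(block_index) == 0) { // first call for this block
        printf("building engine %d from %s\n", block_index, engine_path);
        models_init[block_index] = true;       // never rebuilt afterwards
    }
}

int main(void) {
    ensure_block_ready(0, "/tmp/a.engine"); // builds block 0
    ensure_block_ready(0, "/tmp/a.engine"); // no-op, already built
    ensure_block_ready(1, "/tmp/b.engine"); // builds block 1
    return 0;
}
```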
@@ -217,6 +219,7 @@ inline bool file_exists(char *model_file_name)
 
 EI_IMPULSE_ERROR write_model_to_file(
     const ei_impulse_t *impulse,
+    uint32_t learn_block_index,
     char *model_file_name,
     const unsigned char *model,
     size_t model_size,
@@ -251,20 +254,22 @@ EI_IMPULSE_ERROR write_model_to_file(
         snprintf(
             model_file_name,
             PATH_MAX,
-            "/tmp/ei-%d-%d.engine",
+            "/tmp/ei-%d-%d-%d.engine",
             impulse->project_id,
-            impulse->deploy_version);
+            impulse->deploy_version,
+            impulse->learning_blocks[learn_block_index].blockId);
     }
     else {
         std::filesystem::path p(current_exe_path);
         snprintf(
             model_file_name,
             PATH_MAX,
-            "%s/%s-project%d-v%d.engine",
+            "%s/%s-project%d-v%d-%d.engine",
             p.parent_path().c_str(),
             p.stem().c_str(),
             impulse->project_id,
-            impulse->deploy_version);
+            impulse->deploy_version,
+            impulse->learning_blocks[learn_block_index].blockId);
     }
 
     bool fexists = file_exists(model_file_name);
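Embedding the learning block's `blockId` in the cached engine filename keeps multi-model impulses from sharing, and overwriting, a single `/tmp/ei-<project>-<version>.engine` file. A small runnable sketch of the resulting names; the IDs here are invented:

```cpp
#include <cstdio>

int main(void) {
    char name[256];
    // Same format string as the patch, with made-up IDs.
    snprintf(name, sizeof(name), "/tmp/ei-%d-%d-%d.engine",
             /* project_id */ 12345, /* deploy_version */ 7, /* blockId */ 42);
    printf("%s\n", name); // prints /tmp/ei-12345-7-42.engine
    return 0;
}
```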
@@ -323,30 +328,43 @@ EI_IMPULSE_ERROR run_nn_inference(
 #error "TensorRT requires an unquantized network"
 #endif
 
-    static bool first_run = true;
     static char model_file_name[PATH_MAX];
-    if (first_run) {
-        write_model_to_file(impulse, model_file_name, graph_config->model, graph_config->model_size);
-        first_run = false;
-    }
+    // writes the model file to filesystem (if and only if it doesn't exist)
+    write_model_to_file(impulse, learn_block_index, model_file_name, graph_config->model, graph_config->model_size);
 
-    float *out_data = (float*)ei_malloc(impulse->tflite_output_features_count * sizeof(float));
-    if (out_data == nullptr) {
-        ei_printf("ERR: Cannot allocate memory for output data\n");
+    // create context for building and executing TensorRT engine(s)
+    if (ei_trt_handle == nullptr) {
+        ei_trt_handle = libeitrt::create_EiTrt(debug);
+        libeitrt::setMaxWorkspaceSize(ei_trt_handle, 1 << 29); // 512 MB
+
+        if (debug) {
+            ei_printf("Using EI TensorRT lib v%d.%d.%d\r\n", libeitrt::getMajorVersion(ei_trt_handle),
+                libeitrt::getMinorVersion(ei_trt_handle), libeitrt::getPatchVersion(ei_trt_handle));
+        }
     }
 
-    // lazy initialize tensorRT context
-    if (ei_trt_handle == nullptr) {
-        ei_trt_handle = libeitrt::create_EiTrt(model_file_name, debug);
+    // lazy initialize TensorRT models and warm up only once per model
+    if (ei_trt_models_init.count(learn_block_index) <= 0) {
+        libeitrt::build(ei_trt_handle, learn_block_index, model_file_name);
+        libeitrt::warmUp(ei_trt_handle, learn_block_index, 200);
+        ei_trt_models_init[learn_block_index] = true;
     }
 
+    int input_size = libeitrt::getInputSize(ei_trt_handle, learn_block_index);
+    int output_size = libeitrt::getOutputSize(ei_trt_handle, learn_block_index);
+
 #if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
     size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
     ei::matrix_t* matrix = NULL;
 
-    ei::matrix_t combined_matrix(1, impulse->nn_input_frame_size);
-    uint32_t buf_pos = 0;
+    size_t combined_matrix_size = get_feature_size(fmatrix, input_block_ids_size, input_block_ids, mtx_size);
+    if ((input_size >= 0) && ((size_t)input_size != combined_matrix_size)) {
+        ei_printf("ERR: Invalid input features size, %ld given (expected: %d)\n", combined_matrix_size, input_size);
+        return EI_IMPULSE_INVALID_SIZE;
+    }
+    ei::matrix_t combined_matrix(1, combined_matrix_size);
 
+    uint32_t buf_pos = 0;
    for (size_t i = 0; i < input_block_ids_size; i++) {
         size_t cur_mtx = input_block_ids[i];
@@ -364,26 +382,38 @@ EI_IMPULSE_ERROR run_nn_inference(
     ei::matrix_t* matrix = fmatrix[0].matrix;
 #endif
 
-    uint64_t ctx_start_us = ei_read_timer_us();
+    // copy input data to gpu
+    libeitrt::copyInputToDevice(ei_trt_handle, learn_block_index, matrix->buffer,
+        input_size * sizeof(float));
 
-    libeitrt::infer(ei_trt_handle, matrix->buffer, out_data, impulse->tflite_output_features_count);
+    libeitrt::infer(ei_trt_handle, learn_block_index);
 
-    uint64_t ctx_end_us = ei_read_timer_us();
+    float *out_data = (float*)ei_malloc(output_size * sizeof(float));
+    if (out_data == nullptr) {
+        ei_printf("ERR: Cannot allocate memory for output data\n");
+        return EI_IMPULSE_ALLOC_FAILED;
+    }
 
-    result->timing.classification_us = ctx_end_us - ctx_start_us;
+    // copy output data from gpu
+    libeitrt::copyOutputToHost(ei_trt_handle, learn_block_index, out_data,
+        output_size * sizeof(float));
+
+
+    // get inference time
+    result->timing.classification_us = libeitrt::getInferenceUs(ei_trt_handle, learn_block_index);
     result->timing.classification = (int)(result->timing.classification_us / 1000);
 
     if (result->copy_output) {
         matrix_t *output_matrix = fmatrix[impulse->dsp_blocks_size + learn_block_index].matrix;
         const size_t matrix_els = output_matrix->rows * output_matrix->cols;
 
-        if (impulse->tflite_output_features_count != matrix_els) {
-            ei_printf("ERR: output tensor has size %d, but input matrix has has size %d\n",
-                impulse->tflite_output_features_count, (int)matrix_els);
+        if ((output_size >= 0) && ((size_t)output_size != matrix_els)) {
+            ei_printf("ERR: output tensor has size %d, but input matrix has size %d\n",
+                output_size, (int)matrix_els);
             ei_free(out_data);
             return EI_IMPULSE_INVALID_SIZE;
         }
-        memcpy(output_matrix->buffer, out_data, impulse->tflite_output_features_count * sizeof(float));
+        memcpy(output_matrix->buffer, out_data, output_size * sizeof(float));
         ei_free(out_data);
         return EI_IMPULSE_OK;
     }
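Note the timing change in this hunk: instead of bracketing `infer()` with `ei_read_timer_us()` on the host, the reported time now comes from the library via `getInferenceUs()`, so host-side copies and the (now post-inference) `out_data` allocation presumably no longer inflate the measurement. A runnable sketch of the per-call sequence, with stub functions standing in for the libeitrt device calls; only the call order matters here:

```cpp
#include <cstdio>
#include <cstdlib>

// Stubs for the libeitrt device calls; no real GPU work happens.
static void copyInputToDevice(const float *in, size_t n_bytes) { (void)in; (void)n_bytes; }
static void infer(void) { /* engine executes on the GPU */ }
static void copyOutputToHost(float *out, size_t n_bytes) {
    for (size_t i = 0; i < n_bytes / sizeof(float); i++) out[i] = 0.5f;
}
static long getInferenceUs(void) { return 1234; } // timed by the library

int main(void) {
    float input[5] = {1, 2, 3, 4, 5};
    const int output_size = 3;

    copyInputToDevice(input, sizeof(input));                  // 1. host -> device
    infer();                                                  // 2. run the engine
    float *out_data = (float *)malloc(output_size * sizeof(float));
    if (out_data == NULL) return 1;                           // 3. allocate after infer
    copyOutputToHost(out_data, output_size * sizeof(float));  // 4. device -> host
    printf("took %ld us, out[0] = %.2f\n", getInferenceUs(), out_data[0]);
    free(out_data);
    return 0;
}
```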
@@ -445,6 +475,16 @@ EI_IMPULSE_ERROR run_nn_inference(
                 debug);
             break;
         }
+        case EI_CLASSIFIER_LAST_LAYER_YOLO_PRO: {
+            fill_res = fill_result_struct_f32_yolo_pro(
+                impulse,
+                block_config,
+                result,
+                out_data,
+                impulse->tflite_output_features_count,
+                debug);
+            break;
+        }
         default: {
             ei_printf(
                 "ERR: Unsupported object detection last layer (%d)\n",