Commit 15fd14e

SDK version: v1.71.22
1 parent 9f2b818 commit 15fd14e

25 files changed: +26008 -288 lines

LICENSE

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-Unless specifically indicated otherwise in a file, files are licensed under the BSD 3-Clause Clear license, as can be found in: LICENSE.bsd
+Unless specifically indicated otherwise in a file, files are licensed under the BSD 3-Clause Clear license, as can be found in: LICENSE.3-clause-bsd-clear
 
 Folders containing third party code and libraries are listed below. Each folder contains its own license specified for its files.
 

classifier/ei_fill_result_struct.h

Lines changed: 18 additions & 0 deletions

@@ -474,6 +474,8 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_visual_ad_struct_f32
         }
     }
 
+    result->classification[0].value = result->visual_ad_result.max_value;
+
     result->visual_ad_grid_cells = results.data();
     result->visual_ad_count = results.size();
 #endif // EI_CLASSIFIER_HAS_VISUAL_ANOMALY

@@ -1886,6 +1888,22 @@ bool find_mtx_by_idx(ei_feature_t* mtx, ei::matrix_t** matrix, uint32_t mtx_id,
     }
     return false;
 }
+
+size_t get_feature_size(ei_feature_t* mtx, uint32_t ids_size, uint32_t* ids, size_t mtx_size) {
+    size_t feat_size = 0;
+    ei::matrix_t* matrix = NULL;
+    for (size_t i = 0; i < ids_size; i++) {
+        size_t cur_mtx = ids[i];
+
+        if (!find_mtx_by_idx(mtx, &matrix, cur_mtx, mtx_size)) {
+            ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx);
+            return -1;
+        }
+        feat_size += matrix->rows * matrix->cols;
+    }
+    return feat_size;
+}
+
 #endif
 
 #endif // _EI_CLASSIFIER_FILL_RESULT_STRUCT_H_
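The new get_feature_size helper sums rows * cols over every matrix whose block id appears in ids[], so callers can size a combined input buffer exactly instead of relying on a fixed frame size. A minimal usage sketch, mirroring the call sites added in memryx.h and tensorrt.h below (variable names follow those call sites; the surrounding setup is abbreviated):

    // Size a combined input matrix from the blocks feeding this learn block.
    size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
    size_t combined_matrix_size = get_feature_size(fmatrix, input_block_ids_size, input_block_ids, mtx_size);
    ei::matrix_t combined_matrix(1, combined_matrix_size);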

classifier/inferencing_engines/akida.h

Lines changed: 19 additions & 14 deletions

@@ -204,7 +204,7 @@ bool init_akida(const uint8_t *model_arr, size_t model_arr_size, bool debug)
     // get list of available devices
     py::list devices = akida.attr("devices")();
     if(devices.empty() == true) {
-        ei_printf("ERR: AKD1000 device not found!\n");
+        ei_printf("ERR: Akida device not found!\n");
         return false;
     }

@@ -215,16 +215,18 @@ bool init_akida(const uint8_t *model_arr, size_t model_arr_size, bool debug)
     else {
         device = devices[0];
     }
-    //TODO: check if selected device is correct (compare versions)
-    // enable power measurement
-    device.attr("soc").attr("power_measurement_enabled") = true;
+
+    // TODO: check if selected device is correct (compare versions)
+    // power measurement not available on Akida1500, commenting out for now
+    //device.attr("soc").attr("power_measurement_enabled") = true;
+
 
     // map model to the device
     try {
         model.attr("map")(device);
     }
     catch (py::error_already_set &e) {
-        ei_printf("ERR: Can't load the ML model onto the AKD1000 SoC\n");
+        ei_printf("ERR: Can't load the ML model onto the Akida SoC\n");
         ei_printf("ERR: %s\n", e.what());
         return false;
     }

@@ -283,6 +285,7 @@ EI_IMPULSE_ERROR run_nn_inference(
     void *config_ptr,
     bool debug)
 {
+
     ei_learning_block_config_tflite_graph_t *block_config = ((ei_learning_block_config_tflite_graph_t*)config_ptr);
     ei_config_tflite_graph_t *graph_config = ((ei_config_tflite_graph_t*)block_config->graph_config);

@@ -340,7 +343,7 @@ EI_IMPULSE_ERROR run_nn_inference(
         }
     }
 
-    // Run inference on AKD1000
+    // Run inference on Akida
     uint64_t ctx_start_us = ei_read_timer_us();
     py::array_t<float> potentials;
     try {

@@ -358,7 +361,7 @@ EI_IMPULSE_ERROR run_nn_inference(
 
     if(debug) {
         std::string ret_str = py::str(potentials).cast<std::string>();
-        ei_printf("AKD1000 raw output:\n%s\n", ret_str.c_str());
+        ei_printf("Akida raw output:\n%s\n", ret_str.c_str());
     }
 
     // convert to vector of floats to make further processing much easier

@@ -392,14 +395,16 @@ EI_IMPULSE_ERROR run_nn_inference(
 
     float active_power = 0;
 #if (defined(EI_CLASSIFIER_USE_AKIDA_HARDWARE))
+    // the AKD1500 does not have power measurements, commenting out for now
+    // TODO: check between Akida1000 and Akida1500 or re-enable when available
     // power measurement post-processing
-    float floor_power = device.attr("soc").attr("power_meter").attr("floor").cast<float>();
-    py::array pwr_events = device.attr("soc").attr("power_meter").attr("events")();
-    auto events = pwr_events.mutable_unchecked<py::object>();
-    for (py::ssize_t i = 0; i < events.shape(0); i++) {
-        active_power += events(i).attr("power").cast<float>();
-    }
-    active_power = (active_power/pwr_events.size()) - floor_power;
+    //float floor_power = device.attr("soc").attr("power_meter").attr("floor").cast<float>();
+    //py::array pwr_events = device.attr("soc").attr("power_meter").attr("events")();
+    //auto events = pwr_events.mutable_unchecked<py::object>();
+    //for (py::ssize_t i = 0; i < events.shape(0); i++) {
+    //    active_power += events(i).attr("power").cast<float>();
+    //}
+    //active_power = (active_power/pwr_events.size()) - floor_power;
 #endif
 
     result->timing.classification_us = ctx_end_us - ctx_start_us;
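Both commented-out regions above trace to the same limitation: the AKD1500 lacks the power meter that the AKD1000 exposes. If the TODO is later resolved, a hypothetical guard could re-enable measurement only on supported hardware. A sketch, assuming the Python device object exposes a version attribute that distinguishes the two SoCs (this attribute check is an assumption, not something this commit uses):

    // Hypothetical sketch (not in this commit): gate power measurement on device type.
    // Assumes py::str(device.attr("version")) yields a string naming the SoC.
    std::string hw_version = py::str(device.attr("version")).cast<std::string>();
    if (hw_version.find("1500") == std::string::npos) {
        device.attr("soc").attr("power_measurement_enabled") = true;
    }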

classifier/inferencing_engines/memryx.h

Lines changed: 3 additions & 1 deletion

@@ -253,7 +253,9 @@ EI_IMPULSE_ERROR run_nn_inference(
     size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
     ei::matrix_t* matrix = NULL;
 
-    ei::matrix_t combined_matrix(1, impulse->nn_input_frame_size);
+    size_t combined_matrix_size = get_feature_size(fmatrix, input_block_ids_size, input_block_ids, mtx_size);
+    ei::matrix_t combined_matrix(1, combined_matrix_size);
+
     uint32_t buf_pos = 0;
 
     for (size_t i = 0; i < input_block_ids_size; i++) {
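The copy loop that follows this hunk is unchanged by the commit and truncated here. As a rough reconstruction (the memcpy arithmetic and the error code are assumptions inferred from find_mtx_by_idx and the buf_pos cursor), it appends each referenced block's features into the now exactly-sized combined_matrix:

    for (size_t i = 0; i < input_block_ids_size; i++) {
        size_t cur_mtx = input_block_ids[i];
        // locate the matrix for this block id, then append its features
        if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) {
            ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx);
            return EI_IMPULSE_INVALID_SIZE;
        }
        memcpy(combined_matrix.buffer + buf_pos, matrix->buffer,
               matrix->rows * matrix->cols * sizeof(float));
        buf_pos += matrix->rows * matrix->cols;
    }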

classifier/inferencing_engines/tensorrt.h

Lines changed: 66 additions & 26 deletions

@@ -194,6 +194,7 @@ EI_IMPULSE_ERROR run_nn_inference_tflite_full(
 #include <string>
 #include <filesystem>
 #include <stdlib.h>
+#include <map>
 #include "tflite/linux-jetson-nano/libeitrt.h"
 
 #if __APPLE__

@@ -202,7 +203,8 @@ EI_IMPULSE_ERROR run_nn_inference_tflite_full(
 #include <linux/limits.h>
 #endif
 
-EiTrt *ei_trt_handle = NULL;
+EiTrt* ei_trt_handle;
+std::map<int,bool> ei_trt_models_init;
 
 inline bool file_exists(char *model_file_name)
 {

@@ -217,6 +219,7 @@ inline bool file_exists(char *model_file_name)
 
 EI_IMPULSE_ERROR write_model_to_file(
     const ei_impulse_t *impulse,
+    uint32_t learn_block_index,
     char *model_file_name,
     const unsigned char *model,
     size_t model_size,

@@ -251,20 +254,22 @@ EI_IMPULSE_ERROR write_model_to_file(
         snprintf(
             model_file_name,
             PATH_MAX,
-            "/tmp/ei-%d-%d.engine",
+            "/tmp/ei-%d-%d-%d.engine",
             impulse->project_id,
-            impulse->deploy_version);
+            impulse->deploy_version,
+            impulse->learning_blocks[learn_block_index].blockId);
     }
     else {
         std::filesystem::path p(current_exe_path);
         snprintf(
             model_file_name,
             PATH_MAX,
-            "%s/%s-project%d-v%d.engine",
+            "%s/%s-project%d-v%d-%d.engine",
             p.parent_path().c_str(),
             p.stem().c_str(),
             impulse->project_id,
-            impulse->deploy_version);
+            impulse->deploy_version,
+            impulse->learning_blocks[learn_block_index].blockId);
     }
 
     bool fexists = file_exists(model_file_name);

@@ -323,30 +328,43 @@ EI_IMPULSE_ERROR run_nn_inference(
 #error "TensorRT requires an unquantized network"
 #endif
 
-    static bool first_run = true;
     static char model_file_name[PATH_MAX];
-    if (first_run) {
-        write_model_to_file(impulse, model_file_name, graph_config->model, graph_config->model_size);
-        first_run = false;
-    }
+    // writes the model file to the filesystem (if and only if it doesn't exist)
+    write_model_to_file(impulse, learn_block_index, model_file_name, graph_config->model, graph_config->model_size);
 
-    float *out_data = (float*)ei_malloc(impulse->tflite_output_features_count * sizeof(float));
-    if (out_data == nullptr) {
-        ei_printf("ERR: Cannot allocate memory for output data \n");
+    // create context for building and executing TensorRT engine(s)
+    if (ei_trt_handle == nullptr) {
+        ei_trt_handle = libeitrt::create_EiTrt(debug);
+        libeitrt::setMaxWorkspaceSize(ei_trt_handle, 1<<29); // 512 MB
+
+        if (debug) {
+            ei_printf("Using EI TensorRT lib v%d.%d.%d\r\n", libeitrt::getMajorVersion(ei_trt_handle),
+                libeitrt::getMinorVersion(ei_trt_handle), libeitrt::getPatchVersion(ei_trt_handle));
+        }
     }
 
-    // lazy initialize tensorRT context
-    if (ei_trt_handle == nullptr) {
-        ei_trt_handle = libeitrt::create_EiTrt(model_file_name, debug);
+    // lazy initialize TensorRT models and warm up only once per model
+    if (ei_trt_models_init.count(learn_block_index) <= 0) {
+        libeitrt::build(ei_trt_handle, learn_block_index, model_file_name);
+        libeitrt::warmUp(ei_trt_handle, learn_block_index, 200);
+        ei_trt_models_init[learn_block_index] = true;
     }
 
+    int input_size = libeitrt::getInputSize(ei_trt_handle, learn_block_index);
+    int output_size = libeitrt::getOutputSize(ei_trt_handle, learn_block_index);
+
 #if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
     size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
     ei::matrix_t* matrix = NULL;
 
-    ei::matrix_t combined_matrix(1, impulse->nn_input_frame_size);
-    uint32_t buf_pos = 0;
+    size_t combined_matrix_size = get_feature_size(fmatrix, input_block_ids_size, input_block_ids, mtx_size);
+    if ((input_size >= 0) && ((size_t)input_size != combined_matrix_size)) {
+        ei_printf("ERR: Invalid input features size, %ld given (expected: %d)\n", combined_matrix_size, input_size);
+        return EI_IMPULSE_INVALID_SIZE;
+    }
+    ei::matrix_t combined_matrix(1, combined_matrix_size);
 
+    uint32_t buf_pos = 0;
     for (size_t i = 0; i < input_block_ids_size; i++) {
         size_t cur_mtx = input_block_ids[i];
 

@@ -364,26 +382,38 @@ EI_IMPULSE_ERROR run_nn_inference(
     ei::matrix_t* matrix = fmatrix[0].matrix;
 #endif
 
-    uint64_t ctx_start_us = ei_read_timer_us();
+    // copy input data to gpu
+    libeitrt::copyInputToDevice(ei_trt_handle, learn_block_index, matrix->buffer,
+        input_size * sizeof(float));
 
-    libeitrt::infer(ei_trt_handle, matrix->buffer, out_data, impulse->tflite_output_features_count);
+    libeitrt::infer(ei_trt_handle, learn_block_index);
 
-    uint64_t ctx_end_us = ei_read_timer_us();
+    float *out_data = (float*)ei_malloc(output_size * sizeof(float));
+    if (out_data == nullptr) {
+        ei_printf("ERR: Cannot allocate memory for output data \n");
+        return EI_IMPULSE_ALLOC_FAILED;
+    }
 
-    result->timing.classification_us = ctx_end_us - ctx_start_us;
+    // copy output data from gpu
+    libeitrt::copyOutputToHost(ei_trt_handle, learn_block_index, out_data,
+        output_size * sizeof(float));
+
+    // get inference time
+    result->timing.classification_us = libeitrt::getInferenceUs(ei_trt_handle, learn_block_index);
     result->timing.classification = (int)(result->timing.classification_us / 1000);
 
     if (result->copy_output) {
         matrix_t *output_matrix = fmatrix[impulse->dsp_blocks_size + learn_block_index].matrix;
         const size_t matrix_els = output_matrix->rows * output_matrix->cols;
 
-        if (impulse->tflite_output_features_count != matrix_els) {
-            ei_printf("ERR: output tensor has size %d, but input matrix has has size %d\n",
-                impulse->tflite_output_features_count, (int)matrix_els);
+        if ((output_size >= 0) && ((size_t)output_size != matrix_els)) {
+            ei_printf("ERR: output tensor has size %d, but input matrix has size %d\n",
+                output_size, (int)matrix_els);
             ei_free(out_data);
             return EI_IMPULSE_INVALID_SIZE;
         }
-        memcpy(output_matrix->buffer, out_data, impulse->tflite_output_features_count * sizeof(float));
+        memcpy(output_matrix->buffer, out_data, output_size * sizeof(float));
         ei_free(out_data);
         return EI_IMPULSE_OK;
     }

@@ -445,6 +475,16 @@ EI_IMPULSE_ERROR run_nn_inference(
             debug);
         break;
     }
+    case EI_CLASSIFIER_LAST_LAYER_YOLO_PRO: {
+        fill_res = fill_result_struct_f32_yolo_pro(
+            impulse,
+            block_config,
+            result,
+            out_data,
+            impulse->tflite_output_features_count,
+            debug);
+        break;
+    }
     default: {
         ei_printf(
             "ERR: Unsupported object detection last layer (%d)\n",
