@@ -1,23 +1,35 @@
- /* Edge Impulse inferencing library
- * Copyright (c) 2020 EdgeImpulse Inc.
+ /* The Clear BSD License
*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
+ * Copyright (c) 2025 EdgeImpulse Inc.
+ * All rights reserved.
*
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted (subject to the limitations in the disclaimer
+ * below) provided that the following conditions are met:
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+ * THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
*/

#pragma once
@@ -31,32 +43,163 @@ namespace libeitrt
{

/**
- * @brief Creates and initializes an inference engine for TensorRT.
- * If the engine has already been created from the provided file path, then
- * the engine is loaded from disk.
- *
- * The engine is then persisted via the EiTrt object until it is deleted,
- * to provide for fastest inference with lowest overhead
+ * @brief Creates and initializes a context for building and running TensorRT models.
+ *
+ * The models generated (or managed) from this context are then persisted via the EiTrt
+ * object until it is deleted, to provide for fastest inference with lowest
+ * overhead.
*
* WARNING: This function leaks; the handle cannot be deleted because of the forward declaration.
* The fix for this is to define an interface (virtual class) that has a virtual destructor
* and also the infer function (although this way is more C friendly!)
* My bad... should have done that from the get-go.
*
+ * @param debug enable debug logging if true, disable otherwise.
+ * @return EiTrt* handle. The returned pointer is NULL on error.
+ */
+ EiTrt* create_EiTrt(bool debug);
+
+ /**
+ * @brief Builds and initializes an inference engine for TensorRT.
+ * If the engine has already been created from the provided file path, then
+ * the engine is loaded from disk.
+ *
+ * The engine is then persisted via the EiTrt object until it is deleted,
+ * to provide for fastest inference with lowest overhead
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id an index to associate with the model.
* @param model_file_name Model file path.
* Should have hash appended so that engines are regenerated when models change!
- * @return std::unique_ptr<EiTrt> EiTrt handle. Contained ptr is NULL if error
+ * @return true if building (or loading) the TensorRT model was successful.
+ */
+ bool build(EiTrt* ei_trt_handle, int model_id, const char *model_file_name);
+
+ /**
+ * @brief Warms up the model on the GPU for warm_up_ms milliseconds.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @param warm_up_ms the duration (in ms) to loop and run inference.
+ * @return true if warming up the model was successful.
+ */
+ bool warmUp(EiTrt* ei_trt_handle, int model_id, int warm_up_ms);
+
+ /**
+ * @brief Copies input from the CPU to the GPU for inference with model_id.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @param input a pointer to the (float) input buffer
+ * @param size the number of bytes to copy from the input
+ * @return true if copying the input was successful.
*/
- EiTrt* create_EiTrt(const char *model_file_name, bool debug);
+ bool copyInputToDevice(EiTrt* ei_trt_handle, int model_id, float* input, int size);

/**
* @brief Perform inference
- *
- * @param ei_trt_handle Created handle to inference engine
- * @param[in] input Input features (buffer member of ei_matrix)
- * @param[out] output Buffer to write output to
- * @param output_size Buffer size
+ *
+ * @param ei_trt_handle EI TensorRT context.
* @return int 0 on success, <0 otherwise
*/
- int infer(EiTrt* ei_trt_handle, float* input, float* output, int output_size);
+ int infer(EiTrt* ei_trt_handle, int model_id);
+
+ /**
+ * @brief Copies output from the GPU to the CPU after inference with model_id.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @param output a pointer to the (float) output buffer
+ * @param size the number of bytes to copy from the output
+ * @return true if copying the output was successful.
+ */
+ bool copyOutputToHost(EiTrt* ei_trt_handle, int model_id, float* output, int size);
+
+ /**
+ * @brief Configures the maximum workspace size that may be allocated.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param size workspace size in bytes.
+ */
+ void setMaxWorkspaceSize(EiTrt *ei_trt_handle, int size);
+
+ /**
+ * @brief Returns the currently configured maximum workspace size.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @return the size of the workspace in bytes.
+ */
+ int getMaxWorkspaceSize(EiTrt *ei_trt_handle);
+
+ /**
+ * @brief Returns the input size (in features) of model_id.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @return the input size (in features).
+ */
+ int getInputSize(EiTrt* ei_trt_handle, int model_id);
+
+ /**
+ * @brief Returns the output size (in features) of model_id.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @return the output size (in features).
+ */
+ int getOutputSize(EiTrt* ei_trt_handle, int model_id);
+
+ /**
+ * @brief Returns the latest inference latency in ms for model with id
+ * (model_id) and context (ei_trt_handle).
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @return the inference time in ms.
+ **/
+ uint64_t getInferenceMs(EiTrt* ei_trt_handle, int model_id);
+
+ /**
+ * @brief Returns the latest inference latency in us for model with id
+ * (model_id) and context (ei_trt_handle).
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @return the inference time in us.
+ **/
+ uint64_t getInferenceUs(EiTrt* ei_trt_handle, int model_id);
+
+ /**
+ * @brief Returns the latest inference latency in ns for model with id
+ * (model_id) and context (ei_trt_handle).
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @return the inference time in ns.
+ **/
+ uint64_t getInferenceNs(EiTrt* ei_trt_handle, int model_id);
+
+ /**
+ * @brief Returns the current library major version
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @return the library's major version.
+ **/
+ int getMajorVersion(EiTrt *ei_trt_handle);
+
+ /**
+ * @brief Returns the current library minor version
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @return the library's minor version.
+ **/
+ int getMinorVersion(EiTrt *ei_trt_handle);
+
+ /**
+ * @brief Returns the current library patch version
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @return the library's patch version.
+ **/
+ int getPatchVersion(EiTrt *ei_trt_handle);
}
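
For context, the sketch below (not part of this diff) shows how the new multi-model API is meant to be called end to end: create a context, build or load an engine for a model id, optionally warm it up, copy input to the device, run inference, and copy the output back. The include name "libeitrt.h", the model path "model.onnx", and the 200 ms warm-up budget are assumptions for illustration; buffer sizes come from getInputSize/getOutputSize, and the copy sizes are in bytes as documented above.

#include <cstdio>
#include <vector>

#include "libeitrt.h"   // assumed header name for the declarations above

int main() {
    // Create the EI TensorRT context (intentionally kept alive for the program's lifetime,
    // per the WARNING in the header).
    EiTrt* trt = libeitrt::create_EiTrt(/*debug=*/true);
    if (!trt) {
        printf("failed to create EiTrt context\n");
        return -1;
    }

    const int model_id = 0;

    // Build a TensorRT engine for this model id, or load a previously built one from disk.
    if (!libeitrt::build(trt, model_id, "model.onnx")) {
        printf("failed to build TensorRT engine\n");
        return -1;
    }

    // Optional: warm the engine up so the first timed run is representative.
    libeitrt::warmUp(trt, model_id, 200);

    // Size host buffers from the model itself.
    std::vector<float> input(libeitrt::getInputSize(trt, model_id), 0.0f);
    std::vector<float> output(libeitrt::getOutputSize(trt, model_id), 0.0f);

    // Copy input to the GPU, run inference, then copy the output back (sizes in bytes).
    if (!libeitrt::copyInputToDevice(trt, model_id, input.data(),
                                     static_cast<int>(input.size() * sizeof(float)))) {
        return -1;
    }
    if (libeitrt::infer(trt, model_id) != 0) {
        return -1;
    }
    if (!libeitrt::copyOutputToHost(trt, model_id, output.data(),
                                    static_cast<int>(output.size() * sizeof(float)))) {
        return -1;
    }

    printf("inference took %llu ms\n",
           (unsigned long long)libeitrt::getInferenceMs(trt, model_id));
    return 0;
}

Splitting the old infer(input, output, size) call into explicit copyInputToDevice / infer / copyOutputToHost steps presumably lets callers manage several models per context and time the inference separately from the host/device transfers.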