 */

#include <cstring>
- #include <memory>
- #include <vector>

#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/evalue.h>

+ #include <executorch/backends/arm/runtime/VelaBinStream.h>
+
#include <ethosu_driver.h>
#include <pmu_ethosu.h>
@@ -52,29 +52,14 @@ class ArmBackend final : public PyTorchBackendInterface {
  char* data = (char*)processed->data();
  size_t size = processed->size();
- char* foot = data + size - 16;
+ char* foot = data + size - sizeof(VelaBinBlock);

- // Header and footer both 16 bit aligned suggest valid structure and we
- // wont walk off the end of the chunks and segfault
- if (!((int)data == next_mul_16((uintptr_t)data))) {
-   ET_LOG(Error, "ArmBackend::init: Binary needs to be 16 byte unaligned");
-   return Error::InvalidProgram;
- }
- if (!((int)foot == next_mul_16((uintptr_t)foot))) {
-   ET_LOG(Error, "ArmBackend::init: Footer expected to be 16 byte aligned");
-   ET_LOG(
-       Error,
-       "ArmBackend::init: Program expected to be multiple of 16 bytes");
-   return Error::InvalidProgram;
- }
- if (!(0 == strncmp(data, "vela_bin_stream", 15))) {
-   ET_LOG(Error, "ArmBackend::init: Binary passed is not a vela_bin_stream");
-   return Error::InvalidProgram;
- }
- if (!(0 == strncmp(foot, "vela_end_stream", 15))) {
-   ET_LOG(Error, "ArmBackend::init: Binary passed missing vela_end_stream");
+ // Verify format of vela_bin
+ if (vela_bin_validate(data, size) == false) {
+   ET_LOG(Error, "Malformed vela_bin_stream found");
    return Error::InvalidProgram;
  }
+
  // Verify address range is accessible current expectation is the program
  // is wholly stored in SRAM
  // TODO: expect to improve capabilities here by supporting DRAM storage
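// A minimal sketch (not part of this diff) of what vela_bin_validate is
// assumed to cover, namely the header/footer checks deleted above; the real
// implementation lives behind VelaBinStream.h and may differ:
static bool vela_bin_validate_sketch(const char* data, size_t size) {
  const char* foot = data + size - sizeof(VelaBinBlock);
  // Both ends must be 16-byte aligned, or block walking could run off the end
  if ((uintptr_t)data % 16 != 0 || (uintptr_t)foot % 16 != 0)
    return false;
  // The stream must open and close with the expected magic strings
  if (strncmp(data, "vela_bin_stream", 15) != 0)
    return false;
  if (strncmp(foot, "vela_end_stream", 15) != 0)
    return false;
  return true;
}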
@@ -108,7 +93,7 @@ class ArmBackend final : public PyTorchBackendInterface {
  char* data = (char*)processed->data();

  // Read key sections from the vela_bin_stream
- if (!this->vela_read(data, &handles, processed->size())) {
+ if (vela_bin_read(data, &handles, processed->size()) == false) {
    ET_LOG(Error, "ArmBackend::vela_read: error, invalid binary layout");
    return Error::InvalidProgram;
  }
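// The free-function signature assumed by the call above, inferred only from
// the call site (the actual declaration is in VelaBinStream.h):
//   bool vela_bin_read(char* data, VelaHandles* handles, int size);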
@@ -124,8 +109,9 @@ class ArmBackend final : public PyTorchBackendInterface {
      handles.scratch_data_size);

  // Write inputs into SRAM scratch area defined by Vela
- for (int i = 0; i < handles.input_shapes.size(); i++) {
-   const char* input_addr = handles.scratch_data + handles.input_offset[i];
+ for (int i = 0; i < handles.inputs->count; i++) {
+   const char* input_addr =
+       handles.scratch_data + handles.inputs->io[i].offset;
    // Process input EValue into scratch
    // TODO: Optimise into direct write from Vela into the SRAM or DRAM output
    // for compatible data layouts.
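// A minimal sketch of the I/O descriptors assumed above; only the fields this
// diff actually dereferences (count, io[i].offset) are grounded in the change,
// the rest (the names, the shape field) are hypothetical, mirroring the old
// VelaShapes layout:
typedef struct {
  int shape[4]; // hypothetical: per-tensor shape, as the old VelaShapes held
  int offset;   // grounded: this tensor's offset within the scratch area
} VelaIO_sketch;

typedef struct {
  int count;           // grounded: number of tensors in this direction
  VelaIO_sketch io[];  // grounded: one descriptor per tensor
} VelaIOs_sketch;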
@@ -168,25 +154,17 @@ class ArmBackend final : public PyTorchBackendInterface {
    return Error::InvalidProgram;
  }

- // output data from Ethos U
- // We only handle one output at the moment
- const char* output_addr = handles.scratch_data + handles.output_offset[0];
- // Outputs are in the index immediately after inputs
- int output_index = handles.input_shapes.size();
-
- if (handles.output_shapes.size() != 1) {
-   ET_LOG(
-       Error,
-       "ArmBackend::execute: currently only support one return tensor");
-   return Error::InvalidProgram;
- }
- // Process results into EValue storage
- // TODO: optimise into direct write for compatible, contig layout
- int* output_address = (int*)output_addr;
- auto tensor_out = args[output_index]->toTensor();
- for (int j = 0; j < tensor_out.numel(); j++) {
-   // TODO: extend beyond tensors with 4 byte elements
-   tensor_out.mutable_data_ptr<int>()[j] = output_address[j];
+ // Write outputs from scratch into EValue pointers
+ for (int i = 0; i < handles.outputs->count; i++) {
+   const char* output_addr =
+       handles.scratch_data + handles.outputs->io[i].offset;
+   // Copy this output out of scratch into its EValue
+   int* output_address = (int*)output_addr;
+   // Outputs are in the indices immediately after the inputs
+   auto tensor_out = args[handles.inputs->count + i]->toTensor();
+   for (int j = 0; j < tensor_out.numel(); j++) {
+     tensor_out.mutable_data_ptr<int>()[j] = output_address[j];
+   }
  }

  return Error::Ok;
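// Indexing illustration (not from this diff): with two inputs and one output,
// args[0] and args[1] are inputs, so the first output tensor is
// args[handles.inputs->count + 0], i.e. args[2].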
@@ -195,114 +173,6 @@ class ArmBackend final : public PyTorchBackendInterface {
  void destroy(DelegateHandle* handle) const override {
    return;
  }
-
- private:
-  typedef struct {
-    const char* cmd_data;
-    size_t cmd_data_size;
-    const char* weight_data;
-    size_t weight_data_size;
-    const char* scratch_data;
-    size_t scratch_data_size;
-    vector<size_t> input_offset;
-    vector<vector<int>> input_shapes;
-    vector<size_t> output_offset;
-    vector<vector<int>> output_shapes;
-  } VelaHandles;
-
-  typedef struct {
-    char name[16];
-    uint32_t size;
-    char _pad[12];
-    char data[];
-  } VelaBinBlock;
-
-  typedef struct {
-    int count;
-    int shape[][4];
-  } VelaShapes;
-
-  typedef struct {
-    int count;
-    int offsets[];
-  } VelaOffsets;
-
-  static int next_mul_16(int n) {
-    return ((n - 1) | 15) + 1;
-  }
-
-  int vela_read(char* data, VelaHandles* handles, int size) const {
-    constexpr const size_t header_size = 16;
-
-    // Read header string
-    if (strncmp(data, "vela_bin_stream", 15)) {
-      return 0;
-    }
-    data += header_size;
-
-    // Expect one or more 'VelaBinBlock's
-    while (1) {
-      VelaBinBlock* b = (VelaBinBlock*)data;
-      data += sizeof(VelaBinBlock) + next_mul_16(b->size);
-
-      // Exit with success on finding end of stream
-      if (!strncmp(b->name, "vela_end_stream", strlen("vela_end_stream")))
-        return 1;
-
-      if (!strncmp(b->name, "cmd_data", strlen("cmd_data"))) {
-        // This magic header confirms a valid command stream in binary
-        if (strncmp(b->data, "COP1", strlen("COP1")))
-          return 0;
-        handles->cmd_data = b->data;
-        handles->cmd_data_size = b->size;
-      }
-      if (!strncmp(b->name, "weight_data", strlen("weight_data"))) {
-        handles->weight_data = b->data;
-        handles->weight_data_size = b->size;
-      }
-      if (!strncmp(b->name, "scratch_data", strlen("scratch_data"))) {
-        handles->scratch_data = b->data;
-        handles->scratch_data_size = b->size;
-      }
-
-      // capture inputs and outputs
-      if (!strncmp(b->name, "input_offset", strlen("input_offset"))) {
-        VelaOffsets* offsets = (VelaOffsets*)b->data;
-        for (int i = 0; i < offsets->count; i++) {
-          handles->input_offset.push_back(offsets->offsets[i]);
-        }
-      }
-      if (!strncmp(b->name, "output_offset", strlen("output_offset"))) {
-        VelaOffsets* offsets = (VelaOffsets*)b->data;
-        for (int i = 0; i < offsets->count; i++) {
-          handles->output_offset.push_back(offsets->offsets[i]);
-        }
-      }
-
-      if (!strncmp(b->name, "input_shape", strlen("input_shape"))) {
-        VelaShapes* shapes = (VelaShapes*)b->data;
-        for (int i = 0; i < shapes->count; i++) {
-          vector<int> s = {
-              shapes->shape[i][0],
-              shapes->shape[i][1],
-              shapes->shape[i][2],
-              shapes->shape[i][3]};
-          handles->input_shapes.push_back(s);
-        }
-      }
-      if (!strncmp(b->name, "output_shape", strlen("output_shape"))) {
-        VelaShapes* shapes = (VelaShapes*)b->data;
-        for (int i = 0; i < shapes->count; i++) {
-          vector<int> s = {
-              shapes->shape[i][0],
-              shapes->shape[i][1],
-              shapes->shape[i][2],
-              shapes->shape[i][3]};
-          handles->output_shapes.push_back(s);
-        }
-      }
-    }
-  }
};
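// Note: the private machinery deleted above (VelaHandles, VelaBinBlock,
// next_mul_16, vela_read) is presumably what now sits behind the new
// VelaBinStream.h include as vela_bin_validate / vela_bin_read, shared
// rather than private to ArmBackend.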
namespace {