@@ -244,37 +244,33 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext,
244
244
return NewMem;
245
245
}
246
246
247
// Indices into a three-element offset/range/size triple (id<3>/range<3>)
// telling where the x, y and z terms are stored. The copy routines read e.g.
// Offset[Pos.XTerm] to obtain the x component, so the values held here must
// always be valid indices in [0, 2].
//
// The members default to the identity ordering {0, 1, 2} so that a
// default-constructed object never carries indeterminate indices. This is the
// same ordering prepTermPositions selects for images and for buffers of
// dimension 1 or 0.
struct TermPositions {
  int XTerm = 0; // index of the x term
  int YTerm = 1; // index of the y term
  int ZTerm = 2; // index of the z term
};
252
- void prepTermPositions (term_positions &pos, int Dimensions,
252
+ void prepTermPositions (TermPositions &pos, int Dimensions,
253
253
detail::SYCLMemObjI::MemObjType type) {
254
254
// For buffers, the offsets/ranges coming from accessor are always
255
255
// id<3>/range<3> But their organization varies by dimension:
256
256
// 1 ==> {width, 1, 1}
257
257
// 2 ==> {height, width, 1}
258
258
// 3 ==> {depth, height, width}
259
- // Some callers enqueue 0 as DimDst/DimSrc.
259
+ // Some callers schedule 0 as DimDst/DimSrc.
260
260
261
261
if (type == detail::SYCLMemObjI::MemObjType::BUFFER) {
262
262
if (Dimensions == 3 ) {
263
- pos.x_term = 2 , pos.y_term = 1 , pos.z_term = 0 ;
263
+ pos.XTerm = 2 , pos.YTerm = 1 , pos.ZTerm = 0 ;
264
264
} else if (Dimensions == 2 ) {
265
- pos.x_term = 1 ;
266
- pos.y_term = 0 ;
267
- pos.z_term = 2 ;
265
+ pos.XTerm = 1 , pos.YTerm = 0 , pos.ZTerm = 2 ;
268
266
} else { // Dimension is 1 or 0
269
- pos.x_term = 0 ;
270
- pos.y_term = 1 ;
271
- pos.z_term = 2 ;
267
+ pos.XTerm = 0 , pos.YTerm = 1 , pos.ZTerm = 2 ;
272
268
}
273
269
} else { // While range<>/id<> use by images is different than buffers, it's
274
270
// consistent with their accessors.
275
- pos.x_term = 0 ;
276
- pos.y_term = 1 ;
277
- pos.z_term = 2 ;
271
+ pos.XTerm = 0 ;
272
+ pos.YTerm = 1 ;
273
+ pos.ZTerm = 2 ;
278
274
}
279
275
}
280
276
@@ -292,41 +288,37 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr,
292
288
const detail::plugin &Plugin = TgtQueue->getPlugin ();
293
289
294
290
detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType ();
295
- term_positions SrcPos, DstPos;
291
+ TermPositions SrcPos, DstPos;
296
292
prepTermPositions (SrcPos, DimSrc, MemType);
297
293
prepTermPositions (DstPos, DimDst, MemType);
298
294
299
- // If the Dimension is 1, and being called by ~SYCLMemObjT,
300
- // thenDstAccessRange[0] and DstSize[0] will already sized to bytes with
301
- // DstElemSize of 1.
302
- size_t DstXOffBytes = DstOffset[DstPos.x_term ] * DstElemSize;
303
- size_t SrcXOffBytes = SrcOffset[SrcPos.x_term ] * SrcElemSize;
304
- size_t DstARWidthBytes = DstAccessRange[DstPos.x_term ] * DstElemSize;
305
- // size_t SrcARWidthBytes = SrcAccessRange[SrcPos.x_term] * SrcElemSize;
306
- size_t DstSzWidthBytes = DstSize[DstPos.x_term ] * DstElemSize;
307
- size_t SrcSzWidthBytes = SrcSize[SrcPos.x_term ] * SrcElemSize;
295
+ size_t DstXOffBytes = DstOffset[DstPos.XTerm ] * DstElemSize;
296
+ size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm ] * SrcElemSize;
297
+ size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.XTerm ] * DstElemSize;
298
+ size_t DstSzWidthBytes = DstSize[DstPos.XTerm ] * DstElemSize;
299
+ size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm ] * SrcElemSize;
308
300
309
301
if (MemType == detail::SYCLMemObjI::MemObjType::BUFFER) {
310
302
if (1 == DimDst && 1 == DimSrc) {
311
303
Plugin.call <PiApiKind::piEnqueueMemBufferWrite>(
312
304
Queue, DstMem,
313
- /* blocking_write=*/ CL_FALSE, DstXOffBytes, DstARWidthBytes ,
305
+ /* blocking_write=*/ CL_FALSE, DstXOffBytes, DstAccessRangeWidthBytes ,
314
306
SrcMem + SrcXOffBytes, DepEvents.size (), DepEvents.data (), &OutEvent);
315
307
} else {
316
308
size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
317
309
size_t BufferSlicePitch =
318
- (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.y_term ] : 0 ;
310
+ (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm ] : 0 ;
319
311
size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
320
312
size_t HostSlicePitch =
321
- (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.y_term ] : 0 ;
313
+ (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm ] : 0 ;
322
314
323
315
pi_buff_rect_offset_struct BufferOffset{
324
- DstXOffBytes, DstOffset[DstPos.y_term ], DstOffset[DstPos.z_term ]};
316
+ DstXOffBytes, DstOffset[DstPos.YTerm ], DstOffset[DstPos.ZTerm ]};
325
317
pi_buff_rect_offset_struct HostOffset{
326
- SrcXOffBytes, SrcOffset[SrcPos.y_term ], SrcOffset[SrcPos.z_term ]};
327
- pi_buff_rect_region_struct RectRegion{DstARWidthBytes ,
328
- DstAccessRange[DstPos.y_term ],
329
- DstAccessRange[DstPos.z_term ]};
318
+ SrcXOffBytes, SrcOffset[SrcPos.YTerm ], SrcOffset[SrcPos.ZTerm ]};
319
+ pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes ,
320
+ DstAccessRange[DstPos.YTerm ],
321
+ DstAccessRange[DstPos.ZTerm ]};
330
322
331
323
Plugin.call <PiApiKind::piEnqueueMemBufferWriteRect>(
332
324
Queue, DstMem,
@@ -337,14 +329,14 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr,
337
329
} else {
338
330
size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
339
331
size_t InputSlicePitch =
340
- (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.y_term ] : 0 ;
332
+ (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm ] : 0 ;
341
333
342
- pi_image_offset_struct Origin{DstOffset[DstPos.x_term ],
343
- DstOffset[DstPos.y_term ],
344
- DstOffset[DstPos.z_term ]};
345
- pi_image_region_struct Region{DstAccessRange[DstPos.x_term ],
346
- DstAccessRange[DstPos.y_term ],
347
- DstAccessRange[DstPos.z_term ]};
334
+ pi_image_offset_struct Origin{DstOffset[DstPos.XTerm ],
335
+ DstOffset[DstPos.YTerm ],
336
+ DstOffset[DstPos.ZTerm ]};
337
+ pi_image_region_struct Region{DstAccessRange[DstPos.XTerm ],
338
+ DstAccessRange[DstPos.YTerm ],
339
+ DstAccessRange[DstPos.ZTerm ]};
348
340
349
341
Plugin.call <PiApiKind::piEnqueueMemImageWrite>(
350
342
Queue, DstMem,
@@ -367,41 +359,43 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
367
359
const detail::plugin &Plugin = SrcQueue->getPlugin ();
368
360
369
361
detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType ();
370
- term_positions SrcPos, DstPos;
362
+ TermPositions SrcPos, DstPos;
371
363
prepTermPositions (SrcPos, DimSrc, MemType);
372
364
prepTermPositions (DstPos, DimDst, MemType);
373
365
374
- // If the Dimension is 1, and being called by ~SYCLMemObjT,
375
- // thenDstAccessRange[0] and DstSize[0] will already sized to bytes with
376
- // DstElemSize of 1.
377
- size_t DstXOffBytes = DstOffset[DstPos.x_term ] * DstElemSize;
378
- size_t SrcXOffBytes = SrcOffset[SrcPos.x_term ] * SrcElemSize;
379
- // size_t DstARWidthBytes = DstAccessRange[DstPos.x_term] * DstElemSize;
380
- size_t SrcARWidthBytes = SrcAccessRange[SrcPos.x_term ] * SrcElemSize;
381
- size_t DstSzWidthBytes = DstSize[DstPos.x_term ] * DstElemSize;
382
- size_t SrcSzWidthBytes = SrcSize[SrcPos.x_term ] * SrcElemSize;
366
+ // For a given buffer, the various mem copy routines (copyD2H, copyH2D,
367
+ // copyD2D) will usually have the same values for AccessRange, Size,
368
+ // Dimension, Offset, etc. EXCEPT when the dtor for ~SYCLMemObjT is called.
369
+ // Essentially, it schedules a copyBack of chars thus in copyD2H the
370
+ // Dimension will then be 1 and DstAccessRange[0] and DstSize[0] will be
371
+ // sized to bytes with a DstElemSize of 1.
372
+ size_t DstXOffBytes = DstOffset[DstPos.XTerm ] * DstElemSize;
373
+ size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm ] * SrcElemSize;
374
+ size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm ] * SrcElemSize;
375
+ size_t DstSzWidthBytes = DstSize[DstPos.XTerm ] * DstElemSize;
376
+ size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm ] * SrcElemSize;
383
377
384
378
if (MemType == detail::SYCLMemObjI::MemObjType::BUFFER) {
385
379
if (1 == DimDst && 1 == DimSrc) {
386
380
Plugin.call <PiApiKind::piEnqueueMemBufferRead>(
387
381
Queue, SrcMem,
388
- /* blocking_read=*/ CL_FALSE, SrcXOffBytes, SrcARWidthBytes ,
382
+ /* blocking_read=*/ CL_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes ,
389
383
DstMem + DstXOffBytes, DepEvents.size (), DepEvents.data (), &OutEvent);
390
384
} else {
391
385
size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
392
386
size_t BufferSlicePitch =
393
- (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.y_term ] : 0 ;
387
+ (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm ] : 0 ;
394
388
size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
395
389
size_t HostSlicePitch =
396
- (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.y_term ] : 0 ;
390
+ (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm ] : 0 ;
397
391
398
392
pi_buff_rect_offset_struct BufferOffset{
399
- SrcXOffBytes, SrcOffset[SrcPos.y_term ], SrcOffset[SrcPos.z_term ]};
393
+ SrcXOffBytes, SrcOffset[SrcPos.YTerm ], SrcOffset[SrcPos.ZTerm ]};
400
394
pi_buff_rect_offset_struct HostOffset{
401
- DstXOffBytes, DstOffset[DstPos.y_term ], DstOffset[DstPos.z_term ]};
402
- pi_buff_rect_region_struct RectRegion{SrcARWidthBytes ,
403
- SrcAccessRange[SrcPos.y_term ],
404
- SrcAccessRange[SrcPos.z_term ]};
395
+ DstXOffBytes, DstOffset[DstPos.YTerm ], DstOffset[DstPos.ZTerm ]};
396
+ pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes ,
397
+ SrcAccessRange[SrcPos.YTerm ],
398
+ SrcAccessRange[SrcPos.ZTerm ]};
405
399
406
400
Plugin.call <PiApiKind::piEnqueueMemBufferReadRect>(
407
401
Queue, SrcMem,
@@ -412,14 +406,14 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
412
406
} else {
413
407
size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
414
408
size_t SlicePitch =
415
- (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.y_term ] : 0 ;
409
+ (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm ] : 0 ;
416
410
417
- pi_image_offset_struct Offset{SrcOffset[SrcPos.x_term ],
418
- SrcOffset[SrcPos.y_term ],
419
- SrcOffset[SrcPos.z_term ]};
420
- pi_image_region_struct Region{SrcAccessRange[SrcPos.x_term ],
421
- SrcAccessRange[SrcPos.y_term ],
422
- SrcAccessRange[SrcPos.z_term ]};
411
+ pi_image_offset_struct Offset{SrcOffset[SrcPos.XTerm ],
412
+ SrcOffset[SrcPos.YTerm ],
413
+ SrcOffset[SrcPos.ZTerm ]};
414
+ pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm ],
415
+ SrcAccessRange[SrcPos.YTerm ],
416
+ SrcAccessRange[SrcPos.ZTerm ]};
423
417
424
418
Plugin.call <PiApiKind::piEnqueueMemImageRead>(
425
419
Queue, SrcMem, CL_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem,
@@ -440,61 +434,58 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
440
434
const detail::plugin &Plugin = SrcQueue->getPlugin ();
441
435
442
436
detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType ();
443
- term_positions SrcPos, DstPos;
437
+ TermPositions SrcPos, DstPos;
444
438
prepTermPositions (SrcPos, DimSrc, MemType);
445
439
prepTermPositions (DstPos, DimDst, MemType);
446
440
447
- // If the Dimension is 1, and being called by ~SYCLMemObjT,
448
- // thenDstAccessRange[0] and DstSize[0] will already sized to bytes with
449
- // DstElemSize of 1.
450
- size_t DstXOffBytes = DstOffset[DstPos.x_term ] * DstElemSize;
451
- size_t SrcXOffBytes = SrcOffset[SrcPos.x_term ] * SrcElemSize;
452
- // size_t DstARWidthBytes = DstAccessRange[DstPos.x_term] * DstElemSize;
453
- size_t SrcARWidthBytes = SrcAccessRange[SrcPos.x_term ] * SrcElemSize;
454
- size_t DstSzWidthBytes = DstSize[DstPos.x_term ] * DstElemSize;
455
- size_t SrcSzWidthBytes = SrcSize[SrcPos.x_term ] * SrcElemSize;
441
+ size_t DstXOffBytes = DstOffset[DstPos.XTerm ] * DstElemSize;
442
+ size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm ] * SrcElemSize;
443
+ size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm ] * SrcElemSize;
444
+ size_t DstSzWidthBytes = DstSize[DstPos.XTerm ] * DstElemSize;
445
+ size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm ] * SrcElemSize;
456
446
457
447
if (MemType == detail::SYCLMemObjI::MemObjType::BUFFER) {
458
448
if (1 == DimDst && 1 == DimSrc) {
459
449
Plugin.call <PiApiKind::piEnqueueMemBufferCopy>(
460
- Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes, SrcARWidthBytes,
461
- DepEvents.size (), DepEvents.data (), &OutEvent);
450
+ Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes,
451
+ SrcAccessRangeWidthBytes, DepEvents.size (), DepEvents.data (),
452
+ &OutEvent);
462
453
} else {
463
454
// passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will
464
455
// calculate both src and dest pitch using region[0], which is not correct
465
456
// if src and dest are not the same size.
466
457
size_t SrcRowPitch = SrcSzWidthBytes;
467
458
size_t SrcSlicePitch = (DimSrc <= 1 )
468
459
? SrcSzWidthBytes
469
- : SrcSzWidthBytes * SrcSize[SrcPos.y_term ];
460
+ : SrcSzWidthBytes * SrcSize[SrcPos.YTerm ];
470
461
size_t DstRowPitch = DstSzWidthBytes;
471
462
size_t DstSlicePitch = (DimDst <= 1 )
472
463
? DstSzWidthBytes
473
- : DstSzWidthBytes * DstSize[DstPos.y_term ];
464
+ : DstSzWidthBytes * DstSize[DstPos.YTerm ];
474
465
475
466
pi_buff_rect_offset_struct SrcOrigin{
476
- SrcXOffBytes, SrcOffset[SrcPos.y_term ], SrcOffset[SrcPos.z_term ]};
467
+ SrcXOffBytes, SrcOffset[SrcPos.YTerm ], SrcOffset[SrcPos.ZTerm ]};
477
468
pi_buff_rect_offset_struct DstOrigin{
478
- DstXOffBytes, DstOffset[DstPos.y_term ], DstOffset[DstPos.z_term ]};
479
- pi_buff_rect_region_struct Region{SrcARWidthBytes ,
480
- SrcAccessRange[SrcPos.y_term ],
481
- SrcAccessRange[SrcPos.z_term ]};
469
+ DstXOffBytes, DstOffset[DstPos.YTerm ], DstOffset[DstPos.ZTerm ]};
470
+ pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes ,
471
+ SrcAccessRange[SrcPos.YTerm ],
472
+ SrcAccessRange[SrcPos.ZTerm ]};
482
473
483
474
Plugin.call <PiApiKind::piEnqueueMemBufferCopyRect>(
484
475
Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch,
485
476
SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size (),
486
477
DepEvents.data (), &OutEvent);
487
478
}
488
479
} else {
489
- pi_image_offset_struct SrcOrigin{SrcOffset[SrcPos.x_term ],
490
- SrcOffset[SrcPos.y_term ],
491
- SrcOffset[SrcPos.z_term ]};
492
- pi_image_offset_struct DstOrigin{DstOffset[DstPos.x_term ],
493
- DstOffset[DstPos.y_term ],
494
- DstOffset[DstPos.z_term ]};
495
- pi_image_region_struct Region{SrcAccessRange[SrcPos.x_term ],
496
- SrcAccessRange[SrcPos.y_term ],
497
- SrcAccessRange[SrcPos.z_term ]};
480
+ pi_image_offset_struct SrcOrigin{SrcOffset[SrcPos.XTerm ],
481
+ SrcOffset[SrcPos.YTerm ],
482
+ SrcOffset[SrcPos.ZTerm ]};
483
+ pi_image_offset_struct DstOrigin{DstOffset[DstPos.XTerm ],
484
+ DstOffset[DstPos.YTerm ],
485
+ DstOffset[DstPos.ZTerm ]};
486
+ pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm ],
487
+ SrcAccessRange[SrcPos.YTerm ],
488
+ SrcAccessRange[SrcPos.ZTerm ]};
498
489
499
490
Plugin.call <PiApiKind::piEnqueueMemImageCopy>(
500
491
Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region,
0 commit comments