@@ -51,12 +51,26 @@ CommandListCoreFamilyImmediate<gfxCoreFamily>::CommandListCoreFamilyImmediate(ui
51
51
}
52
52
53
53
template <GFXCORE_FAMILY gfxCoreFamily>
54
- void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies, size_t commandSize) {
54
+ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies, size_t commandSize, bool requestCommandBufferInLocalMem ) {
55
55
this ->commandContainer .fillReusableAllocationLists ();
56
56
57
- /* Command container might has two command buffers. If it has, one is in local memory, because relaxed ordering requires that and one in system for copying it into ring buffer.
58
- If relaxed ordering is needed in given dispatch and current command stream is in system memory, swap of command streams is required to ensure local memory. Same in the opposite scenario. */
59
- if (hasRelaxedOrderingDependencies == NEO::MemoryPoolHelper::isSystemMemoryPool (this ->commandContainer .getCommandStream ()->getGraphicsAllocation ()->getMemoryPool ())) {
57
+ // Command container might have two command buffers - one in local mem (mainly for relaxed ordering and any other specific purposes) and one in system mem for copying into ring buffer.
58
+ // If relaxed ordering is needed in given dispatch or if we need to force Local mem usage, and current command stream is in system memory, swap of command streams is required to ensure local memory.
59
+ // If neither relaxed ordering nor forced local mem usage is requested and the current command stream is in local mem, then we also need to swap.
60
+ bool swapStreams = false ;
61
+ if (hasRelaxedOrderingDependencies) {
62
+ if (NEO::MemoryPoolHelper::isSystemMemoryPool (this ->commandContainer .getCommandStream ()->getGraphicsAllocation ()->getMemoryPool ())) {
63
+ swapStreams = true ;
64
+ }
65
+ } else {
66
+ if (requestCommandBufferInLocalMem && NEO::MemoryPoolHelper::isSystemMemoryPool (this ->commandContainer .getCommandStream ()->getGraphicsAllocation ()->getMemoryPool ())) {
67
+ swapStreams = true ;
68
+ } else if (!requestCommandBufferInLocalMem && !NEO::MemoryPoolHelper::isSystemMemoryPool (this ->commandContainer .getCommandStream ()->getGraphicsAllocation ()->getMemoryPool ())) {
69
+ swapStreams = true ;
70
+ }
71
+ }
72
+
73
+ if (swapStreams) {
60
74
if (this ->commandContainer .swapStreams ()) {
61
75
this ->cmdListCurrentStartOffset = this ->commandContainer .getCommandStream ()->getUsed ();
62
76
}
@@ -514,7 +528,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
514
528
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed (numWaitEvents, false );
515
529
bool stallingCmdsForRelaxedOrdering = hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch);
516
530
517
- checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize);
531
+ checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false );
518
532
bool hostWait = waitForEventsFromHost ();
519
533
if (hostWait) {
520
534
this ->synchronizeEventList (numWaitEvents, phWaitEvents);
@@ -571,7 +585,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelInd
571
585
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
572
586
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed (numWaitEvents, false );
573
587
574
- checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize);
588
+ checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false );
575
589
576
590
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect (kernelHandle, pDispatchArgumentsBuffer,
577
591
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
@@ -598,7 +612,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_even
598
612
isStallingOperation = hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch);
599
613
}
600
614
601
- checkAvailableSpace (numWaitEvents, false , commonImmediateCommandSize);
615
+ checkAvailableSpace (numWaitEvents, false , commonImmediateCommandSize, false );
602
616
603
617
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier (hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
604
618
@@ -623,7 +637,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
623
637
auto sizePerBlit = sizeof (typename GfxFamily::XY_COPY_BLT) + NEO::BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize ();
624
638
estimatedSize += nBlits * sizePerBlit;
625
639
}
626
- checkAvailableSpace (numWaitEvents, memoryCopyParams.relaxedOrderingDispatch , estimatedSize);
640
+ checkAvailableSpace (numWaitEvents, memoryCopyParams.relaxedOrderingDispatch , estimatedSize, false );
627
641
628
642
bool hasStallindCmds = hasStallingCmdsForRelaxedOrdering (numWaitEvents, memoryCopyParams.relaxedOrderingDispatch );
629
643
@@ -679,7 +693,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
679
693
auto sizePerBlit = sizeof (typename GfxFamily::XY_COPY_BLT) + NEO::BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize ();
680
694
estimatedSize += xBlits * yBlits * zBlits * sizePerBlit;
681
695
}
682
- checkAvailableSpace (numWaitEvents, memoryCopyParams.relaxedOrderingDispatch , estimatedSize);
696
+ checkAvailableSpace (numWaitEvents, memoryCopyParams.relaxedOrderingDispatch , estimatedSize, false );
683
697
684
698
bool hasStallindCmds = hasStallingCmdsForRelaxedOrdering (numWaitEvents, memoryCopyParams.relaxedOrderingDispatch );
685
699
@@ -722,7 +736,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
722
736
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
723
737
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed (numWaitEvents, false );
724
738
725
- checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize);
739
+ checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false );
726
740
727
741
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill (ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
728
742
@@ -736,7 +750,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
736
750
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed (0 , false );
737
751
bool hasStallingCmds = !Event::fromHandle (hSignalEvent)->isCounterBased () || hasStallingCmdsForRelaxedOrdering (0 , relaxedOrderingDispatch);
738
752
739
- checkAvailableSpace (0 , false , commonImmediateCommandSize);
753
+ checkAvailableSpace (0 , false , commonImmediateCommandSize, false );
740
754
ret = CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent (hSignalEvent, relaxedOrderingDispatch);
741
755
return flushImmediate (ret, true , hasStallingCmds, relaxedOrderingDispatch, false , false , hSignalEvent, false );
742
756
}
@@ -745,7 +759,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
745
759
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_event_handle_t hSignalEvent) {
746
760
ze_result_t ret = ZE_RESULT_SUCCESS;
747
761
748
- checkAvailableSpace (0 , false , commonImmediateCommandSize);
762
+ checkAvailableSpace (0 , false , commonImmediateCommandSize, false );
749
763
ret = CommandListCoreFamily<gfxCoreFamily>::appendEventReset (hSignalEvent);
750
764
return flushImmediate (ret, true , true , false , false , false , hSignalEvent, false );
751
765
}
@@ -755,7 +769,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
755
769
NEO::GraphicsAllocation *srcAllocation,
756
770
size_t size, bool flushHost) {
757
771
758
- checkAvailableSpace (0 , false , commonImmediateCommandSize);
772
+ checkAvailableSpace (0 , false , commonImmediateCommandSize, false );
759
773
760
774
ze_result_t ret;
761
775
@@ -792,7 +806,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
792
806
}
793
807
794
808
if (!skipFlush) {
795
- checkAvailableSpace (numEvents, false , commonImmediateCommandSize);
809
+ checkAvailableSpace (numEvents, false , commonImmediateCommandSize, false );
796
810
}
797
811
798
812
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents (numEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, false , copyOffloadOperation);
@@ -810,7 +824,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteGlobalTime
810
824
uint64_t *dstptr, ze_event_handle_t hSignalEvent,
811
825
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
812
826
813
- checkAvailableSpace (numWaitEvents, false , commonImmediateCommandSize);
827
+ checkAvailableSpace (numWaitEvents, false , commonImmediateCommandSize, false );
814
828
815
829
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp (dstptr, hSignalEvent, numWaitEvents, phWaitEvents);
816
830
@@ -854,7 +868,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
854
868
auto sizePerBlit = sizeof (typename GfxFamily::XY_BLOCK_COPY_BLT) + NEO::BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize ();
855
869
estimatedSize += nBlits * sizePerBlit;
856
870
}
857
- checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, estimatedSize);
871
+ checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, estimatedSize, false );
858
872
859
873
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion (hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent,
860
874
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
@@ -872,7 +886,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
872
886
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
873
887
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed (numWaitEvents, false );
874
888
875
- checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize);
889
+ checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false );
876
890
877
891
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory (hDstImage, srcPtr, pDstRegion, hSignalEvent,
878
892
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
@@ -890,7 +904,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
890
904
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
891
905
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed (numWaitEvents, false );
892
906
893
- checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize);
907
+ checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false );
894
908
895
909
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory (dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
896
910
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
@@ -910,7 +924,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
910
924
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
911
925
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed (numWaitEvents, false );
912
926
913
- checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize);
927
+ checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false );
914
928
915
929
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt (hDstImage, srcPtr, pDstRegion, srcRowPitch, srcSlicePitch,
916
930
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
@@ -930,7 +944,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
930
944
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
931
945
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed (numWaitEvents, false );
932
946
933
- checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize);
947
+ checkAvailableSpace (numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false );
934
948
935
949
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt (dstPtr, hSrcImage, pSrcRegion, destRowPitch, destSlicePitch,
936
950
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
@@ -945,22 +959,22 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryRangesBar
945
959
ze_event_handle_t hSignalEvent,
946
960
uint32_t numWaitEvents,
947
961
ze_event_handle_t *phWaitEvents) {
948
- checkAvailableSpace (numWaitEvents, false , commonImmediateCommandSize);
962
+ checkAvailableSpace (numWaitEvents, false , commonImmediateCommandSize, false );
949
963
950
964
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier (numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents);
951
965
return flushImmediate (ret, true , true , false , false , false , hSignalEvent, false );
952
966
}
953
967
954
968
template <GFXCORE_FAMILY gfxCoreFamily>
955
969
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnMemory(void *desc, void *ptr, uint64_t data, ze_event_handle_t signalEventHandle, bool useQwordData) {
956
- checkAvailableSpace (0 , false , commonImmediateCommandSize);
970
+ checkAvailableSpace (0 , false , commonImmediateCommandSize, false );
957
971
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory (desc, ptr, data, signalEventHandle, useQwordData);
958
972
return flushImmediate (ret, true , false , false , false , false , signalEventHandle, false );
959
973
}
960
974
961
975
template <GFXCORE_FAMILY gfxCoreFamily>
962
976
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteToMemory(void *desc, void *ptr, uint64_t data) {
963
- checkAvailableSpace (0 , false , commonImmediateCommandSize);
977
+ checkAvailableSpace (0 , false , commonImmediateCommandSize, false );
964
978
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory (desc, ptr, data);
965
979
return flushImmediate (ret, true , false , false , false , false , nullptr , false );
966
980
}
@@ -1559,7 +1573,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendCommandLists(ui
1559
1573
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
1560
1574
1561
1575
auto ret = ZE_RESULT_SUCCESS;
1562
- checkAvailableSpace (numWaitEvents, false , commonImmediateCommandSize);
1576
+
1577
+ // For API functionality, we require the command buffer alloc to be in local mem.
1578
+ // So ensure we force it when checking available space and when allocating any new command buffer allocs
1579
+ checkAvailableSpace (numWaitEvents, false , commonImmediateCommandSize, true );
1563
1580
if (numWaitEvents) {
1564
1581
ret = this ->appendWaitOnEvents (numWaitEvents, phWaitEvents, nullptr , false , true , true , true , true , false );
1565
1582
}
@@ -1584,7 +1601,9 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendCommandLists(ui
1584
1601
}
1585
1602
1586
1603
bool hasStallingCmds = true ;
1587
- return flushImmediate (ret, true , hasStallingCmds, relaxedOrderingDispatch, true , false , hSignalEvent, true );
1604
+ ret = flushImmediate (ret, true , hasStallingCmds, relaxedOrderingDispatch, true , false , hSignalEvent, true );
1605
+
1606
+ return ret;
1588
1607
}
1589
1608
1590
1609
} // namespace L0
0 commit comments