@@ -676,162 +676,155 @@ ur_result_t AsanInterceptor::prepareLaunch(
676
676
std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
677
677
ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) {
678
678
679
- do {
680
- auto KernelInfo = getKernelInfo (Kernel);
681
- assert (KernelInfo && " Kernel should be instrumented" );
682
-
683
- // Validate pointer arguments
684
- if (getOptions ().DetectKernelArguments ) {
685
- for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs ) {
686
- auto Ptr = PtrPair.first ;
687
- if (Ptr == nullptr ) {
688
- continue ;
689
- }
690
- if (auto ValidateResult = ValidateUSMPointer (
691
- ContextInfo->Handle , DeviceInfo->Handle , (uptr)Ptr)) {
692
- ReportInvalidKernelArgument (Kernel, ArgIndex, (uptr)Ptr,
693
- ValidateResult, PtrPair.second );
694
- exitWithErrors ();
695
- }
679
+ auto KernelInfo = getKernelInfo (Kernel);
680
+ assert (KernelInfo && " Kernel should be instrumented" );
681
+
682
+ // Validate pointer arguments
683
+ if (getOptions ().DetectKernelArguments ) {
684
+ for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs ) {
685
+ auto Ptr = PtrPair.first ;
686
+ if (Ptr == nullptr ) {
687
+ continue ;
688
+ }
689
+ if (auto ValidateResult = ValidateUSMPointer (
690
+ ContextInfo->Handle , DeviceInfo->Handle , (uptr)Ptr)) {
691
+ ReportInvalidKernelArgument (Kernel, ArgIndex, (uptr)Ptr,
692
+ ValidateResult, PtrPair.second );
693
+ exitWithErrors ();
696
694
}
697
695
}
696
+ }
698
697
699
- // Set membuffer arguments
700
- for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs ) {
701
- char *ArgPointer = nullptr ;
702
- UR_CALL (MemBuffer->getHandle (DeviceInfo->Handle , ArgPointer));
703
- ur_result_t URes = getContext ()->urDdiTable .Kernel .pfnSetArgPointer (
704
- Kernel, ArgIndex, nullptr , ArgPointer);
705
- if (URes != UR_RESULT_SUCCESS) {
706
- getContext ()->logger .error (
707
- " Failed to set buffer {} as the {} arg to kernel {}: {}" ,
708
- ur_cast<ur_mem_handle_t >(MemBuffer.get ()), ArgIndex, Kernel,
709
- URes);
710
- }
698
+ // Set membuffer arguments
699
+ for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs ) {
700
+ char *ArgPointer = nullptr ;
701
+ UR_CALL (MemBuffer->getHandle (DeviceInfo->Handle , ArgPointer));
702
+ ur_result_t URes = getContext ()->urDdiTable .Kernel .pfnSetArgPointer (
703
+ Kernel, ArgIndex, nullptr , ArgPointer);
704
+ if (URes != UR_RESULT_SUCCESS) {
705
+ getContext ()->logger .error (
706
+ " Failed to set buffer {} as the {} arg to kernel {}: {}" ,
707
+ ur_cast<ur_mem_handle_t >(MemBuffer.get ()), ArgIndex, Kernel,
708
+ URes);
711
709
}
710
+ }
712
711
713
- auto ArgNums = GetKernelNumArgs (Kernel);
714
- // We must prepare all kernel args before call
715
- // urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on
716
- // CPU device.
717
- if (ArgNums) {
718
- ur_result_t URes = getContext ()->urDdiTable .Kernel .pfnSetArgPointer (
719
- Kernel, ArgNums - 1 , nullptr , LaunchInfo.Data .getDevicePtr ());
720
- if (URes != UR_RESULT_SUCCESS) {
721
- getContext ()->logger .error (" Failed to set launch info: {}" ,
722
- URes);
723
- return URes;
724
- }
712
+ auto ArgNums = GetKernelNumArgs (Kernel);
713
+ // We must prepare all kernel args before call
714
+ // urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on
715
+ // CPU device.
716
+ if (ArgNums) {
717
+ ur_result_t URes = getContext ()->urDdiTable .Kernel .pfnSetArgPointer (
718
+ Kernel, ArgNums - 1 , nullptr , LaunchInfo.Data .getDevicePtr ());
719
+ if (URes != UR_RESULT_SUCCESS) {
720
+ getContext ()->logger .error (" Failed to set launch info: {}" , URes);
721
+ return URes;
725
722
}
723
+ }
726
724
727
- if (LaunchInfo.LocalWorkSize .empty ()) {
728
- LaunchInfo.LocalWorkSize .resize (LaunchInfo.WorkDim );
729
- auto URes =
730
- getContext ()->urDdiTable .Kernel .pfnGetSuggestedLocalWorkSize (
731
- Kernel, Queue, LaunchInfo.WorkDim ,
732
- LaunchInfo.GlobalWorkOffset , LaunchInfo.GlobalWorkSize ,
733
- LaunchInfo.LocalWorkSize .data ());
734
- if (URes != UR_RESULT_SUCCESS) {
735
- if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
736
- return URes;
737
- }
738
- // If urKernelGetSuggestedLocalWorkSize is not supported by driver, we fallback
739
- // to inefficient implementation
740
- for (size_t Dim = 0 ; Dim < LaunchInfo.WorkDim ; ++Dim) {
741
- LaunchInfo.LocalWorkSize [Dim] = 1 ;
742
- }
725
+ if (LaunchInfo.LocalWorkSize .empty ()) {
726
+ LaunchInfo.LocalWorkSize .resize (LaunchInfo.WorkDim );
727
+ auto URes =
728
+ getContext ()->urDdiTable .Kernel .pfnGetSuggestedLocalWorkSize (
729
+ Kernel, Queue, LaunchInfo.WorkDim , LaunchInfo.GlobalWorkOffset ,
730
+ LaunchInfo.GlobalWorkSize , LaunchInfo.LocalWorkSize .data ());
731
+ if (URes != UR_RESULT_SUCCESS) {
732
+ if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
733
+ return URes;
734
+ }
735
+ // If urKernelGetSuggestedLocalWorkSize is not supported by driver, we fallback
736
+ // to inefficient implementation
737
+ for (size_t Dim = 0 ; Dim < LaunchInfo.WorkDim ; ++Dim) {
738
+ LaunchInfo.LocalWorkSize [Dim] = 1 ;
743
739
}
744
740
}
741
+ }
745
742
746
- const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize .data ();
747
- uint32_t NumWG = 1 ;
748
- for (uint32_t Dim = 0 ; Dim < LaunchInfo.WorkDim ; ++Dim) {
749
- NumWG *= (LaunchInfo.GlobalWorkSize [Dim] + LocalWorkSize[Dim] - 1 ) /
750
- LocalWorkSize[Dim];
751
- }
743
+ const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize .data ();
744
+ uint32_t NumWG = 1 ;
745
+ for (uint32_t Dim = 0 ; Dim < LaunchInfo.WorkDim ; ++Dim) {
746
+ NumWG *= (LaunchInfo.GlobalWorkSize [Dim] + LocalWorkSize[Dim] - 1 ) /
747
+ LocalWorkSize[Dim];
748
+ }
752
749
753
- // Prepare asan runtime data
754
- LaunchInfo.Data .Host .GlobalShadowOffset =
755
- DeviceInfo->Shadow ->ShadowBegin ;
756
- LaunchInfo.Data .Host .GlobalShadowOffsetEnd =
757
- DeviceInfo->Shadow ->ShadowEnd ;
758
- LaunchInfo.Data .Host .DeviceTy = DeviceInfo->Type ;
759
- LaunchInfo.Data .Host .Debug = getOptions ().Debug ? 1 : 0 ;
760
-
761
- auto LocalMemoryUsage =
762
- GetKernelLocalMemorySize (Kernel, DeviceInfo->Handle );
763
- auto PrivateMemoryUsage =
764
- GetKernelPrivateMemorySize (Kernel, DeviceInfo->Handle );
765
-
766
- getContext ()->logger .info (
767
- " KernelInfo {} (LocalMemory={}, PrivateMemory={})" , (void *)Kernel,
768
- LocalMemoryUsage, PrivateMemoryUsage);
769
-
770
- // Write shadow memory offset for local memory
771
- if (getOptions ().DetectLocals ) {
772
- if (DeviceInfo->Shadow ->AllocLocalShadow (
773
- Queue, NumWG, LaunchInfo.Data .Host .LocalShadowOffset ,
774
- LaunchInfo.Data .Host .LocalShadowOffsetEnd ) !=
775
- UR_RESULT_SUCCESS) {
776
- getContext ()->logger .warning (
777
- " Failed to allocate shadow memory for local "
778
- " memory, maybe the number of workgroup ({}) is too "
779
- " large" ,
780
- NumWG);
781
- getContext ()->logger .warning (
782
- " Skip checking local memory of kernel <{}>" ,
783
- GetKernelName (Kernel));
784
- } else {
785
- getContext ()->logger .info (
786
- " ShadowMemory(Local, WorkGroup{}, {} - {})" , NumWG,
787
- (void *)LaunchInfo.Data .Host .LocalShadowOffset ,
788
- (void *)LaunchInfo.Data .Host .LocalShadowOffsetEnd );
789
- }
750
+ // Prepare asan runtime data
751
+ LaunchInfo.Data .Host .GlobalShadowOffset = DeviceInfo->Shadow ->ShadowBegin ;
752
+ LaunchInfo.Data .Host .GlobalShadowOffsetEnd = DeviceInfo->Shadow ->ShadowEnd ;
753
+ LaunchInfo.Data .Host .DeviceTy = DeviceInfo->Type ;
754
+ LaunchInfo.Data .Host .Debug = getOptions ().Debug ? 1 : 0 ;
755
+
756
+ auto LocalMemoryUsage =
757
+ GetKernelLocalMemorySize (Kernel, DeviceInfo->Handle );
758
+ auto PrivateMemoryUsage =
759
+ GetKernelPrivateMemorySize (Kernel, DeviceInfo->Handle );
760
+
761
+ getContext ()->logger .info (
762
+ " KernelInfo {} (LocalMemory={}, PrivateMemory={})" , (void *)Kernel,
763
+ LocalMemoryUsage, PrivateMemoryUsage);
764
+
765
+ // Write shadow memory offset for local memory
766
+ if (getOptions ().DetectLocals ) {
767
+ if (DeviceInfo->Shadow ->AllocLocalShadow (
768
+ Queue, NumWG, LaunchInfo.Data .Host .LocalShadowOffset ,
769
+ LaunchInfo.Data .Host .LocalShadowOffsetEnd ) !=
770
+ UR_RESULT_SUCCESS) {
771
+ getContext ()->logger .warning (
772
+ " Failed to allocate shadow memory for local "
773
+ " memory, maybe the number of workgroup ({}) is too "
774
+ " large" ,
775
+ NumWG);
776
+ getContext ()->logger .warning (
777
+ " Skip checking local memory of kernel <{}>" ,
778
+ GetKernelName (Kernel));
779
+ } else {
780
+ getContext ()->logger .info (
781
+ " ShadowMemory(Local, WorkGroup{}, {} - {})" , NumWG,
782
+ (void *)LaunchInfo.Data .Host .LocalShadowOffset ,
783
+ (void *)LaunchInfo.Data .Host .LocalShadowOffsetEnd );
790
784
}
785
+ }
791
786
792
- // Write shadow memory offset for private memory
793
- if (getOptions ().DetectPrivates ) {
794
- if (DeviceInfo->Shadow ->AllocPrivateShadow (
795
- Queue, NumWG, LaunchInfo.Data .Host .PrivateShadowOffset ,
796
- LaunchInfo.Data .Host .PrivateShadowOffsetEnd ) !=
797
- UR_RESULT_SUCCESS) {
798
- getContext ()->logger .warning (
799
- " Failed to allocate shadow memory for private "
800
- " memory, maybe the number of workgroup ({}) is too "
801
- " large" ,
802
- NumWG);
803
- getContext ()->logger .warning (
804
- " Skip checking private memory of kernel <{}>" ,
805
- GetKernelName (Kernel));
806
- } else {
807
- getContext ()->logger .info (
808
- " ShadowMemory(Private, WorkGroup{}, {} - {})" , NumWG,
809
- (void *)LaunchInfo.Data .Host .PrivateShadowOffset ,
810
- (void *)LaunchInfo.Data .Host .PrivateShadowOffsetEnd );
811
- }
787
+ // Write shadow memory offset for private memory
788
+ if (getOptions ().DetectPrivates ) {
789
+ if (DeviceInfo->Shadow ->AllocPrivateShadow (
790
+ Queue, NumWG, LaunchInfo.Data .Host .PrivateShadowOffset ,
791
+ LaunchInfo.Data .Host .PrivateShadowOffsetEnd ) !=
792
+ UR_RESULT_SUCCESS) {
793
+ getContext ()->logger .warning (
794
+ " Failed to allocate shadow memory for private "
795
+ " memory, maybe the number of workgroup ({}) is too "
796
+ " large" ,
797
+ NumWG);
798
+ getContext ()->logger .warning (
799
+ " Skip checking private memory of kernel <{}>" ,
800
+ GetKernelName (Kernel));
801
+ } else {
802
+ getContext ()->logger .info (
803
+ " ShadowMemory(Private, WorkGroup{}, {} - {})" , NumWG,
804
+ (void *)LaunchInfo.Data .Host .PrivateShadowOffset ,
805
+ (void *)LaunchInfo.Data .Host .PrivateShadowOffsetEnd );
812
806
}
807
+ }
813
808
814
- // Write local arguments info
815
- if (!KernelInfo->LocalArgs .empty ()) {
816
- std::vector<LocalArgsInfo> LocalArgsInfo;
817
- for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs ) {
818
- LocalArgsInfo.push_back (ArgInfo);
819
- getContext ()->logger .debug (
820
- " local_args (argIndex={}, size={}, sizeWithRZ={})" ,
821
- ArgIndex, ArgInfo.Size , ArgInfo.SizeWithRedZone );
822
- }
823
- UR_CALL (LaunchInfo.Data .importLocalArgsInfo (Queue, LocalArgsInfo));
809
+ // Write local arguments info
810
+ if (!KernelInfo->LocalArgs .empty ()) {
811
+ std::vector<LocalArgsInfo> LocalArgsInfo;
812
+ for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs ) {
813
+ LocalArgsInfo.push_back (ArgInfo);
814
+ getContext ()->logger .debug (
815
+ " local_args (argIndex={}, size={}, sizeWithRZ={})" , ArgIndex,
816
+ ArgInfo.Size , ArgInfo.SizeWithRedZone );
824
817
}
818
+ UR_CALL (LaunchInfo.Data .importLocalArgsInfo (Queue, LocalArgsInfo));
819
+ }
825
820
826
- // sync asan runtime data to device side
827
- UR_CALL (LaunchInfo.Data .syncToDevice (Queue));
821
+ // sync asan runtime data to device side
822
+ UR_CALL (LaunchInfo.Data .syncToDevice (Queue));
828
823
829
- getContext ()->logger .debug (
830
- " launch_info {} (numLocalArgs={}, localArgs={})" ,
831
- (void *)LaunchInfo.Data .getDevicePtr (),
832
- LaunchInfo.Data .Host .NumLocalArgs ,
833
- (void *)LaunchInfo.Data .Host .LocalArgs );
834
- } while (false );
824
+ getContext ()->logger .debug (" launch_info {} (numLocalArgs={}, localArgs={})" ,
825
+ (void *)LaunchInfo.Data .getDevicePtr (),
826
+ LaunchInfo.Data .Host .NumLocalArgs ,
827
+ (void *)LaunchInfo.Data .Host .LocalArgs );
835
828
836
829
return UR_RESULT_SUCCESS;
837
830
}
0 commit comments