@@ -2504,7 +2504,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2504
2504
OMPX_ApuMaps (" OMPX_APU_MAPS" , false ),
2505
2505
OMPX_DisableUsmMaps (" OMPX_DISABLE_USM_MAPS" , false ),
2506
2506
OMPX_NoMapChecks (" OMPX_DISABLE_MAPS" , true ),
2507
- OMPX_EagerApuMaps ( " OMPX_EAGER_ZERO_COPY_MAPS " , false ),
2507
+ OMPX_StrictSanityChecks ( " OMPX_STRICT_SANITY_CHECKS " , false ),
2508
2508
AMDGPUStreamManager (*this , Agent), AMDGPUEventManager(*this ),
2509
2509
AMDGPUSignalManager (*this ), Agent(Agent), HostDevice(HostDevice) {}
2510
2510
@@ -2942,75 +2942,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2942
2942
return Plugin::success ();
2943
2943
}
2944
2944
2945
- // TODO: clean up the following three functions after removing auto_zero_copy
2946
- // support and document appropriately.
2947
- void checkAndAdjustUsmModeForTargetImage (const __tgt_device_image *TgtImage) {
2948
- assert ((TgtImage != nullptr ) && " TgtImage is nullptr" );
2949
- assert (!(Plugin::get ().getRequiresFlags () & OMP_REQ_UNDEFINED) &&
2950
- " Requires flags are not set." );
2951
-
2952
- if (!(IsAPU || hasDGpuWithUsmSupportImpl ()))
2953
- return ;
2954
-
2955
- bool IsXnackRequired =
2956
- Plugin::get ().getRequiresFlags () & OMP_REQ_UNIFIED_SHARED_MEMORY;
2957
- utils::XnackBuildMode BinaryXnackMode =
2958
- utils::extractXnackModeFromBinary (TgtImage);
2959
-
2960
- if (IsXnackRequired) {
2961
- handleImageRequiresUsmMode (BinaryXnackMode);
2962
- } else {
2963
- handleDefaultMode (BinaryXnackMode);
2964
- }
2965
- }
2966
-
2967
- void handleImageRequiresUsmMode (utils::XnackBuildMode XnackImageMode) {
2968
- bool IsXnackActiveOnSystem = IsXnackEnabled;
2969
-
2970
- if ((XnackImageMode == ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4) ||
2971
- (XnackImageMode == ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4 &&
2972
- IsXnackActiveOnSystem) ||
2973
- (XnackImageMode == ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4 &&
2974
- !IsXnackActiveOnSystem)) {
2975
- if (OMPX_EagerApuMaps.get () && IsAPU)
2976
- PrepopulateGPUPageTable = true ; // Pre-faulting
2977
- }
2978
-
2979
- if (!IsXnackActiveOnSystem &&
2980
- (XnackImageMode != ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4)) {
2981
- FAILURE_MESSAGE (
2982
- " Running a program that requires XNACK on a system where XNACK is "
2983
- " disabled. This may cause problems when using a OS-allocated pointer "
2984
- " inside a target region. "
2985
- " Re-run with HSA_XNACK=1 to remove this warning.\n " );
2986
- }
2987
- }
2988
-
2989
- void handleDefaultMode (utils::XnackBuildMode XnackImageMode) {
2990
- // assuming that copying is required
2991
- // handled in userAutoZeroCopyImpl
2992
- // DisableAllocationsForMapsOnApus = false;
2993
- bool IsXnackActiveOnSystem = IsXnackEnabled;
2994
-
2995
- if (IsXnackActiveOnSystem && (IsAPU || OMPX_ApuMaps.get ()) &&
2996
- ((XnackImageMode == ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4) ||
2997
- (XnackImageMode == ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4))) {
2998
- if (IsAPU && OMPX_EagerApuMaps.get ()) {
2999
- PrepopulateGPUPageTable = true ; // Pre-faulting
3000
- }
3001
- return ;
3002
- }
3003
-
3004
- if (!IsXnackActiveOnSystem && IsAPU && OMPX_EagerApuMaps.get () &&
3005
- ((XnackImageMode == ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4) ||
3006
- (XnackImageMode == ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4))) {
3007
- PrepopulateGPUPageTable = true ; // Pre-faulting
3008
- return ;
3009
- }
3010
-
3011
- return ;
3012
- }
3013
-
3014
2945
// / Load the binary image into the device and allocate an image object.
3015
2946
Expected<DeviceImageTy *> loadBinaryImpl (const __tgt_device_image *TgtImage,
3016
2947
int32_t ImageId) override {
@@ -3022,9 +2953,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3022
2953
// Load the HSA executable.
3023
2954
if (Error Err = AMDImage->loadExecutable (*this ))
3024
2955
return std::move (Err);
3025
-
3026
- checkAndAdjustUsmModeForTargetImage (TgtImage);
3027
-
3028
2956
return AMDImage;
3029
2957
}
3030
2958
@@ -3624,14 +3552,51 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3624
3552
// / while it is often not the best on discrete GPUs.
3625
3553
// / XNACK can be enabled with a kernel boot parameter or with
3626
3554
// / the HSA_XNACK environment variable.
3627
- // / ROCm-only behavior: default (non USM, with xnack- or xnack-any)
3628
- // / and OMPX_EAGER_APU_MAPS is automatic zero-copy with pre-fault.
3629
3555
bool useAutoZeroCopyImpl () override {
3630
- return (
3631
- ((IsAPU || OMPX_ApuMaps) && IsXnackEnabled) ||
3632
- (IsAPU &&
3633
- !(Plugin::get ().getRequiresFlags () & OMP_REQ_UNIFIED_SHARED_MEMORY) &&
3634
- !IsXnackEnabled && OMPX_EagerApuMaps.get ()));
3556
+ return ((IsAPU || OMPX_ApuMaps) && IsXnackEnabled);
3557
+ }
3558
+
3559
+ // / Performs sanity checks on the selected zero-copy configuration and prints
3560
+ // / diagnostic information.
3561
+ Error zeroCopySanityChecksAndDiagImpl (bool isUnifiedSharedMemory,
3562
+ bool isAutoZeroCopy,
3563
+ bool isEagerMaps) override {
3564
+ // Implementation sanity checks: either unified_shared_memory or auto
3565
+ // zero-copy, not both
3566
+ if (isUnifiedSharedMemory && isAutoZeroCopy)
3567
+ return Plugin::error (" Internal runtime error: cannot be both "
3568
+ " unified_shared_memory and auto zero-copy." );
3569
+
3570
+ if (IsXnackEnabled)
3571
+ INFO (OMP_INFOTYPE_USER_DIAGNOSTIC, getDeviceId (), " XNACK is enabled.\n " );
3572
+ else
3573
+ INFO (OMP_INFOTYPE_USER_DIAGNOSTIC, getDeviceId (), " XNACK is disabled.\n " );
3574
+ if (isUnifiedSharedMemory)
3575
+ INFO (OMP_INFOTYPE_USER_DIAGNOSTIC, getDeviceId (),
3576
+ " Application configured to run in zero-copy using "
3577
+ " unified_shared_memory.\n " );
3578
+ else if (isAutoZeroCopy)
3579
+ INFO (
3580
+ OMP_INFOTYPE_USER_DIAGNOSTIC, getDeviceId (),
3581
+ " Application configured to run in zero-copy using auto zero-copy.\n " );
3582
+ if (isEagerMaps)
3583
+ INFO (OMP_INFOTYPE_USER_DIAGNOSTIC, getDeviceId (),
3584
+ " Requested pre-faulting of GPU page tables.\n " );
3585
+
3586
+ // Sanity checks: selecting unified_shared_memory with XNACK-Disabled
3587
+ // triggers a warning that can be turned into a fatal error using an
3588
+ // environment variable.
3589
+ if (isUnifiedSharedMemory && !IsXnackEnabled) {
3590
+ MESSAGE0 (
3591
+ " Running a program that requires XNACK on a system where XNACK is "
3592
+ " disabled. This may cause problems when using an OS-allocated "
3593
+ " pointer "
3594
+ " inside a target region. "
3595
+ " Re-run with HSA_XNACK=1 to remove this warning." );
3596
+ if (OMPX_StrictSanityChecks)
3597
+ llvm_unreachable (" User-requested hard stop on sanity check errors." );
3598
+ }
3599
+ return Plugin::success ();
3635
3600
}
3636
3601
3637
3602
// / Getters and setters for stack and heap sizes.
@@ -3845,19 +3810,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3845
3810
if (OMPX_DisableUsmMaps.get () == true ) {
3846
3811
EnableFineGrainedMemory = true ;
3847
3812
}
3848
-
3849
- if (IsAPU) {
3850
- // OMPX_EAGER_ZERO_COPY_MAPS=1 && HSA_XNACK=0 (XNACK-disabled)
3851
- // && default (non-USM) program
3852
- if ((OMPX_EagerApuMaps.get () == true ) && !IsXnackEnabled &&
3853
- !(Plugin::get ().getRequiresFlags () & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
3854
- PrepopulateGPUPageTable = true ;
3855
- }
3856
- }
3857
- }
3858
-
3859
- bool requestedPrepopulateGPUPageTableImpl () override final {
3860
- return PrepopulateGPUPageTable;
3861
3813
}
3862
3814
3863
3815
bool IsFineGrainedMemoryEnabledImpl () override final {
@@ -3875,6 +3827,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3875
3827
return hasGfx90aDevice () || hasMI300xDevice ();
3876
3828
}
3877
3829
3830
+ // / Returns whether AMD GPU supports unified memory in
3831
+ // / the current configuration.
3832
+ bool supportsUnifiedMemoryImpl () override final { return IsXnackEnabled; }
3833
+
3878
3834
// / Envar for controlling the number of HSA queues per device. High number of
3879
3835
// / queues may degrade performance.
3880
3836
UInt32Envar OMPX_NumQueues;
@@ -3963,11 +3919,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3963
3919
// / currently always without map checks.
3964
3920
BoolEnvar OMPX_NoMapChecks;
3965
3921
3966
- // / Value of OMPX_EAGER_ZERO_COPY_MAPS. When true, it
3967
- // / makes the plugin prefault the GPU page table upon
3968
- // / map. This allows running with XNACK-Disabled and
3969
- // / use zero-copy.
3970
- BoolEnvar OMPX_EagerApuMaps;
3922
+ /// Value of OMPX_STRICT_SANITY_CHECKS. When true, sanity-check warnings are turned into fatal errors.
3923
+ BoolEnvar OMPX_StrictSanityChecks;
3971
3924
3972
3925
// / Stream manager for AMDGPU streams.
3973
3926
AMDGPUStreamManagerTy AMDGPUStreamManager;
@@ -4030,11 +3983,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4030
3983
// / False otherwise.
4031
3984
bool IsXnackEnabled = false ;
4032
3985
4033
- // / Set by OMPX_EAGER_ZERO_COPY_MAPS environment variable.
4034
- // / If set, map clauses provoke prefaulting of the GPU
4035
- // / page table (applies to limited cases).
4036
- bool PrepopulateGPUPageTable = false ;
4037
-
4038
3986
// Set by OMPX_DISABLE_USM_MAPS environment variable.
4039
3987
// If set, fine graned memory is used for maps instead of coarse grained.
4040
3988
bool EnableFineGrainedMemory = false ;
0 commit comments