ROCm
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
Lines changed: 0 additions & 7 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Lines changed: 0 additions & 54 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
Lines changed: 0 additions & 5 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
Lines changed: 0 additions & 14 deletions
@@ -1694,13 +1694,6 @@ class CGOpenMPRuntime {
     return std::make_pair(false, RValue::get(nullptr));
   }
 
-  /// Return whether the current architecture must emit CAS loop runtime call
-  /// for given type and atomic operation
-  virtual bool mustEmitSafeAtomic(CodeGenFunction &CGF, LValue X, RValue Update,
-                                  BinaryOperatorKind BO) {
-    return false;
-  }
-
   /// Used for AMDGPU architectures where certain atomics must be lowered
   /// to a CAS loop.
   virtual std::pair<bool, RValue> emitAtomicCASLoop(CodeGenFunction &CGF,
 
@@ -2545,8 +2545,6 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
       case OffloadArch::GFX90a:
       case OffloadArch::GFX90c:
       case OffloadArch::GFX9_4_GENERIC:
-      case OffloadArch::GFX940:
-      case OffloadArch::GFX941:
       case OffloadArch::GFX942:
       case OffloadArch::GFX950:
       case OffloadArch::GFX10_1_GENERIC:
@@ -3380,58 +3378,6 @@ void CGOpenMPRuntimeGPU::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
   }
 }
 
-// The only allowed atomicrmw is add on int 32 and 64 bits, cmp_and_swap, swap.
-bool CGOpenMPRuntimeGPU::mustEmitSafeAtomic(CodeGenFunction &CGF, LValue X,
-                                            RValue Update,
-                                            BinaryOperatorKind BO) {
-  ASTContext &Context = CGF.getContext();
-  OffloadArch Arch = getOffloadArch(CGM);
-
-  if (!Context.getTargetInfo().getTriple().isAMDGCN() ||
-      !CGF.CGM.getLangOpts().OpenMPIsTargetDevice)
-    return false;
-
-  if (Arch != OffloadArch::GFX941)
-    return false;
-
-  // Non simple types cannot be used in atomicRMW and are handled elsewhere
-  if (!X.isSimple())
-    return false;
-
-  // Integer types are lowered by backend to atomic ISA (32 and 64 bits) or to
-  // CAS loop (all other bit widths).
-  if (BO == BO_Add && Update.getScalarVal()->getType()->isIntegerTy())
-    return false;
-
-  // For all other operations, integer types that are not 32 or 64 bits are
-  // already converted to CAS loop by clang codegen or backend. This allows for
-  // simpler handling in devicertl call.
-  if (Update.getScalarVal()->getType()->isIntegerTy() &&
-      (Context.getTypeSize(X.getType()) < 32 ||
-       Context.getTypeSize(X.getType()) > 64))
-    return false;
-
-  // float and double have a atomic ISA for min, max, and add that need to be
-  // bypassed. All other operations on float and double are lowered to cas loop
-  // by the backend
-  if ((Update.getScalarVal()->getType()->isFloatTy() ||
-       Update.getScalarVal()->getType()->isDoubleTy()) &&
-      !((BO == BO_Add) || (BO == BO_LT) || (BO == BO_GT)))
-    return false;
-
-  // For all types, the ISA only supports certain operations in a "native" way.
-  // All others are lowered to a CAS loop by the backend
-  if (!((BO == BO_Add) || (BO == BO_Sub) || (BO == BO_LT) || (BO == BO_GT) ||
-        (BO == BO_And) || (BO == BO_Or) || (BO == BO_Xor)))
-    return false;
-
-  // all other cases must be lowered to safe CAS loop
-  // which is hidden in a runtime function that uses cmpxchg directly and not
-  // atomicrmw. This is effectively bypassing the backend on the decision of
-  // what atomic to use.
-  return true;
-}
-
 std::pair<bool, RValue>
 CGOpenMPRuntimeGPU::emitAtomicCASLoop(CodeGenFunction &CGF, LValue X,
                                       RValue Update, BinaryOperatorKind BO) {
 
@@ -208,11 +208,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
                                                BinaryOperatorKind BO,
                                                bool IsXBinopExpr) override;
 
-  /// Return whether the current architecture must emit CAS loop runtime call
-  /// for given type and atomic operation
-  bool mustEmitSafeAtomic(CodeGenFunction &CGF, LValue X, RValue Update,
-                          BinaryOperatorKind BO) override;
-
   // Emit call to CAS loop
   std::pair<bool, RValue> emitAtomicCASLoop(CodeGenFunction &CGF, LValue X,
                                             RValue Update,
 
@@ -7089,12 +7089,6 @@ emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
                  bool IsXLHSInRHSPart, const Expr *Hint, SourceLocation Loc) {
   ASTContext &Context = CGF.getContext();
 
-  if (CGF.CGM.getOpenMPRuntime().mustEmitSafeAtomic(CGF, X, Update, BO)) {
-    // this will force emission of cmpxchg in the caller using
-    // clang machinery
-    return std::make_pair(false, RValue::get(nullptr));
-  }
-
   bool useFPAtomics = canUseAMDGPUFastFPAtomics(CGF, X, Update, BO, Hint, Loc);
   if (useFPAtomics) {
     auto Ret = CGF.CGM.getOpenMPRuntime().emitFastFPAtomicCall(
@@ -7452,14 +7446,6 @@ static void emitOMPAtomicCompareExpr(
 
   llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
 
-  if (CGF.CGM.getOpenMPRuntime().mustEmitSafeAtomic(
-          CGF, XLVal, RValue::get(EVal),
-          cast<BinaryOperator>(CE)->getOpcode())) {
-    CGF.CGM.getOpenMPRuntime().emitAtomicCASLoop(
-        CGF, XLVal, RValue::get(EVal), cast<BinaryOperator>(CE)->getOpcode());
-    return;
-  }
-
   // Check if fast AMDGPU FP atomics can be used for the current operation:
   bool canUseFastAtomics = canUseAMDGPUFastFPAtomics(
       CGF, XLVal, RValue::get(EVal), cast<BinaryOperator>(CE)->getOpcode(),