JuliaLang · tkf · Apr 30, 2022
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
@@ -10149,8 +10149,8 @@ operation. The operation must be one of the following keywords:
 For most of these operations, the type of '<value>' must be an integer
 type whose bit width is a power of two greater than or equal to eight
 and less than or equal to a target-specific size limit. For xchg, this
-may also be a floating point type with the same size constraints as
-integers.  For fadd/fsub, this must be a floating point type.  The
+may also be a floating point or a pointer type with the same size constraints
+as integers.  For fadd/fsub, this must be a floating point type.  The
 type of the '``<pointer>``' operand must be a pointer to that type. If
 the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not
 allowed to modify the number or order of execution of this

diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
@@ -7373,10 +7373,12 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
 
   if (Operation == AtomicRMWInst::Xchg) {
     if (!Val->getType()->isIntegerTy() &&
-        !Val->getType()->isFloatingPointTy()) {
-      return error(ValLoc,
-                   "atomicrmw " + AtomicRMWInst::getOperationName(Operation) +
-                       " operand must be an integer or floating point type");
+        !Val->getType()->isFloatingPointTy() &&
+        !Val->getType()->isPointerTy()) {
+      return error(
+          ValLoc,
+          "atomicrmw " + AtomicRMWInst::getOperationName(Operation) +
+              " operand must be an integer, floating point, or pointer type");
     }
   } else if (IsFP) {
     if (!Val->getType()->isFloatingPointTy()) {
@@ -7392,10 +7394,12 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
     }
   }
 
-  unsigned Size = Val->getType()->getPrimitiveSizeInBits();
-  if (Size < 8 || (Size & (Size - 1)))
-    return error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
-                         " integer");
+  if (!(Operation == AtomicRMWInst::Xchg && Val->getType()->isPointerTy())) {
+    unsigned Size = Val->getType()->getPrimitiveSizeInBits();
+    if (Size < 8 || (Size & (Size - 1)))
+      return error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
+                           " integer");
+  }
   const Align DefaultAlignment(
       PFS.getFunction().getParent()->getDataLayout().getTypeStoreSize(
           Val->getType()));

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -285,7 +285,8 @@ bool AtomicExpand::runOnFunction(Function &F) {
       } else {
         AtomicRMWInst::BinOp Op = RMWI->getOperation();
         if (Op == AtomicRMWInst::Xchg &&
-            RMWI->getValOperand()->getType()->isFloatingPointTy()) {
+            (RMWI->getValOperand()->getType()->isFloatingPointTy() ||
+             RMWI->getValOperand()->getType()->isPointerTy())) {
           // TODO: add a TLI hook to control this so that each target can
           // convert to lowering the original type one at a time.
           RMWI = convertAtomicXchgToIntegerType(RMWI);
@@ -386,15 +387,19 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
   Value *Val = RMWI->getValOperand();
   Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
   Value *NewAddr = Builder.CreateBitCast(Addr, PT);
-  Value *NewVal = Builder.CreateBitCast(Val, NewTy);
+  Value *NewVal = Val->getType()->isPointerTy()
+                      ? Builder.CreatePtrToInt(Val, NewTy)
+                      : Builder.CreateBitCast(Val, NewTy);
 
   auto *NewRMWI =
       Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
                               RMWI->getAlign(), RMWI->getOrdering());
   NewRMWI->setVolatile(RMWI->isVolatile());
   LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
 
-  Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
+  Value *NewRVal = RMWI->getType()->isPointerTy()
+                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
+                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
   RMWI->replaceAllUsesWith(NewRVal);
   RMWI->eraseFromParent();
   return NewRMWI;
@@ -509,7 +514,7 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
   Type *OrigTy = NewVal->getType();
 
   // This code can go away when cmpxchg supports FP types.
-  bool NeedBitcast = OrigTy->isFloatingPointTy();
+  bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isPointerTy();
   if (NeedBitcast) {
     IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
     unsigned AS = Addr->getType()->getPointerAddressSpace();

diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
@@ -3805,9 +3805,10 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
   auto Op = RMWI.getOperation();
   Type *ElTy = RMWI.getOperand(1)->getType();
   if (Op == AtomicRMWInst::Xchg) {
-    Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(), "atomicrmw " +
-           AtomicRMWInst::getOperationName(Op) +
-           " operand must have integer or floating point type!",
+    Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy() ||
+               ElTy->isPointerTy(),
+           "atomicrmw " + AtomicRMWInst::getOperationName(Op) +
+               " operand must have integer, floating point, or pointer type!",
            &RMWI, ElTy);
   } else if (AtomicRMWInst::isFPOperation(Op)) {
     Assert(ElTy->isFloatingPointTy(), "atomicrmw " +

diff --git a/llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-fp-or-pointer-type.ll b/llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-fp-or-pointer-type.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: error: atomicrmw xchg operand must be an integer, floating point, or pointer type
+define void @f(<3 x i1>* %ptr) {
+  atomicrmw xchg <3 x i1>* %ptr, <3 x i1> <i1 1, i1 2, i1 3> seq_cst
+  ret void
+}
diff --git a/llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll b/llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
@@ -850,6 +850,12 @@ define void @fp_atomics(float* %word) {
   ret void
 }
 
+define void @pointer_atomics(i8** %word) {
+; CHECK: %atomicrmw.xchg = atomicrmw xchg i8** %word, i8* null monotonic
+  %atomicrmw.xchg = atomicrmw xchg i8** %word, i8* null monotonic
+  ret void
+}
+
 ;; Fast Math Flags
 define void @fastmathflags_unop(float %op1) {
   %f.nnan = fneg nnan float %op1

diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-pointer.ll b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-pointer.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE
+
+define i8* @test_rmw_xchg_pointer(i8** %dst, i8* %new) {
+; NOLSE-LABEL: test_rmw_xchg_pointer:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    mov x8, x0
+; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT:    ldaxr x0, [x8]
+; NOLSE-NEXT:    stlxr w9, x1, [x8]
+; NOLSE-NEXT:    cbnz w9, .LBB0_1
+; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_rmw_xchg_pointer:
+; LSE:       // %bb.0:
+; LSE-NEXT:    swpal x1, x0, [x0]
+; LSE-NEXT:    ret
+  %res = atomicrmw xchg i8** %dst, i8* %new seq_cst
+  ret i8* %res
+}
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -659,6 +659,15 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_xchg_pointer_offset:
+; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @atomic_xchg_pointer_offset(i8** %out, i8* %in) {
+entry:
+  %gep = getelementptr i8*, i8** %out, i32 4
+  %val = atomicrmw volatile xchg i8** %gep, i8* %in seq_cst
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
 ; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]

diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -794,6 +794,17 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_xchg_pointer_offset:
+; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+
+; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
+define amdgpu_kernel void @atomic_xchg_pointer_offset(i8* addrspace(1)* %out, i8* %in) {
+entry:
+  %gep = getelementptr i8*, i8* addrspace(1)* %out, i64 4
+  %tmp0 = atomicrmw volatile xchg i8* addrspace(1)* %gep, i8* %in seq_cst
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
 ; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; CIVI: buffer_store_dwordx2 [[RET]]

diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics64.ll b/llvm/test/CodeGen/AMDGPU/local-atomics64.ll
@@ -40,6 +40,19 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(double addrspace(1)* %
   ret void
 }
 
+; GCN-LABEL: {{^}}lds_atomic_xchg_ret_pointer_offset:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_xchg_ret_pointer_offset(i8* addrspace(1)* %out, i8* addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i8*, i8* addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xchg i8* addrspace(3)* %gep, i8* null seq_cst
+  store i8* %result, i8* addrspace(1)* %out, align 8
+  ret void
+}
+
 ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
 ; SICIVI: s_mov_b32 m0
 ; GFX9-NOT: m0

diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll
@@ -4,6 +4,7 @@
 
 @sc64 = external dso_local global i64
 @fsc64 = external dso_local global double
+@psc64 = external dso_local global i8*
 
 define void @atomic_fetch_add64() nounwind {
 ; X64-LABEL: atomic_fetch_add64:
@@ -802,3 +803,18 @@ define void @atomic_fetch_swapf64(double %x) nounwind {
   %t1 = atomicrmw xchg double* @fsc64, double %x acquire
   ret void
 }
+
+define void @atomic_fetch_swapptr(i8* %x) nounwind {
+; X64-LABEL: atomic_fetch_swapptr:
+; X64:       # %bb.0:
+; X64-NEXT:    xchgq %rdi, psc64(%rip)
+; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_swapptr:
+; I486:       # %bb.0:
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT:    xchgl %eax, psc64
+; I486-NEXT:    retl
+  %t1 = atomicrmw xchg i8** @psc64, i8* %x acquire
+  ret void
+}
diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-pointer.ll b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-pointer.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -O1 -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s
+; RUN: opt -O1 -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
+
+define void @atomic_swap_pointer(i8** %ptr, i8* %val) nounwind {
+; CHECK-LABEL: @atomic_swap_pointer(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[PTR:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint i8* [[VAL:%.*]] to i64
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i64(i64* elementtype(i64) [[TMP1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[TMP2]], i64* elementtype(i64) [[TMP1]])
+; CHECK-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to i8*
+; CHECK-NEXT:    ret void
+;
+; OUTLINE-ATOMICS-LABEL: @atomic_swap_pointer(
+; OUTLINE-ATOMICS-NEXT:    [[TMP1:%.*]] = bitcast i8** [[PTR:%.*]] to i64*
+; OUTLINE-ATOMICS-NEXT:    [[TMP2:%.*]] = ptrtoint i8* [[VAL:%.*]] to i64
+; OUTLINE-ATOMICS-NEXT:    [[TMP3:%.*]] = atomicrmw xchg i64* [[TMP1]], i64 [[TMP2]] acquire, align 8
+; OUTLINE-ATOMICS-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to i8*
+; OUTLINE-ATOMICS-NEXT:    ret void
+;
+  %t1 = atomicrmw xchg i8** %ptr, i8* %val acquire
+  ret void
+}
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-pointer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-pointer.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=i686-linux-gnu -atomic-expand %s | FileCheck %s
+
+define i8* @atomic_xchg_pointer(i8** %ptr) nounwind {
+; CHECK-LABEL: @atomic_xchg_pointer(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[PTR:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = cmpxchg i64* [[TMP1]], i64 [[LOADED]], i64 0 seq_cst seq_cst, align 8
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[NEWLOADED]] to i8*
+; CHECK-NEXT:    ret i8* [[TMP4]]
+;
+  %result = atomicrmw xchg i8** %ptr, i8* null seq_cst
+  ret i8* %result
+}
+
+define i8* @atomic_xchg_pointer_as1(i8* addrspace(1)* %ptr) nounwind {
+; CHECK-LABEL: @atomic_xchg_pointer_as1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* addrspace(1)* [[PTR:%.*]] to i64 addrspace(1)*
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64 addrspace(1)* [[TMP1]], align 8
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = cmpxchg i64 addrspace(1)* [[TMP1]], i64 [[LOADED]], i64 0 seq_cst seq_cst, align 8
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[NEWLOADED]] to i8*
+; CHECK-NEXT:    ret i8* [[TMP4]]
+;
+  %result = atomicrmw xchg i8* addrspace(1)* %ptr, i8* null seq_cst
+  ret i8* %result
+}