Skip to content

[InferAS] Support getAssumedAddrSpace for Arguments for NVPTX #133991

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits on Apr 3, 2025

Conversation

AlexMaclean
Copy link
Member

No description provided.

@AlexMaclean AlexMaclean requested review from Artem-B and jdoerfert April 1, 2025 21:23
@AlexMaclean AlexMaclean self-assigned this Apr 1, 2025
@llvmbot
Copy link
Member

llvmbot commented Apr 1, 2025

@llvm/pr-subscribers-debuginfo

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Patch is 69.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133991.diff

8 Files Affected:

  • (modified) llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp (+4-8)
  • (modified) llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (+15)
  • (modified) llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp (+35-13)
  • (modified) llvm/test/CodeGen/NVPTX/i1-ext-load.ll (+5-5)
  • (modified) llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll (+13-33)
  • (modified) llvm/test/CodeGen/NVPTX/lower-args.ll (+10-24)
  • (modified) llvm/test/CodeGen/NVPTX/lower-byval-args.ll (+99-349)
  • (added) llvm/test/Transforms/InferAddressSpaces/NVPTX/arguments.ll (+35)
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 2637b9fab0d50..a683726facd0c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -678,11 +678,8 @@ static bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F) {
 
   LLVM_DEBUG(dbgs() << "Lowering kernel args of " << F.getName() << "\n");
   for (Argument &Arg : F.args()) {
-    if (Arg.getType()->isPointerTy()) {
-      if (Arg.hasByValAttr())
-        handleByValParam(TM, &Arg);
-      else if (TM.getDrvInterface() == NVPTX::CUDA)
-        markPointerAsGlobal(&Arg);
+    if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
+      handleByValParam(TM, &Arg);
     } else if (Arg.getType()->isIntegerTy() &&
                TM.getDrvInterface() == NVPTX::CUDA) {
       HandleIntToPtr(Arg);
@@ -699,10 +696,9 @@ static bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F) {
       cast<NVPTXTargetLowering>(TM.getSubtargetImpl()->getTargetLowering());
 
   for (Argument &Arg : F.args())
-    if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
-      markPointerAsAS(&Arg, ADDRESS_SPACE_LOCAL);
+    if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
       adjustByValArgAlignment(&Arg, &Arg, TLI);
-    }
+
   return true;
 }
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index a89ca3037c7ff..e359735c20750 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -599,6 +599,21 @@ unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
   if (isa<AllocaInst>(V))
     return ADDRESS_SPACE_LOCAL;
 
+  if (const Argument *Arg = dyn_cast<Argument>(V)) {
+    if (isKernelFunction(*Arg->getParent())) {
+      const NVPTXTargetMachine &TM =
+          static_cast<const NVPTXTargetMachine &>(getTLI()->getTargetMachine());
+      if (TM.getDrvInterface() == NVPTX::CUDA && !Arg->hasByValAttr())
+        return ADDRESS_SPACE_GLOBAL;
+    } else {
+      // We assume that all device parameters that are passed byval will be
+      // placed in the local AS. Very simple cases will be updated after ISel to
+      // use the device param space where possible.
+      if (Arg->hasByValAttr())
+        return ADDRESS_SPACE_LOCAL;
+    }
+  }
+
   return -1;
 }
 
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 73a3f5e4d3694..965d6b6e45e6e 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -305,10 +305,15 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
 }
 
 // Returns true if V is an address expression.
-// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
-// getelementptr operators.
+// TODO: Currently, we consider only arguments and phi, bitcast, addrspacecast,
+// and getelementptr operators.
 static bool isAddressExpression(const Value &V, const DataLayout &DL,
                                 const TargetTransformInfo *TTI) {
+
+  if (const Argument *Arg = dyn_cast<Argument>(&V))
+    return Arg->getType()->isPointerTy() &&
+           TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
+
   const Operator *Op = dyn_cast<Operator>(&V);
   if (!Op)
     return false;
@@ -341,6 +346,9 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
 static SmallVector<Value *, 2>
 getPointerOperands(const Value &V, const DataLayout &DL,
                    const TargetTransformInfo *TTI) {
+  if (isa<Argument>(&V))
+    return {};
+
   const Operator &Op = cast<Operator>(V);
   switch (Op.getOpcode()) {
   case Instruction::PHI: {
@@ -505,13 +513,11 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
     if (Visited.insert(V).second) {
       PostorderStack.emplace_back(V, false);
 
-      Operator *Op = cast<Operator>(V);
-      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
-        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
-          if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
-            PostorderStack.emplace_back(CE, false);
-        }
-      }
+      if (auto *Op = dyn_cast<Operator>(V))
+        for (auto &O : Op->operands())
+          if (ConstantExpr *CE = dyn_cast<ConstantExpr>(O))
+            if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
+              PostorderStack.emplace_back(CE, false);
     }
   }
 }
@@ -828,6 +834,18 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
   assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
          isAddressExpression(*V, *DL, TTI));
 
+  if (auto *Arg = dyn_cast<Argument>(V)) {
+    // Arguments are address space casted in the function body, as we do not
+    // want to change the function signature.
+    Function *F = Arg->getParent();
+    BasicBlock::iterator Insert = F->getEntryBlock().getFirstNonPHIIt();
+
+    Type *NewPtrTy = PointerType::get(Arg->getContext(), NewAddrSpace);
+    auto *NewI = new AddrSpaceCastInst(Arg, NewPtrTy);
+    NewI->insertBefore(Insert);
+    return NewI;
+  }
+
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     Value *NewV = cloneInstructionWithNewAddressSpace(
         I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
@@ -966,8 +984,12 @@ bool InferAddressSpacesImpl::updateAddressSpace(
   // of all its pointer operands.
   unsigned NewAS = UninitializedAddressSpace;
 
-  const Operator &Op = cast<Operator>(V);
-  if (Op.getOpcode() == Instruction::Select) {
+  // isAddressExpression should guarantee that V is an operator or an argument.
+  assert(isa<Operator>(V) || isa<Argument>(V));
+
+  if (isa<Operator>(V) &&
+      cast<Operator>(V).getOpcode() == Instruction::Select) {
+    const Operator &Op = cast<Operator>(V);
     Value *Src0 = Op.getOperand(1);
     Value *Src1 = Op.getOperand(2);
 
@@ -1258,7 +1280,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
   }
 
   // Otherwise, replaces the use with flat(NewV).
-  if (Instruction *VInst = dyn_cast<Instruction>(V)) {
+  if (isa<Instruction>(V) || isa<Instruction>(NewV)) {
     // Don't create a copy of the original addrspacecast.
     if (U == V && isa<AddrSpaceCastInst>(V))
       return;
@@ -1268,7 +1290,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
     if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
       InsertPos = std::next(NewVInst->getIterator());
     else
-      InsertPos = std::next(VInst->getIterator());
+      InsertPos = std::next(cast<Instruction>(V)->getIterator());
 
     while (isa<PHINode>(InsertPos))
       ++InsertPos;
diff --git a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
index f5f1dd9fcf0ea..44ac46db254a7 100644
--- a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
@@ -12,14 +12,14 @@ define ptx_kernel void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
 ; CHECK:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK:    ld.param.u64 %rd1, [foo_param_0];
-; CHECK:    ld.param.u64 %rd2, [foo_param_1];
-; CHECK:    cvta.to.global.u64 %rd3, %rd2;
-; CHECK:    cvta.to.global.u64 %rd4, %rd1;
-; CHECK:    ld.global.nc.u8 %rs1, [%rd4];
+; CHECK:    cvta.to.global.u64 %rd2, %rd1;
+; CHECK:    ld.param.u64 %rd3, [foo_param_1];
+; CHECK:    cvta.to.global.u64 %rd4, %rd3;
+; CHECK:    ld.global.nc.u8 %rs1, [%rd2];
 ; CHECK:    cvt.u32.u8 %r1, %rs1;
 ; CHECK:    add.s32 %r2, %r1, 1;
 ; CHECK:    and.b32 %r3, %r2, 1;
-; CHECK:    st.global.u32 [%rd3], %r3;
+; CHECK:    st.global.u32 [%rd4], %r3;
 ; CHECK:    ret;
   %ld = load i1, ptr %ptr, align 1
   %zext = zext i1 %ld to i32
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index e4e1f40d0d8b2..38b7400696c54 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -12,9 +12,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
 ; OPT-LABEL: define dso_local noundef i32 @non_kernel_function(
 ; OPT-SAME: ptr noundef readonly byval([[STRUCT_UINT4:%.*]]) align 16 captures(none) [[A:%.*]], i1 noundef zeroext [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; OPT-NEXT:  [[ENTRY:.*:]]
-; OPT-NEXT:    [[A2:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(5)
-; OPT-NEXT:    [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A2]] to ptr
-; OPT-NEXT:    [[A_:%.*]] = select i1 [[B]], ptr [[A1]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
+; OPT-NEXT:    [[A_:%.*]] = select i1 [[B]], ptr [[A]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
 ; OPT-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[C]] to i64
 ; OPT-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[A_]], i64 [[IDX_EXT]]
 ; OPT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 1
@@ -74,12 +72,10 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_int(
 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr
 ; OPT-NEXT:    [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
 ; OPT-NEXT:    [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
 ; OPT-NEXT:    [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
-; OPT-NEXT:    store i32 [[ADD]], ptr [[OUT3]], align 4
+; OPT-NEXT:    store i32 [[ADD]], ptr [[OUT]], align 4
 ; OPT-NEXT:    ret void
   %tmp = load i32, ptr %input1, align 4
   %add = add i32 %tmp, %input2
@@ -105,15 +101,13 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_struct(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr
 ; OPT-NEXT:    [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
 ; OPT-NEXT:    [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
 ; OPT-NEXT:    [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
 ; OPT-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
 ; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
 ; OPT-NEXT:    [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
-; OPT-NEXT:    store i32 [[ADD]], ptr [[OUT5]], align 4
+; OPT-NEXT:    store i32 [[ADD]], ptr [[OUT]], align 4
 ; OPT-NEXT:    ret void
   %gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
   %gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -233,11 +227,9 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; OPT-NEXT:    [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr
 ; OPT-NEXT:    [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
-; OPT-NEXT:    store ptr [[INPUT1]], ptr [[ADDR5]], align 8
+; OPT-NEXT:    store ptr [[INPUT1]], ptr [[ADDR]], align 8
 ; OPT-NEXT:    ret void
   store ptr %input, ptr %addr, align 8
   ret void
@@ -263,14 +255,12 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
 ; PTX-NOT      .local
 ; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1)
-; OPT-NEXT:    [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr
 ; OPT-NEXT:    [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
 ; OPT-NEXT:    [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
 ; OPT-NEXT:    [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
 ; OPT-NEXT:    [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
-; OPT-NEXT:    store i64 [[TMP2]], ptr [[RESULT5]], align 8
+; OPT-NEXT:    store i64 [[TMP2]], ptr [[RESULT]], align 8
 ; OPT-NEXT:    ret void
   %tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
   %tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -311,13 +301,11 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
 ; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
 ; OPT-NEXT:    [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]])
 ; OPT-NEXT:    [[VAL1:%.*]] = load i32, ptr [[INPUT1_GEN]], align 4
 ; OPT-NEXT:    [[TWICE:%.*]] = add i32 [[VAL1]], [[VAL1]]
-; OPT-NEXT:    store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
+; OPT-NEXT:    store i32 [[TWICE]], ptr [[OUTPUT]], align 4
 ; OPT-NEXT:    [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
 ; OPT-NEXT:    ret void
   %val = load i32, ptr %input
@@ -361,15 +349,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
 ; OPT-NEXT:    [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
 ; OPT-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
 ; OPT-NEXT:    [[VAL1:%.*]] = load i32, ptr [[PTR1]], align 4
 ; OPT-NEXT:    [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
 ; OPT-NEXT:    [[VAL2:%.*]] = load i32, ptr [[PTR2]], align 4
-; OPT-NEXT:    store ptr [[INPUT1]], ptr [[OUTPUT5]], align 8
+; OPT-NEXT:    store ptr [[INPUT1]], ptr [[OUTPUT]], align 8
 ; OPT-NEXT:    [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
 ; OPT-NEXT:    [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
 ; OPT-NEXT:    ret i32 [[ADD]]
@@ -407,11 +393,9 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_phi(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
 ; OPT-NEXT:    [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
 ; OPT-NEXT:    [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
 ; OPT-NEXT:    br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
 ; OPT:       [[FIRST]]:
@@ -423,7 +407,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
 ; OPT:       [[MERGE]]:
 ; OPT-NEXT:    [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
 ; OPT-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT]], align 4
 ; OPT-NEXT:    ret void
 
   %val = load i32, ptr %inout
@@ -470,13 +454,11 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
 ; OPT-NEXT:    [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
 ; OPT-NEXT:    [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
 ; OPT-NEXT:    [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
 ; OPT-NEXT:    br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
 ; OPT:       [[FIRST]]:
@@ -488,7 +470,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
 ; OPT:       [[MERGE]]:
 ; OPT-NEXT:    [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
 ; OPT-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT]], align 4
 ; OPT-NEXT:    ret void
   %val = load i32, ptr %inout
   %less = icmp slt i32 %val, 0
@@ -531,17 +513,15 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_select(
 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
 ; OPT-NEXT:    [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
 ; OPT-NEXT:    [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
 ; OPT-NEXT:    [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
 ; OPT-NEXT:    [[PTRNEW:%.*]] = select i1 [[LESS]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
 ; OPT-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT]], align 4
 ; OPT-NEXT:    ret void
   %val = load i32, ptr %inout
   %less = icmp slt i32 %val, 0
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index a1c0a86e9c4e4..8fa7d5c3e0cbc 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Apr 1, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Alex MacLean (AlexMaclean)

Changes

Patch is 69.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133991.diff

8 Files Affected:

  • (modified) llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp (+4-8)
  • (modified) llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp (+15)
  • (modified) llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp (+35-13)
  • (modified) llvm/test/CodeGen/NVPTX/i1-ext-load.ll (+5-5)
  • (modified) llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll (+13-33)
  • (modified) llvm/test/CodeGen/NVPTX/lower-args.ll (+10-24)
  • (modified) llvm/test/CodeGen/NVPTX/lower-byval-args.ll (+99-349)
  • (added) llvm/test/Transforms/InferAddressSpaces/NVPTX/arguments.ll (+35)
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 2637b9fab0d50..a683726facd0c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -678,11 +678,8 @@ static bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F) {
 
   LLVM_DEBUG(dbgs() << "Lowering kernel args of " << F.getName() << "\n");
   for (Argument &Arg : F.args()) {
-    if (Arg.getType()->isPointerTy()) {
-      if (Arg.hasByValAttr())
-        handleByValParam(TM, &Arg);
-      else if (TM.getDrvInterface() == NVPTX::CUDA)
-        markPointerAsGlobal(&Arg);
+    if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
+      handleByValParam(TM, &Arg);
     } else if (Arg.getType()->isIntegerTy() &&
                TM.getDrvInterface() == NVPTX::CUDA) {
       HandleIntToPtr(Arg);
@@ -699,10 +696,9 @@ static bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F) {
       cast<NVPTXTargetLowering>(TM.getSubtargetImpl()->getTargetLowering());
 
   for (Argument &Arg : F.args())
-    if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
-      markPointerAsAS(&Arg, ADDRESS_SPACE_LOCAL);
+    if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
       adjustByValArgAlignment(&Arg, &Arg, TLI);
-    }
+
   return true;
 }
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index a89ca3037c7ff..e359735c20750 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -599,6 +599,21 @@ unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
   if (isa<AllocaInst>(V))
     return ADDRESS_SPACE_LOCAL;
 
+  if (const Argument *Arg = dyn_cast<Argument>(V)) {
+    if (isKernelFunction(*Arg->getParent())) {
+      const NVPTXTargetMachine &TM =
+          static_cast<const NVPTXTargetMachine &>(getTLI()->getTargetMachine());
+      if (TM.getDrvInterface() == NVPTX::CUDA && !Arg->hasByValAttr())
+        return ADDRESS_SPACE_GLOBAL;
+    } else {
+      // We assume that all device parameters that are passed byval will be
+      // placed in the local AS. Very simple cases will be updated after ISel to
+      // use the device param space where possible.
+      if (Arg->hasByValAttr())
+        return ADDRESS_SPACE_LOCAL;
+    }
+  }
+
   return -1;
 }
 
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 73a3f5e4d3694..965d6b6e45e6e 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -305,10 +305,15 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
 }
 
 // Returns true if V is an address expression.
-// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
-// getelementptr operators.
+// TODO: Currently, we consider only arguments and phi, bitcast, addrspacecast,
+// and getelementptr operators.
 static bool isAddressExpression(const Value &V, const DataLayout &DL,
                                 const TargetTransformInfo *TTI) {
+
+  if (const Argument *Arg = dyn_cast<Argument>(&V))
+    return Arg->getType()->isPointerTy() &&
+           TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
+
   const Operator *Op = dyn_cast<Operator>(&V);
   if (!Op)
     return false;
@@ -341,6 +346,9 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
 static SmallVector<Value *, 2>
 getPointerOperands(const Value &V, const DataLayout &DL,
                    const TargetTransformInfo *TTI) {
+  if (isa<Argument>(&V))
+    return {};
+
   const Operator &Op = cast<Operator>(V);
   switch (Op.getOpcode()) {
   case Instruction::PHI: {
@@ -505,13 +513,11 @@ void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
     if (Visited.insert(V).second) {
       PostorderStack.emplace_back(V, false);
 
-      Operator *Op = cast<Operator>(V);
-      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
-        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
-          if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
-            PostorderStack.emplace_back(CE, false);
-        }
-      }
+      if (auto *Op = dyn_cast<Operator>(V))
+        for (auto &O : Op->operands())
+          if (ConstantExpr *CE = dyn_cast<ConstantExpr>(O))
+            if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
+              PostorderStack.emplace_back(CE, false);
     }
   }
 }
@@ -828,6 +834,18 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
   assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
          isAddressExpression(*V, *DL, TTI));
 
+  if (auto *Arg = dyn_cast<Argument>(V)) {
+    // Arguments are address space casted in the function body, as we do not
+    // want to change the function signature.
+    Function *F = Arg->getParent();
+    BasicBlock::iterator Insert = F->getEntryBlock().getFirstNonPHIIt();
+
+    Type *NewPtrTy = PointerType::get(Arg->getContext(), NewAddrSpace);
+    auto *NewI = new AddrSpaceCastInst(Arg, NewPtrTy);
+    NewI->insertBefore(Insert);
+    return NewI;
+  }
+
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     Value *NewV = cloneInstructionWithNewAddressSpace(
         I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
@@ -966,8 +984,12 @@ bool InferAddressSpacesImpl::updateAddressSpace(
   // of all its pointer operands.
   unsigned NewAS = UninitializedAddressSpace;
 
-  const Operator &Op = cast<Operator>(V);
-  if (Op.getOpcode() == Instruction::Select) {
+  // isAddressExpression should guarantee that V is an operator or an argument.
+  assert(isa<Operator>(V) || isa<Argument>(V));
+
+  if (isa<Operator>(V) &&
+      cast<Operator>(V).getOpcode() == Instruction::Select) {
+    const Operator &Op = cast<Operator>(V);
     Value *Src0 = Op.getOperand(1);
     Value *Src1 = Op.getOperand(2);
 
@@ -1258,7 +1280,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
   }
 
   // Otherwise, replaces the use with flat(NewV).
-  if (Instruction *VInst = dyn_cast<Instruction>(V)) {
+  if (isa<Instruction>(V) || isa<Instruction>(NewV)) {
     // Don't create a copy of the original addrspacecast.
     if (U == V && isa<AddrSpaceCastInst>(V))
       return;
@@ -1268,7 +1290,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
     if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
       InsertPos = std::next(NewVInst->getIterator());
     else
-      InsertPos = std::next(VInst->getIterator());
+      InsertPos = std::next(cast<Instruction>(V)->getIterator());
 
     while (isa<PHINode>(InsertPos))
       ++InsertPos;
diff --git a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
index f5f1dd9fcf0ea..44ac46db254a7 100644
--- a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
@@ -12,14 +12,14 @@ define ptx_kernel void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
 ; CHECK:    .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK:    ld.param.u64 %rd1, [foo_param_0];
-; CHECK:    ld.param.u64 %rd2, [foo_param_1];
-; CHECK:    cvta.to.global.u64 %rd3, %rd2;
-; CHECK:    cvta.to.global.u64 %rd4, %rd1;
-; CHECK:    ld.global.nc.u8 %rs1, [%rd4];
+; CHECK:    cvta.to.global.u64 %rd2, %rd1;
+; CHECK:    ld.param.u64 %rd3, [foo_param_1];
+; CHECK:    cvta.to.global.u64 %rd4, %rd3;
+; CHECK:    ld.global.nc.u8 %rs1, [%rd2];
 ; CHECK:    cvt.u32.u8 %r1, %rs1;
 ; CHECK:    add.s32 %r2, %r1, 1;
 ; CHECK:    and.b32 %r3, %r2, 1;
-; CHECK:    st.global.u32 [%rd3], %r3;
+; CHECK:    st.global.u32 [%rd4], %r3;
 ; CHECK:    ret;
   %ld = load i1, ptr %ptr, align 1
   %zext = zext i1 %ld to i32
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index e4e1f40d0d8b2..38b7400696c54 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -12,9 +12,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
 ; OPT-LABEL: define dso_local noundef i32 @non_kernel_function(
 ; OPT-SAME: ptr noundef readonly byval([[STRUCT_UINT4:%.*]]) align 16 captures(none) [[A:%.*]], i1 noundef zeroext [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; OPT-NEXT:  [[ENTRY:.*:]]
-; OPT-NEXT:    [[A2:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(5)
-; OPT-NEXT:    [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A2]] to ptr
-; OPT-NEXT:    [[A_:%.*]] = select i1 [[B]], ptr [[A1]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
+; OPT-NEXT:    [[A_:%.*]] = select i1 [[B]], ptr [[A]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
 ; OPT-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[C]] to i64
 ; OPT-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[A_]], i64 [[IDX_EXT]]
 ; OPT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 1
@@ -74,12 +72,10 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_int(
 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr
 ; OPT-NEXT:    [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
 ; OPT-NEXT:    [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
 ; OPT-NEXT:    [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
-; OPT-NEXT:    store i32 [[ADD]], ptr [[OUT3]], align 4
+; OPT-NEXT:    store i32 [[ADD]], ptr [[OUT]], align 4
 ; OPT-NEXT:    ret void
   %tmp = load i32, ptr %input1, align 4
   %add = add i32 %tmp, %input2
@@ -105,15 +101,13 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_struct(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr
 ; OPT-NEXT:    [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
 ; OPT-NEXT:    [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
 ; OPT-NEXT:    [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
 ; OPT-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
 ; OPT-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
 ; OPT-NEXT:    [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
-; OPT-NEXT:    store i32 [[ADD]], ptr [[OUT5]], align 4
+; OPT-NEXT:    store i32 [[ADD]], ptr [[OUT]], align 4
 ; OPT-NEXT:    ret void
   %gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
   %gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -233,11 +227,9 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; OPT-NEXT:    [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr
 ; OPT-NEXT:    [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
-; OPT-NEXT:    store ptr [[INPUT1]], ptr [[ADDR5]], align 8
+; OPT-NEXT:    store ptr [[INPUT1]], ptr [[ADDR]], align 8
 ; OPT-NEXT:    ret void
   store ptr %input, ptr %addr, align 8
   ret void
@@ -263,14 +255,12 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
 ; PTX-NOT      .local
 ; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1)
-; OPT-NEXT:    [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr
 ; OPT-NEXT:    [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
 ; OPT-NEXT:    [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
 ; OPT-NEXT:    [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
 ; OPT-NEXT:    [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
-; OPT-NEXT:    store i64 [[TMP2]], ptr [[RESULT5]], align 8
+; OPT-NEXT:    store i64 [[TMP2]], ptr [[RESULT]], align 8
 ; OPT-NEXT:    ret void
   %tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
   %tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -311,13 +301,11 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
 ; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
 ; OPT-NEXT:    [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]])
 ; OPT-NEXT:    [[VAL1:%.*]] = load i32, ptr [[INPUT1_GEN]], align 4
 ; OPT-NEXT:    [[TWICE:%.*]] = add i32 [[VAL1]], [[VAL1]]
-; OPT-NEXT:    store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
+; OPT-NEXT:    store i32 [[TWICE]], ptr [[OUTPUT]], align 4
 ; OPT-NEXT:    [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
 ; OPT-NEXT:    ret void
   %val = load i32, ptr %input
@@ -361,15 +349,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
 ; OPT-NEXT:    [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
 ; OPT-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
 ; OPT-NEXT:    [[VAL1:%.*]] = load i32, ptr [[PTR1]], align 4
 ; OPT-NEXT:    [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
 ; OPT-NEXT:    [[VAL2:%.*]] = load i32, ptr [[PTR2]], align 4
-; OPT-NEXT:    store ptr [[INPUT1]], ptr [[OUTPUT5]], align 8
+; OPT-NEXT:    store ptr [[INPUT1]], ptr [[OUTPUT]], align 8
 ; OPT-NEXT:    [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
 ; OPT-NEXT:    [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
 ; OPT-NEXT:    ret i32 [[ADD]]
@@ -407,11 +393,9 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_phi(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
 ; OPT-NEXT:    [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
 ; OPT-NEXT:    [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
 ; OPT-NEXT:    br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
 ; OPT:       [[FIRST]]:
@@ -423,7 +407,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
 ; OPT:       [[MERGE]]:
 ; OPT-NEXT:    [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
 ; OPT-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT]], align 4
 ; OPT-NEXT:    ret void
 
   %val = load i32, ptr %inout
@@ -470,13 +454,11 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc(
 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
 ; OPT-NEXT:    [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
 ; OPT-NEXT:    [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
 ; OPT-NEXT:    [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
 ; OPT-NEXT:    br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
 ; OPT:       [[FIRST]]:
@@ -488,7 +470,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
 ; OPT:       [[MERGE]]:
 ; OPT-NEXT:    [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
 ; OPT-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT]], align 4
 ; OPT-NEXT:    ret void
   %val = load i32, ptr %inout
   %less = icmp slt i32 %val, 0
@@ -531,17 +513,15 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
 ; PTX-NEXT:    ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_select(
 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:    [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
-; OPT-NEXT:    [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
 ; OPT-NEXT:    [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
 ; OPT-NEXT:    [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
 ; OPT-NEXT:    [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
-; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
+; OPT-NEXT:    [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
 ; OPT-NEXT:    [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
 ; OPT-NEXT:    [[PTRNEW:%.*]] = select i1 [[LESS]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
 ; OPT-NEXT:    [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
-; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
+; OPT-NEXT:    store i32 [[VALLOADED]], ptr [[INOUT]], align 4
 ; OPT-NEXT:    ret void
   %val = load i32, ptr %inout
   %less = icmp slt i32 %val, 0
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index a1c0a86e9c4e4..8fa7d5c3e0cbc 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_...
[truncated]

Copy link
Member

@Artem-B Artem-B left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with a test suggestion.

@AlexMaclean AlexMaclean force-pushed the dev/amaclean/upstream/ias-args branch from 08103f8 to 04ddbe9 Compare April 3, 2025 15:45
@AlexMaclean AlexMaclean force-pushed the dev/amaclean/upstream/ias-args branch from 04ddbe9 to 519bed1 Compare April 3, 2025 20:36
@AlexMaclean AlexMaclean merged commit ba0a52a into llvm:main Apr 3, 2025
12 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Apr 4, 2025

LLVM Buildbot has detected a new failure on builder lldb-remote-linux-ubuntu running on as-builder-9 while building llvm at step 16 "test-check-lldb-api".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/195/builds/7133

Here is the relevant piece of the build log for reference:
Step 16 (test-check-lldb-api) failure: Test just built components: check-lldb-api completed (failure)
...
PASS: lldb-api :: python_api/watchpoint/watchlocation/TestTargetWatchAddress.py (1238 of 1247)
PASS: lldb-api :: types/TestCharTypeExpr.py (1239 of 1247)
PASS: lldb-api :: types/TestIntegerType.py (1240 of 1247)
PASS: lldb-api :: types/TestRecursiveTypes.py (1241 of 1247)
PASS: lldb-api :: types/TestIntegerTypeExpr.py (1242 of 1247)
PASS: lldb-api :: types/TestShortType.py (1243 of 1247)
PASS: lldb-api :: types/TestShortTypeExpr.py (1244 of 1247)
PASS: lldb-api :: types/TestLongTypes.py (1245 of 1247)
PASS: lldb-api :: types/TestLongTypesExpr.py (1246 of 1247)
TIMEOUT: lldb-api :: python_api/process/cancel_attach/TestCancelAttach.py (1247 of 1247)
******************** TEST 'lldb-api :: python_api/process/cancel_attach/TestCancelAttach.py' FAILED ********************
Script:
--
/usr/bin/python3.12 /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin --libcxx-include-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/include/c++/v1 --libcxx-include-target-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/include/aarch64-unknown-linux-gnu/c++/v1 --libcxx-library-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./lib/aarch64-unknown-linux-gnu --arch aarch64 --build-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin/lldb --compiler /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang --dsymutil /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./bin --lldb-obj-root /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/tools/lldb --lldb-libs-dir /home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/./lib --platform-url connect://jetson-agx-2198.lab.llvm.org:1234 --platform-working-dir /home/ubuntu/lldb-tests --sysroot /mnt/fs/jetson-agx-ubuntu --env ARCH_CFLAGS=-mcpu=cortex-a78 --platform-name remote-linux --skip-category=lldb-server 
/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/test/API/python_api/process/cancel_attach -p TestCancelAttach.py
--
Exit Code: -9
Timeout: Reached timeout of 600 seconds

Command Output (stdout):
--
lldb version 21.0.0git (https://github.com/llvm/llvm-project.git revision ba0a52a04b140bb7ed75cca4e1c27ec7d747fa40)
  clang revision ba0a52a04b140bb7ed75cca4e1c27ec7d747fa40
  llvm revision ba0a52a04b140bb7ed75cca4e1c27ec7d747fa40

--
Command Output (stderr):
--
WARNING:root:Custom libc++ is not supported for remote runs: ignoring --libcxx arguments
FAIL: LLDB (/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/build/bin/clang-aarch64) :: test_scripted_implementation (TestCancelAttach.AttachCancelTestCase.test_scripted_implementation)

--

********************
Slowest Tests:
--------------------------------------------------------------------------
600.04s: lldb-api :: python_api/process/cancel_attach/TestCancelAttach.py
180.95s: lldb-api :: commands/command/script_alias/TestCommandScriptAlias.py
70.53s: lldb-api :: commands/process/attach/TestProcessAttach.py
40.53s: lldb-api :: functionalities/data-formatter/data-formatter-stl/libcxx-simulators/string/TestDataFormatterLibcxxStringSimulator.py
35.61s: lldb-api :: functionalities/single-thread-step/TestSingleThreadStepTimeout.py
35.06s: lldb-api :: functionalities/completion/TestCompletion.py
22.27s: lldb-api :: python_api/watchpoint/watchlocation/TestTargetWatchAddress.py
20.72s: lldb-api :: functionalities/gdb_remote_client/TestPlatformClient.py
20.60s: lldb-api :: commands/statistics/basic/TestStats.py
19.04s: lldb-api :: functionalities/thread/state/TestThreadStates.py
18.39s: lldb-api :: commands/dwim-print/TestDWIMPrint.py
14.86s: lldb-api :: functionalities/data-formatter/data-formatter-stl/generic/set/TestDataFormatterGenericSet.py
14.60s: lldb-api :: functionalities/inline-stepping/TestInlineStepping.py
14.54s: lldb-api :: commands/expression/expr-in-syscall/TestExpressionInSyscall.py

@llvm-ci
Copy link
Collaborator

llvm-ci commented Apr 4, 2025

LLVM Buildbot has detected a new failure on builder sanitizer-aarch64-linux-bootstrap-hwasan running on sanitizer-buildbot12 while building llvm at step 2 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/55/builds/9389

Here is the relevant piece of the build log for reference:
Step 2 (annotate) failure: 'python ../sanitizer_buildbot/sanitizers/zorg/buildbot/builders/sanitizers/buildbot_selector.py' (failure)
...
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using lld-link: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/lld-link
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using ld64.lld: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/ld64.lld
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using wasm-ld: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/wasm-ld
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using ld.lld: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/ld.lld
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using lld-link: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/lld-link
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using ld64.lld: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/ld64.lld
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using wasm-ld: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/wasm-ld
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 87449 tests, 72 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 
FAIL: LLVM :: ExecutionEngine/JITLink/x86-64/COFF_directive_include.s (53302 of 87449)
******************** TEST 'LLVM :: ExecutionEngine/JITLink/x86-64/COFF_directive_include.s' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/llvm-mc -filetype=obj -triple=x86_64-windows-msvc /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/test/ExecutionEngine/JITLink/x86-64/COFF_directive_include.s -o /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/test/ExecutionEngine/JITLink/x86-64/Output/COFF_directive_include.s.tmp # RUN: at line 1
+ /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/llvm-mc -filetype=obj -triple=x86_64-windows-msvc /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/test/ExecutionEngine/JITLink/x86-64/COFF_directive_include.s -o /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/test/ExecutionEngine/JITLink/x86-64/Output/COFF_directive_include.s.tmp
not /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/llvm-jitlink -noexec /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/test/ExecutionEngine/JITLink/x86-64/Output/COFF_directive_include.s.tmp 2>&1 | /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/FileCheck /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/test/ExecutionEngine/JITLink/x86-64/COFF_directive_include.s # RUN: at line 2
+ not /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/llvm-jitlink -noexec /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/test/ExecutionEngine/JITLink/x86-64/Output/COFF_directive_include.s.tmp
+ /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/FileCheck /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/test/ExecutionEngine/JITLink/x86-64/COFF_directive_include.s

--

********************
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90.. 
Slowest Tests:
--------------------------------------------------------------------------
56.03s: Clang :: Driver/fsanitize.c
40.48s: Clang :: Preprocessor/riscv-target-features.c
37.95s: Clang :: Driver/arm-cortex-cpus-2.c
37.46s: Clang :: Driver/arm-cortex-cpus-1.c
36.27s: LLVM :: CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir
33.47s: Clang :: OpenMP/target_defaultmap_codegen_01.cpp
32.01s: Clang :: OpenMP/target_update_codegen.cpp
28.99s: LLVM :: CodeGen/RISCV/attributes.ll
28.89s: Clang :: Preprocessor/aarch64-target-features.c
28.76s: Clang :: Preprocessor/arm-target-features.c
26.71s: Clang :: Driver/clang_f_opts.c
24.55s: Clang :: Driver/linux-ld.c
23.85s: Clang :: Preprocessor/predefined-arch-macros.c
23.80s: LLVM :: tools/llvm-reduce/parallel-workitem-kill.ll
23.64s: LLVM :: CodeGen/ARM/build-attributes.ll
22.91s: Clang :: Driver/cl-options.c
21.38s: Clang :: Driver/x86-target-features.c
19.56s: Clang :: CodeGen/AArch64/sve-intrinsics/acle_sve_reinterpret.c
19.09s: Clang :: Analysis/a_flaky_crash.cpp
18.73s: Clang :: Driver/debug-options.c

Step 11 (stage2/hwasan check) failure: stage2/hwasan check (failure)
...
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using lld-link: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/lld-link
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using ld64.lld: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/ld64.lld
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using wasm-ld: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/wasm-ld
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using ld.lld: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/ld.lld
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using lld-link: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/lld-link
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using ld64.lld: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/ld64.lld
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:520: note: using wasm-ld: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/wasm-ld
llvm-lit: /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 87449 tests, 72 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 
FAIL: LLVM :: ExecutionEngine/JITLink/x86-64/COFF_directive_include.s (53302 of 87449)
******************** TEST 'LLVM :: ExecutionEngine/JITLink/x86-64/COFF_directive_include.s' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/llvm-mc -filetype=obj -triple=x86_64-windows-msvc /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/test/ExecutionEngine/JITLink/x86-64/COFF_directive_include.s -o /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/test/ExecutionEngine/JITLink/x86-64/Output/COFF_directive_include.s.tmp # RUN: at line 1
+ /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/llvm-mc -filetype=obj -triple=x86_64-windows-msvc /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/test/ExecutionEngine/JITLink/x86-64/COFF_directive_include.s -o /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/test/ExecutionEngine/JITLink/x86-64/Output/COFF_directive_include.s.tmp
not /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/llvm-jitlink -noexec /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/test/ExecutionEngine/JITLink/x86-64/Output/COFF_directive_include.s.tmp 2>&1 | /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/FileCheck /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/test/ExecutionEngine/JITLink/x86-64/COFF_directive_include.s # RUN: at line 2
+ not /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/llvm-jitlink -noexec /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/test/ExecutionEngine/JITLink/x86-64/Output/COFF_directive_include.s.tmp
+ /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm_build_hwasan/bin/FileCheck /home/b/sanitizer-aarch64-linux-bootstrap-hwasan/build/llvm-project/llvm/test/ExecutionEngine/JITLink/x86-64/COFF_directive_include.s

--

********************
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90.. 
Slowest Tests:
--------------------------------------------------------------------------
56.03s: Clang :: Driver/fsanitize.c
40.48s: Clang :: Preprocessor/riscv-target-features.c
37.95s: Clang :: Driver/arm-cortex-cpus-2.c
37.46s: Clang :: Driver/arm-cortex-cpus-1.c
36.27s: LLVM :: CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir
33.47s: Clang :: OpenMP/target_defaultmap_codegen_01.cpp
32.01s: Clang :: OpenMP/target_update_codegen.cpp
28.99s: LLVM :: CodeGen/RISCV/attributes.ll
28.89s: Clang :: Preprocessor/aarch64-target-features.c
28.76s: Clang :: Preprocessor/arm-target-features.c
26.71s: Clang :: Driver/clang_f_opts.c
24.55s: Clang :: Driver/linux-ld.c
23.85s: Clang :: Preprocessor/predefined-arch-macros.c
23.80s: LLVM :: tools/llvm-reduce/parallel-workitem-kill.ll
23.64s: LLVM :: CodeGen/ARM/build-attributes.ll
22.91s: Clang :: Driver/cl-options.c
21.38s: Clang :: Driver/x86-target-features.c
19.56s: Clang :: CodeGen/AArch64/sve-intrinsics/acle_sve_reinterpret.c
19.09s: Clang :: Analysis/a_flaky_crash.cpp
18.73s: Clang :: Driver/debug-options.c


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants