Skip to content

AMDGPU: Handle demanded subvectors for readfirstlane #128648

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Feb 25, 2025

No description provided.

Copy link
Contributor Author

arsenm commented Feb 25, 2025

@llvmbot
Copy link
Member

llvmbot commented Feb 25, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/128648.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+37-10)
  • (modified) llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll (+31-20)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 99016fdd0ff91..10deee1616a74 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1549,33 +1549,60 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
   const unsigned LastElt = DemandedElts.getActiveBits() - 1;
   const unsigned MaskLen = LastElt - FirstElt + 1;
 
-  // TODO: Handle general subvector extract.
-  if (MaskLen != 1)
+  unsigned OldNumElts = VT->getNumElements();
+  if (MaskLen == OldNumElts && MaskLen != 1)
     return nullptr;
 
   Type *EltTy = VT->getElementType();
-  if (!isTypeLegal(EltTy))
+  Type *NewVT = MaskLen == 1 ? EltTy : FixedVectorType::get(EltTy, MaskLen);
+
+  // Theoretically we should support these intrinsics for any legal type. Avoid
+  // introducing cases that aren't direct register types like v3i16.
+  if (!isTypeLegal(NewVT))
     return nullptr;
 
   Value *Src = II.getArgOperand(0);
 
-  assert(FirstElt == LastElt);
-  Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
-
   // Make sure convergence tokens are preserved.
   // TODO: CreateIntrinsic should allow directly copying bundles
   SmallVector<OperandBundleDef, 2> OpBundles;
   II.getOperandBundlesAsDefs(OpBundles);
 
   Module *M = IC.Builder.GetInsertBlock()->getModule();
-  Function *Remangled = Intrinsic::getOrInsertDeclaration(
-      M, II.getIntrinsicID(), {Extract->getType()});
+  Function *Remangled =
+      Intrinsic::getOrInsertDeclaration(M, II.getIntrinsicID(), {NewVT});
+
+  if (MaskLen == 1) {
+    Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
+
+    // TODO: Preserve callsite attributes?
+    CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
+
+    Value *Result = IC.Builder.CreateInsertElement(
+        PoisonValue::get(II.getType()), NewCall, FirstElt);
+    IC.replaceInstUsesWith(II, Result);
+    IC.eraseInstFromFunction(II);
+    return Result;
+  }
+
+  SmallVector<int> ExtractMask(MaskLen, -1);
+  for (unsigned I = 0; I != MaskLen; ++I) {
+    if (DemandedElts[FirstElt + I])
+      ExtractMask[I] = FirstElt + I;
+  }
+
+  Value *Extract = IC.Builder.CreateShuffleVector(Src, ExtractMask);
 
   // TODO: Preserve callsite attributes?
   CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
 
-  Value *Result = IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()),
-                                                 NewCall, FirstElt);
+  SmallVector<int> InsertMask(OldNumElts, -1);
+  for (unsigned I = 0; I != MaskLen; ++I) {
+    if (DemandedElts[FirstElt + I])
+      InsertMask[FirstElt + I] = I;
+  }
+
+  Value *Result = IC.Builder.CreateShuffleVector(NewCall, InsertMask);
   IC.replaceInstUsesWith(II, Result);
   IC.eraseInstFromFunction(II);
   return Result;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
index e9d3b5e963b35..056caabb6d60a 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -64,8 +64,8 @@ define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
 ;
   %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -76,8 +76,8 @@ define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
 ;
   %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -88,8 +88,8 @@ define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
 ;
   %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -100,8 +100,9 @@ define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
 ;
   %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -112,7 +113,9 @@ define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2>
 ; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
 ;
@@ -258,8 +261,8 @@ define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) {
 define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
 ; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane(
 ; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
 ;
   %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -270,8 +273,8 @@ define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
 define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
 ; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane(
 ; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
 ;
   %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -282,7 +285,9 @@ define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
 define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
 ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane(
 ; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
 ;
@@ -321,8 +326,9 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0,
 ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(
 ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
-; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]])
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
 ;
@@ -364,8 +370,9 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergenc
 ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
-; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
-; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
 ;
@@ -404,7 +411,9 @@ define <2 x i1> @extract_elt01_v4i1_readfirstlane(<4 x i1> %src) {
 define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) {
 ; CHECK-LABEL: define <2 x i32> @extract_elt13_v8i32_readfirstlane(
 ; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
 ;
@@ -428,7 +437,9 @@ define <2 x i32> @extract_elt03_v4i32_readfirstlane(<4 x i32> %src) {
 define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) {
 ; CHECK-LABEL: define <3 x i32> @extract_elt124_v8i32_readfirstlane(
 ; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4>
 ; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
 ;

@arsenm arsenm marked this pull request as ready for review February 25, 2025 07:44
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Feb 25, 2025
@arsenm arsenm force-pushed the users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane branch from bfe67bc to c80695c Compare February 28, 2025 06:03
@arsenm arsenm force-pushed the users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane-subvectors branch from 0c05270 to ce66b73 Compare February 28, 2025 06:03
Base automatically changed from users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane to main March 5, 2025 01:35
@arsenm arsenm force-pushed the users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane-subvectors branch from ce66b73 to 2500163 Compare March 5, 2025 01:37
@arsenm arsenm force-pushed the users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane-subvectors branch from 2500163 to 632f45d Compare March 5, 2025 10:00
@arsenm
Copy link
Contributor Author

arsenm commented Mar 7, 2025

ping

SmallVector<int> InsertMask(OldNumElts, -1);
for (unsigned I = 0; I != MaskLen; ++I) {
if (DemandedElts[FirstElt + I])
InsertMask[FirstElt + I] = I;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Could this be folded into the loop above?

Copy link
Contributor

@pravinjagtap pravinjagtap left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@arsenm arsenm merged commit af755af into main Mar 7, 2025
11 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/simplify-demanded-vector-elts-readfirstlane-subvectors branch March 7, 2025 10:54
@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder ml-opt-devrel-x86-64 running on ml-opt-devrel-x86-64-b1 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/14477

Here is the relevant piece of the build log for reference:
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-devrel-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /b/ml-opt-devrel-x86-64-b1/build/bin/FileCheck /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /b/ml-opt-devrel-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
+ /b/ml-opt-devrel-x86-64-b1/build/bin/FileCheck /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
/b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder ml-opt-rel-x86-64 running on ml-opt-rel-x86-64-b1 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/14398

Here is the relevant piece of the build log for reference:
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-rel-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /b/ml-opt-rel-x86-64-b1/build/bin/FileCheck /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /b/ml-opt-rel-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
+ /b/ml-opt-rel-x86-64-b1/build/bin/FileCheck /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
/b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder ml-opt-dev-x86-64 running on ml-opt-dev-x86-64-b1 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/14655

Here is the relevant piece of the build log for reference:
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-dev-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /b/ml-opt-dev-x86-64-b1/build/bin/FileCheck /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /b/ml-opt-dev-x86-64-b1/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
+ /b/ml-opt-dev-x86-64-b1/build/bin/FileCheck /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
/b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

@jplehr
Copy link
Contributor

jplehr commented Mar 7, 2025

This turned some of our bots red, e.g., https://lab.llvm.org/buildbot/#/builders/140/builds/18458

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder lld-x86_64-ubuntu-fast running on as-builder-4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/12608

Here is the relevant piece of the build log for reference:
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/FileCheck /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
+ /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/FileCheck /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
/home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder premerge-monolithic-linux running on premerge-linux-1 while building llvm at step 7 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/24990

Here is the relevant piece of the build log for reference:
Step 7 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /build/buildbot/premerge-monolithic-linux/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /build/buildbot/premerge-monolithic-linux/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
+ /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
/build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /build/buildbot/premerge-monolithic-linux/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder llvm-x86_64-debian-dylib running on gribozavr4 while building llvm at step 7 "test-build-unified-tree-check-llvm".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/21386

Here is the relevant piece of the build log for reference:
Step 7 (test-build-unified-tree-check-llvm) failure: test (failure)
******************** TEST 'LLVM :: Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/1/llvm-x86_64-debian-dylib/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll | /b/1/llvm-x86_64-debian-dylib/build/bin/FileCheck /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /b/1/llvm-x86_64-debian-dylib/build/bin/FileCheck /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+ /b/1/llvm-x86_64-debian-dylib/build/bin/opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine
/b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll:373:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
              ^
<stdin>:191:55: note: scanning from here
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:191:55: note: with "TMP1" equal to "%1"
 %1 = insertelement <4 x i32> poison, i32 %src0, i64 0
                                                      ^
<stdin>:194:2: note: possible intended match here
 %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
 ^

Input file: <stdin>
Check file: /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
          186: } 
          187:  
          188: ; Function Attrs: convergent 
          189: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) #1 { 
          190:  %t = call token @llvm.experimental.convergence.entry() 
          191:  %1 = insertelement <4 x i32> poison, i32 %src0, i64 0 
next:373'0                                                           X error: no match found
next:373'1                                                             with "TMP1" equal to "%1"
          192:  %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          193:  %3 = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> %2) [ "convergencectrl"(token %t) ] 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          194:  %4 = shufflevector <3 x i32> %3, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
next:373'2      ?                                                                                                    possible intended match
          195:  %shuffle = shufflevector <4 x i32> %4, <4 x i32> poison, <2 x i32> <i32 1, i32 3> 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          196:  ret <2 x i32> %shuffle 
next:373'0     ~~~~~~~~~~~~~~~~~~~~~~~~
...

jph-13 pushed a commit to jph-13/llvm-project that referenced this pull request Mar 21, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants