-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Revert "AMDGPU: Handle demanded subvectors for readfirstlane" #130276
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Revert "AMDGPU: Handle demanded subvectors for readfirstlane" #130276
Conversation
This reverts commit af755af.
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Jan Patrick Lehr (jplehr) Changes: Reverts llvm/llvm-project#128648. Broke buildbots. Full diff: https://github.com/llvm/llvm-project/pull/130276.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index ef076814ffdab..ebe740f884ea6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1574,59 +1574,35 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
const unsigned LastElt = DemandedElts.getActiveBits() - 1;
const unsigned MaskLen = LastElt - FirstElt + 1;
- unsigned OldNumElts = VT->getNumElements();
- if (MaskLen == OldNumElts && MaskLen != 1)
+ // TODO: Handle general subvector extract.
+ if (MaskLen != 1)
return nullptr;
Type *EltTy = VT->getElementType();
- Type *NewVT = MaskLen == 1 ? EltTy : FixedVectorType::get(EltTy, MaskLen);
-
- // Theoretically we should support these intrinsics for any legal type. Avoid
- // introducing cases that aren't direct register types like v3i16.
- if (!isTypeLegal(NewVT))
+ if (!isTypeLegal(EltTy))
return nullptr;
Value *Src = II.getArgOperand(0);
+ assert(FirstElt == LastElt);
+ Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
+
// Make sure convergence tokens are preserved.
// TODO: CreateIntrinsic should allow directly copying bundles
SmallVector<OperandBundleDef, 2> OpBundles;
II.getOperandBundlesAsDefs(OpBundles);
Module *M = IC.Builder.GetInsertBlock()->getModule();
- Function *Remangled =
- Intrinsic::getOrInsertDeclaration(M, II.getIntrinsicID(), {NewVT});
-
- if (MaskLen == 1) {
- Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
-
- // TODO: Preserve callsite attributes?
- CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
-
- return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()),
- NewCall, FirstElt);
- }
-
- SmallVector<int> ExtractMask(MaskLen, -1);
- for (unsigned I = 0; I != MaskLen; ++I) {
- if (DemandedElts[FirstElt + I])
- ExtractMask[I] = FirstElt + I;
- }
-
- Value *Extract = IC.Builder.CreateShuffleVector(Src, ExtractMask);
+ Function *Remangled = Intrinsic::getOrInsertDeclaration(
+ M, II.getIntrinsicID(), {Extract->getType()});
// TODO: Preserve callsite attributes?
CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
- SmallVector<int> InsertMask(OldNumElts, -1);
- for (unsigned I = 0; I != MaskLen; ++I) {
- if (DemandedElts[FirstElt + I])
- InsertMask[FirstElt + I] = I;
- }
-
// FIXME: If the call has a convergence bundle, we end up leaving the dead
// call behind.
- return IC.Builder.CreateShuffleVector(NewCall, InsertMask);
+ return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()), NewCall,
+ FirstElt);
}
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
index ec645a7ff4519..e9d3b5e963b35 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -64,8 +64,8 @@ define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) {
define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane(
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -76,8 +76,8 @@ define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane(
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -88,8 +88,8 @@ define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane(
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -100,9 +100,8 @@ define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane(
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -113,9 +112,7 @@ define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane(
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
@@ -261,8 +258,8 @@ define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) {
define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane(
; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
;
%vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -273,8 +270,8 @@ define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane(
; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
;
%vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -285,9 +282,7 @@ define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane(
; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
@@ -326,9 +321,8 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0,
; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(
; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
-; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
+; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
@@ -371,10 +365,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergenc
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
-; CHECK-NEXT: [[VEC1:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
-; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]]) [ "convergencectrl"(token [[T]]) ]
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
@@ -413,9 +404,7 @@ define <2 x i1> @extract_elt01_v4i1_readfirstlane(<4 x i1> %src) {
define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) {
; CHECK-LABEL: define <2 x i32> @extract_elt13_v8i32_readfirstlane(
; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
@@ -439,9 +428,7 @@ define <2 x i32> @extract_elt03_v4i32_readfirstlane(<4 x i32> %src) {
define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) {
; CHECK-LABEL: define <3 x i32> @extract_elt124_v8i32_readfirstlane(
; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 4>
-; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4>
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
;
|
Just regenerate the test checks |
How do you know when to just regenerate the test checks versus when something is actually broken? |
Done in 4136395 |
It depends; look at the diff. Most breaks of this type just need the test checks regenerated. |
Thank you. |
Reverts #128648
Broke buildbots:
https://lab.llvm.org/buildbot/#/builders/10/builds/816
https://lab.llvm.org/buildbot/#/builders/140/builds/18458
and others.