Skip to content

Commit a25b6a1

Browse files
authored
Revert "AMDGPU: Handle demanded subvectors for readfirstlane (#128648)"
This reverts commit af755af.
1 parent f8cf007 commit a25b6a1

File tree

2 files changed

+29
-66
lines changed

2 files changed

+29
-66
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 10 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1574,59 +1574,35 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
15741574
const unsigned LastElt = DemandedElts.getActiveBits() - 1;
15751575
const unsigned MaskLen = LastElt - FirstElt + 1;
15761576

1577-
unsigned OldNumElts = VT->getNumElements();
1578-
if (MaskLen == OldNumElts && MaskLen != 1)
1577+
// TODO: Handle general subvector extract.
1578+
if (MaskLen != 1)
15791579
return nullptr;
15801580

15811581
Type *EltTy = VT->getElementType();
1582-
Type *NewVT = MaskLen == 1 ? EltTy : FixedVectorType::get(EltTy, MaskLen);
1583-
1584-
// Theoretically we should support these intrinsics for any legal type. Avoid
1585-
// introducing cases that aren't direct register types like v3i16.
1586-
if (!isTypeLegal(NewVT))
1582+
if (!isTypeLegal(EltTy))
15871583
return nullptr;
15881584

15891585
Value *Src = II.getArgOperand(0);
15901586

1587+
assert(FirstElt == LastElt);
1588+
Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
1589+
15911590
// Make sure convergence tokens are preserved.
15921591
// TODO: CreateIntrinsic should allow directly copying bundles
15931592
SmallVector<OperandBundleDef, 2> OpBundles;
15941593
II.getOperandBundlesAsDefs(OpBundles);
15951594

15961595
Module *M = IC.Builder.GetInsertBlock()->getModule();
1597-
Function *Remangled =
1598-
Intrinsic::getOrInsertDeclaration(M, II.getIntrinsicID(), {NewVT});
1599-
1600-
if (MaskLen == 1) {
1601-
Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
1602-
1603-
// TODO: Preserve callsite attributes?
1604-
CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
1605-
1606-
return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()),
1607-
NewCall, FirstElt);
1608-
}
1609-
1610-
SmallVector<int> ExtractMask(MaskLen, -1);
1611-
for (unsigned I = 0; I != MaskLen; ++I) {
1612-
if (DemandedElts[FirstElt + I])
1613-
ExtractMask[I] = FirstElt + I;
1614-
}
1615-
1616-
Value *Extract = IC.Builder.CreateShuffleVector(Src, ExtractMask);
1596+
Function *Remangled = Intrinsic::getOrInsertDeclaration(
1597+
M, II.getIntrinsicID(), {Extract->getType()});
16171598

16181599
// TODO: Preserve callsite attributes?
16191600
CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
16201601

1621-
SmallVector<int> InsertMask(OldNumElts, -1);
1622-
for (unsigned I = 0; I != MaskLen; ++I) {
1623-
if (DemandedElts[FirstElt + I])
1624-
InsertMask[FirstElt + I] = I;
1625-
}
1626-
16271602
// FIXME: If the call has a convergence bundle, we end up leaving the dead
16281603
// call behind.
1629-
return IC.Builder.CreateShuffleVector(NewCall, InsertMask);
1604+
return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()), NewCall,
1605+
FirstElt);
16301606
}
16311607

16321608
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(

llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

Lines changed: 19 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -64,8 +64,8 @@ define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) {
6464
define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
6565
; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane(
6666
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
67-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
68-
; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
67+
; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
68+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
6969
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
7070
;
7171
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -76,8 +76,8 @@ define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
7676
define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
7777
; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane(
7878
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
79-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
80-
; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
79+
; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
80+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
8181
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
8282
;
8383
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -88,8 +88,8 @@ define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
8888
define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
8989
; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane(
9090
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
91-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
92-
; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
91+
; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
92+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
9393
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
9494
;
9595
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -100,9 +100,8 @@ define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
100100
define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
101101
; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane(
102102
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
103-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
104-
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
105-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> <i32 1, i32 0>
103+
; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
104+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 0>
106105
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
107106
;
108107
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -113,9 +112,7 @@ define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
113112
define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
114113
; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane(
115114
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
116-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
117-
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
118-
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
115+
; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
119116
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2>
120117
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
121118
;
@@ -261,8 +258,8 @@ define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) {
261258
define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
262259
; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane(
263260
; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
264-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
265-
; CHECK-NEXT: [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
261+
; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
262+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
266263
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
267264
;
268265
%vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -273,8 +270,8 @@ define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
273270
define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
274271
; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane(
275272
; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
276-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
277-
; CHECK-NEXT: [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
273+
; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
274+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
278275
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
279276
;
280277
%vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -285,9 +282,7 @@ define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
285282
define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
286283
; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane(
287284
; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
288-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
289-
; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
290-
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
285+
; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
291286
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
292287
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
293288
;
@@ -326,9 +321,8 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0,
326321
; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(
327322
; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
328323
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
329-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
330-
; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]])
331-
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
324+
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
325+
; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
332326
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
333327
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
334328
;
@@ -371,10 +365,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergenc
371365
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
372366
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
373367
; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
374-
; CHECK-NEXT: [[VEC1:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
375-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
376-
; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]]) [ "convergencectrl"(token [[T]]) ]
377-
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
368+
; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
378369
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
379370
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
380371
;
@@ -413,9 +404,7 @@ define <2 x i1> @extract_elt01_v4i1_readfirstlane(<4 x i1> %src) {
413404
define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) {
414405
; CHECK-LABEL: define <2 x i32> @extract_elt13_v8i32_readfirstlane(
415406
; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
416-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
417-
; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
418-
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison>
407+
; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
419408
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 3>
420409
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
421410
;
@@ -439,9 +428,7 @@ define <2 x i32> @extract_elt03_v4i32_readfirstlane(<4 x i32> %src) {
439428
define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) {
440429
; CHECK-LABEL: define <3 x i32> @extract_elt124_v8i32_readfirstlane(
441430
; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
442-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 4>
443-
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[TMP1]])
444-
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 poison, i32 poison>
431+
; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
445432
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4>
446433
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
447434
;

0 commit comments

Comments
 (0)