Skip to content

Commit 4a026b5

Browse files
[AMDGCN] Use ZExt when handling indices in insertelement (#85718)
When i1 true is used as an index, SExt extends it to i32 -1, which would cause BitVector to overflow. The language reference manual specifies that the index shall be treated as an unsigned number; this patch fixes that by using the zero-extended value instead. (https://llvm.org/docs/LangRef.html#insertelement-instruction) This patch fixes #85717 --------- Signed-off-by: Peter Rong <[email protected]>
1 parent 3eb9ff3 commit 4a026b5

File tree

2 files changed

+53
-2
lines changed

2 files changed

+53
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,7 +1749,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
17491749
// Non constant index/out of bounds index -> folding is unlikely.
17501750
// The latter is more of a sanity check because canonical IR should just
17511751
// have replaced those with poison.
1752-
if (!Idx || Idx->getSExtValue() >= FVT->getNumElements())
1752+
if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())
17531753
return false;
17541754

17551755
const auto *VecSrc = IE->getOperand(0);
@@ -1761,7 +1761,7 @@ static bool isInterestingPHIIncomingValue(const Value *V) {
17611761
return false;
17621762

17631763
CurVal = VecSrc;
1764-
EltsCovered.set(Idx->getSExtValue());
1764+
EltsCovered.set(Idx->getZExtValue());
17651765

17661766
// All elements covered.
17671767
if (EltsCovered.all())

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,3 +1197,54 @@ reallyfinally:
11971197
store <5 x double> %val, ptr %out, align 1
11981198
ret void
11991199
}
1200+
1201+
define amdgpu_kernel void @pr85718(i1 %Bool, ptr %Ptr, <4 x float> %Vec1, <4 x float> %Vec2) {
1202+
; OPT-LABEL: @pr85718(
1203+
; OPT-NEXT: BB0:
1204+
; OPT-NEXT: [[I:%.*]] = insertelement <4 x float> [[VEC1:%.*]], float 4.200000e+01, i1 true
1205+
; OPT-NEXT: br label [[BB1:%.*]]
1206+
; OPT: BB1:
1207+
; OPT-NEXT: [[TMP0:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE0:%.*]], [[BB2:%.*]] ], [ [[LARGEPHI_EXTRACTSLICE1:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0:%.*]] ]
1208+
; OPT-NEXT: [[TMP1:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE3:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE4:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
1209+
; OPT-NEXT: [[TMP2:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE6:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE7:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
1210+
; OPT-NEXT: [[TMP3:%.*]] = phi float [ [[LARGEPHI_EXTRACTSLICE9:%.*]], [[BB2]] ], [ [[LARGEPHI_EXTRACTSLICE10:%.*]], [[BB1]] ], [ 0.000000e+00, [[BB0]] ]
1211+
; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
1212+
; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE0]], float [[TMP1]], i64 1
1213+
; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE1]], float [[TMP2]], i64 2
1214+
; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <4 x float> [[LARGEPHI_INSERTSLICE2]], float [[TMP3]], i64 3
1215+
; OPT-NEXT: store <4 x float> [[LARGEPHI_INSERTSLICE3]], ptr [[PTR:%.*]], align 128
1216+
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1]] = extractelement <4 x float> [[VEC2:%.*]], i64 0
1217+
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4]] = extractelement <4 x float> [[VEC2]], i64 1
1218+
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7]] = extractelement <4 x float> [[VEC2]], i64 2
1219+
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10]] = extractelement <4 x float> [[VEC2]], i64 3
1220+
; OPT-NEXT: br i1 [[BOOL:%.*]], label [[BB1]], label [[BB2]]
1221+
; OPT: BB2:
1222+
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0]] = extractelement <4 x float> [[I]], i64 0
1223+
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3]] = extractelement <4 x float> [[I]], i64 1
1224+
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6]] = extractelement <4 x float> [[I]], i64 2
1225+
; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9]] = extractelement <4 x float> [[I]], i64 3
1226+
; OPT-NEXT: br label [[BB1]]
1227+
;
1228+
; NOOPT-LABEL: @pr85718(
1229+
; NOOPT-NEXT: BB0:
1230+
; NOOPT-NEXT: [[I:%.*]] = insertelement <4 x float> [[VEC1:%.*]], float 4.200000e+01, i1 true
1231+
; NOOPT-NEXT: br label [[BB1:%.*]]
1232+
; NOOPT: BB1:
1233+
; NOOPT-NEXT: [[PHI:%.*]] = phi <4 x float> [ [[I]], [[BB2:%.*]] ], [ [[VEC2:%.*]], [[BB1]] ], [ zeroinitializer, [[BB0:%.*]] ]
1234+
; NOOPT-NEXT: store <4 x float> [[PHI]], ptr [[PTR:%.*]], align 128
1235+
; NOOPT-NEXT: br i1 [[BOOL:%.*]], label [[BB1]], label [[BB2]]
1236+
; NOOPT: BB2:
1237+
; NOOPT-NEXT: br label [[BB1]]
1238+
;
1239+
BB0:
1240+
%I = insertelement <4 x float> %Vec1, float 4.200000e+01, i1 true
1241+
br label %BB1
1242+
1243+
BB1: ; preds = %BB0, %BB1, %BB2
1244+
%PHI = phi <4 x float> [ %I, %BB2 ], [ %Vec2, %BB1 ], [ zeroinitializer, %BB0 ]
1245+
store <4 x float> %PHI, ptr %Ptr, align 128
1246+
br i1 %Bool, label %BB1, label %BB2
1247+
1248+
BB2: ; preds = %BB1
1249+
br label %BB1
1250+
}

0 commit comments

Comments
 (0)