Skip to content

Commit e1d6d36

Browse files
[SLP] Don't allow Div/Rem as alternate opcodes
Summary: We don't have control/verify what will be the RHS of the division, so it might happen to be zero, causing UB. Reviewers: Vasilis, RKSimon, ABataev Reviewed By: ABataev Subscribers: vporpo, ABataev, hiraditya, llvm-commits, vdmitrie Tags: #llvm Differential Revision: https://reviews.llvm.org/D72740
1 parent be96042 commit e1d6d36

File tree

2 files changed

+49
-35
lines changed

2 files changed

+49
-35
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,18 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) {
377377
return S.OpValue;
378378
}
379379

380+
/// \returns true if \p Opcode is allowed as part of of the main/alternate
381+
/// instruction for SLP vectorization.
382+
///
383+
/// Example of unsupported opcode is SDIV that can potentially cause UB if the
384+
/// "shuffled out" lane would result in division by zero.
385+
static bool isValidForAlternation(unsigned Opcode) {
386+
if (Instruction::isIntDivRem(Opcode))
387+
return false;
388+
389+
return true;
390+
}
391+
380392
/// \returns analysis of the Instructions in \p VL described in
381393
/// InstructionsState, the Opcode that we suppose the whole list
382394
/// could be vectorized even if its structure is diverse.
@@ -399,7 +411,8 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
399411
if (IsBinOp && isa<BinaryOperator>(VL[Cnt])) {
400412
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
401413
continue;
402-
if (Opcode == AltOpcode) {
414+
if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
415+
isValidForAlternation(Opcode)) {
403416
AltOpcode = InstOpcode;
404417
AltIndex = Cnt;
405418
continue;
@@ -411,6 +424,9 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
411424
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
412425
continue;
413426
if (Opcode == AltOpcode) {
427+
assert(isValidForAlternation(Opcode) &&
428+
isValidForAlternation(InstOpcode) &&
429+
"Cast isn't safe for alternation, logic needs to be updated!");
414430
AltOpcode = InstOpcode;
415431
AltIndex = Cnt;
416432
continue;

llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll

Lines changed: 32 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,22 @@ define void @test_add_sdiv(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
1212
; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr i32, i32* [[ARR2]], i32 1
1313
; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, i32* [[ARR2]], i32 2
1414
; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, i32* [[ARR2]], i32 3
15-
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[GEP1_0]] to <4 x i32>*
16-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
17-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[A0:%.*]], i32 0
18-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
19-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A2:%.*]], i32 2
20-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[A3:%.*]], i32 3
21-
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 1146, i32 146, i32 42, i32 0>
22-
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP1]], [[TMP6]]
23-
24-
;; FIXME: Last lane of TMP6 may contain zero (if %a3 is zero). In such case, the
25-
;; next instruction would cause division by zero resulting in SIGFPE during
26-
;; execution.
27-
; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP1]], [[TMP6]]
28-
29-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
30-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[GEP2_0]] to <4 x i32>*
31-
; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4
15+
; CHECK-NEXT: [[V0:%.*]] = load i32, i32* [[GEP1_0]]
16+
; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[GEP1_1]]
17+
; CHECK-NEXT: [[V2:%.*]] = load i32, i32* [[GEP1_2]]
18+
; CHECK-NEXT: [[V3:%.*]] = load i32, i32* [[GEP1_3]]
19+
; CHECK-NEXT: [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
20+
; CHECK-NEXT: [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
21+
; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
22+
; CHECK-NEXT: [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0
23+
; CHECK-NEXT: [[RES0:%.*]] = add nsw i32 [[V0]], [[Y0]]
24+
; CHECK-NEXT: [[RES1:%.*]] = add nsw i32 [[V1]], [[Y1]]
25+
; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
26+
; CHECK-NEXT: [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
27+
; CHECK-NEXT: store i32 [[RES0]], i32* [[GEP2_0]]
28+
; CHECK-NEXT: store i32 [[RES1]], i32* [[GEP2_1]]
29+
; CHECK-NEXT: store i32 [[RES2]], i32* [[GEP2_2]]
30+
; CHECK-NEXT: store i32 [[RES3]], i32* [[GEP2_3]]
3231
; CHECK-NEXT: ret void
3332
;
3433
entry:
@@ -77,23 +76,22 @@ define void @test_urem_add(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
7776
; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr i32, i32* [[ARR2]], i32 1
7877
; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, i32* [[ARR2]], i32 2
7978
; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, i32* [[ARR2]], i32 3
80-
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[GEP1_0]] to <4 x i32>*
81-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
82-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[A0:%.*]], i32 0
83-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1
84-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[A2:%.*]], i32 2
85-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[A3:%.*]], i32 3
86-
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 1146, i32 146, i32 42, i32 0>
87-
88-
;; FIXME: Last lane of TMP6 may contain zero (if %a3 is zero). In such case, the
89-
;; next instruction would cause division by zero resulting in SIGFPE during
90-
;; execution.
91-
; CHECK-NEXT: [[TMP7:%.*]] = urem <4 x i32> [[TMP1]], [[TMP6]]
92-
93-
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP1]], [[TMP6]]
94-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
95-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[GEP2_0]] to <4 x i32>*
96-
; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4
79+
; CHECK-NEXT: [[V0:%.*]] = load i32, i32* [[GEP1_0]]
80+
; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[GEP1_1]]
81+
; CHECK-NEXT: [[V2:%.*]] = load i32, i32* [[GEP1_2]]
82+
; CHECK-NEXT: [[V3:%.*]] = load i32, i32* [[GEP1_3]]
83+
; CHECK-NEXT: [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
84+
; CHECK-NEXT: [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
85+
; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
86+
; CHECK-NEXT: [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0
87+
; CHECK-NEXT: [[RES0:%.*]] = urem i32 [[V0]], [[Y0]]
88+
; CHECK-NEXT: [[RES1:%.*]] = urem i32 [[V1]], [[Y1]]
89+
; CHECK-NEXT: [[RES2:%.*]] = urem i32 [[V2]], [[Y2]]
90+
; CHECK-NEXT: [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
91+
; CHECK-NEXT: store i32 [[RES0]], i32* [[GEP2_0]]
92+
; CHECK-NEXT: store i32 [[RES1]], i32* [[GEP2_1]]
93+
; CHECK-NEXT: store i32 [[RES2]], i32* [[GEP2_2]]
94+
; CHECK-NEXT: store i32 [[RES3]], i32* [[GEP2_3]]
9795
; CHECK-NEXT: ret void
9896
;
9997
entry:

0 commit comments

Comments
 (0)