Skip to content

Commit d3187dd

Browse files
committed
[SLP] add minimum test for miscompile (PR55688); NFC
1 parent be2cb82 commit d3187dd

File tree

1 file changed

+40
-5
lines changed

1 file changed

+40
-5
lines changed

llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll

Lines changed: 40 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -220,8 +220,8 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
220220
; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
221221
; ALL-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
222222
; ALL-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
223-
; ALL-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
224-
; ALL-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
223+
; ALL-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
224+
; ALL-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[OP_RDX]]
225225
; ALL-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
226226
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
227227
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
@@ -341,19 +341,19 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
341341
; ALL-NEXT: br label [[FOR_BODY:%.*]]
342342
; ALL: for.body:
343343
; ALL-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
344-
; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
344+
; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
345345
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
346346
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
347347
; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
348348
; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
349349
; ALL-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
350350
; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
351-
; ALL-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
351+
; ALL-NEXT: [[OP_RDX]] = fadd fast float [[TMP6]], [[SUM_042]]
352352
; ALL-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
353353
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
354354
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
355355
; ALL: for.cond.for.end_crit_edge:
356-
; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
356+
; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_RDX]] to i32
357357
; ALL-NEXT: br label [[FOR_END]]
358358
; ALL: for.end:
359359
; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
@@ -1463,4 +1463,39 @@ define float @fadd_v4f32_fmf_intersect(float* %p) {
14631463
ret float %add3
14641464
}
14651465

1466+
; FIXME: This is a miscompile (PR55688): no-wrap guarantees can't be preserved with reassociated math.
1467+
; This must not propagate 'nsw' to a new add instruction, but the second set of
; expected output below (the one calling llvm.vector.reduce.add) still has
; 'add nsw' on the add that combines the reduction result with %start.
1468+
1469+
define void @nsw_propagation_v4i32(i32* %res, i32 %start) {
1470+
; CHECK-LABEL: @nsw_propagation_v4i32(
1471+
; CHECK-NEXT: [[T0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
1472+
; CHECK-NEXT: [[T1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
1473+
; CHECK-NEXT: [[T2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
1474+
; CHECK-NEXT: [[T3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
1475+
; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[START:%.*]], [[T0]]
1476+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T1]], [[S]]
1477+
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[T2]], [[ADD]]
1478+
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[T3]], [[ADD_1]]
1479+
; CHECK-NEXT: store i32 [[ADD_2]], i32* [[RES:%.*]], align 16
1480+
; CHECK-NEXT: ret void
1481+
;
1482+
; STORE-LABEL: @nsw_propagation_v4i32(
1483+
; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16
1484+
; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
1485+
; STORE-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[TMP2]], [[START:%.*]]
1486+
; STORE-NEXT: store i32 [[OP_RDX]], i32* [[RES:%.*]], align 16
1487+
; STORE-NEXT: ret void
1488+
;
; Load the four i32 elements at indices 0-3 of @arr_i32.
1489+
%t0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
1490+
%t1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
1491+
%t2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
1492+
%t3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
; Serial add chain: %start is combined with %t0 first, then %t1, %t2 and %t3
; are added one at a time. Every add in this chain carries 'nsw'.
1493+
%s = add nsw i32 %start, %t0
1494+
%add = add nsw i32 %t1, %s
1495+
%add.1 = add nsw i32 %t2, %add
1496+
%add.2 = add nsw i32 %t3, %add.1
; Store the final sum through %res.
1497+
store i32 %add.2, i32* %res, align 16
1498+
ret void
1499+
}
1500+
14661501
declare i32 @__gxx_personality_v0(...)

0 commit comments

Comments
 (0)