@@ -220,8 +220,8 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
; ALL-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
; ALL-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
- ; ALL-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
- ; ALL-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
+ ; ALL-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
+ ; ALL-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[OP_RDX]]
; ALL-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
@@ -341,19 +341,19 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
; ALL-NEXT: br label [[FOR_BODY:%.*]]
; ALL: for.body:
; ALL-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
- ; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+ ; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; ALL-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
- ; ALL-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
+ ; ALL-NEXT: [[OP_RDX]] = fadd fast float [[TMP6]], [[SUM_042]]
; ALL-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; ALL: for.cond.for.end_crit_edge:
- ; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
+ ; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_RDX]] to i32
; ALL-NEXT: br label [[FOR_END]]
; ALL: for.end:
; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
@@ -1463,4 +1463,39 @@ define float @fadd_v4f32_fmf_intersect(float* %p) {
ret float %add3
}

+ ; FIXME: Can't preserve no-wrap guarantees with reassociated math.
+ ; This must not propagate 'nsw' to a new add instruction.
+
+ define void @nsw_propagation_v4i32(i32* %res, i32 %start) {
+ ; CHECK-LABEL: @nsw_propagation_v4i32(
+ ; CHECK-NEXT: [[T0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
+ ; CHECK-NEXT: [[T1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
+ ; CHECK-NEXT: [[T2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
+ ; CHECK-NEXT: [[T3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
+ ; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[START:%.*]], [[T0]]
+ ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T1]], [[S]]
+ ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[T2]], [[ADD]]
+ ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[T3]], [[ADD_1]]
+ ; CHECK-NEXT: store i32 [[ADD_2]], i32* [[RES:%.*]], align 16
+ ; CHECK-NEXT: ret void
+ ;
+ ; STORE-LABEL: @nsw_propagation_v4i32(
+ ; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16
+ ; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
+ ; STORE-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[TMP2]], [[START:%.*]]
+ ; STORE-NEXT: store i32 [[OP_RDX]], i32* [[RES:%.*]], align 16
+ ; STORE-NEXT: ret void
+ ;
+ %t0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
+ %t1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
+ %t2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
+ %t3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
+ %s = add nsw i32 %start, %t0
+ %add = add nsw i32 %t1, %s
+ %add.1 = add nsw i32 %t2, %add
+ %add.2 = add nsw i32 %t3, %add.1
+ store i32 %add.2, i32* %res, align 16
+ ret void
+ }
+
declare i32 @__gxx_personality_v0(...)
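For context on the FIXME in the new test: every add in the scalar chain carries 'nsw', but once the SLP reduction reassociates the sum, the partial sums are computed in a different order, and those partial sums can leave the i32 range even when the original ones never do. Below is a minimal C sketch of that mismatch; the input values are invented purely for illustration and are not part of the test.

```c
/* Sketch (hypothetical values): the same mathematical sum, evaluated in the
 * original scalar order and in the reassociated reduction order.  Only the
 * reassociated order produces a partial sum outside the i32 range, which is
 * why 'nsw' from the scalar chain cannot simply be copied onto new adds. */
#include <stdio.h>
#include <limits.h>

int main(void) {
    long long start = -1;
    long long t[4] = {INT_MAX, 1, 0, 0};

    /* Original order: ((((start + t0) + t1) + t2) + t3).
     * Every partial sum stays within [INT_MIN, INT_MAX], so 'nsw' holds. */
    long long scalar = start;
    for (int i = 0; i < 4; ++i) {
        scalar += t[i];
        printf("scalar partial %d: %lld (in i32 range: %d)\n",
               i, scalar, scalar >= INT_MIN && scalar <= INT_MAX);
    }

    /* Reassociated order: (t0 + t1 + t2 + t3) + start.
     * The partial sum t0 + t1 is INT_MAX + 1, which already leaves the i32
     * range, so an i32 'add nsw' computing it would be poison. */
    long long red = 0;
    for (int i = 0; i < 4; ++i) {
        red += t[i];
        printf("reduce partial %d: %lld (in i32 range: %d)\n",
               i, red, red >= INT_MIN && red <= INT_MAX);
    }
    red += start;

    printf("final sums agree: %d\n", scalar == red);
    return 0;
}
```

With these inputs the original evaluation order never wraps while the reassociated one does, which is the situation the FIXME warns about when the STORE output propagates 'nsw' onto the new [[OP_RDX]] add.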