Skip to content

Commit d5c9ffd

Browse files
authored
[SDAG] Intersect poison-generating flags after CSE (#97434)
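This patch fixes a miscompilation when `N` gets CSEed to `Existing`:

```
Existing: t5: i32 = sub nuw Constant:i32<0>, t3
N: t30: i32 = sub Constant:i32<0>, t3
```

Here `Existing` carries the poison-generating `nuw` flag while `N` does not, so replacing the uses of `N` with `Existing` is only sound if `Existing` keeps just the flags that both nodes share.

Fixes #96366.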
1 parent 08888d0 commit d5c9ffd

File tree

4 files changed

+56
-1
lines changed

4 files changed

+56
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,7 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
12391239
// If there was already an existing matching node, use ReplaceAllUsesWith
12401240
// to replace the dead one with the existing one. This can cause
12411241
// recursive merging of other unrelated nodes down the line.
1242+
Existing->intersectFlagsWith(N->getFlags());
12421243
ReplaceAllUsesWith(N, Existing);
12431244

12441245
// N is now dead. Inform the listeners and delete it.

llvm/test/CodeGen/AArch64/pr96366.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
3+
4+
declare void @use(i32)
5+
6+
define i32 @f(i32 %x) nounwind {
7+
; CHECK-LABEL: f:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
10+
; CHECK-NEXT: mov w19, w0
11+
; CHECK-NEXT: neg w0, w0
12+
; CHECK-NEXT: bl use
13+
; CHECK-NEXT: mov w8, #4 // =0x4
14+
; CHECK-NEXT: sub w0, w8, w19
15+
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
16+
; CHECK-NEXT: ret
17+
%sub1 = sub nuw i32 0, %x
18+
call void @use(i32 %sub1)
19+
%sub2 = sub i32 1, %x
20+
%sub3 = sub i32 3, %x
21+
%mul = mul i32 %x, 1
22+
%add1 = add i32 %sub2, %mul
23+
%add2 = add i32 %add1, %sub3
24+
ret i32 %add2
25+
}

llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ define float @test2(float %arg, float %arg1) {
9090
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1120534528
9191
; CHECK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, killed [[S_MOV_B32_]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
9292
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
93-
; CHECK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY1]], 0, killed [[S_MOV_B32_1]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec
93+
; CHECK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = nsz contract nofpexcept V_FMAC_F32_e64 0, [[COPY1]], 0, killed [[S_MOV_B32_1]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec
9494
; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_ADD_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
9595
; CHECK-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, 0, implicit $mode, implicit $exec
9696
; CHECK-NEXT: [[V_RCP_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, killed [[V_ADD_F32_e64_]], 0, 0, implicit $mode, implicit $exec

llvm/test/CodeGen/RISCV/pr96366.ll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
3+
4+
declare void @use(i32)
5+
6+
define i32 @f(i32 %x) nounwind {
7+
; CHECK-LABEL: f:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: addi sp, sp, -16
10+
; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
11+
; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
12+
; CHECK-NEXT: mv s0, a0
13+
; CHECK-NEXT: negw a0, a0
14+
; CHECK-NEXT: call use
15+
; CHECK-NEXT: li a0, 4
16+
; CHECK-NEXT: subw a0, a0, s0
17+
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
18+
; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
19+
; CHECK-NEXT: addi sp, sp, 16
20+
; CHECK-NEXT: ret
21+
%sub1 = sub nuw i32 0, %x
22+
call void @use(i32 %sub1)
23+
%sub2 = sub i32 1, %x
24+
%sub3 = sub i32 3, %x
25+
%mul = mul i32 %x, 1
26+
%add1 = add i32 %sub2, %mul
27+
%add2 = add i32 %add1, %sub3
28+
ret i32 %add2
29+
}

0 commit comments

Comments
 (0)