Skip to content

Commit 187dcd8

Browse files
authored
DAG: Preserve disjoint flag when emitting final instructions (#110795)
1 parent 3ca5d80 commit 187dcd8

File tree

4 files changed

+130
-7
lines changed

4 files changed

+130
-7
lines changed

llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,6 +1102,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
11021102

11031103
if (Flags.hasNoFPExcept())
11041104
MI->setFlag(MachineInstr::MIFlag::NoFPExcept);
1105+
1106+
if (Flags.hasDisjoint())
1107+
MI->setFlag(MachineInstr::MIFlag::Disjoint);
11051108
}
11061109

11071110
// Emit all of the actual operands of this instruction, adding them to the
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -stop-after=finalize-isel -simplify-mir -o - %s | FileCheck %s
3+
4+
; Make sure the disjoint flag on 'or' instructions is preserved through instruction selection.
5+
6+
; Scalar i32 case: both operands are inreg arguments (live-in $sgpr0/$sgpr1), and
; the 'or disjoint' is expected to select to one S_OR_B32 carrying the disjoint
; MI flag. The OR result is matched with a FileCheck capture instead of a fixed
; virtual register number so the test does not depend on vreg numbering.
define amdgpu_ps i32 @s_or_i32_disjoint(i32 inreg %a, i32 inreg %b) {
7+
; CHECK-LABEL: name: s_or_i32_disjoint
8+
; CHECK: bb.0 (%ir-block.0):
9+
; CHECK-NEXT: liveins: $sgpr0, $sgpr1
10+
; CHECK-NEXT: {{ $}}
11+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
12+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
13+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY1]], [[COPY]], implicit-def dead $scc
14+
; CHECK-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
15+
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0
16+
%result = or disjoint i32 %a, %b
17+
ret i32 %result
18+
}
19+
20+
; Scalar <2 x i32> case: the vector 'or disjoint' on inreg arguments is expected
; to become two S_OR_B32 instructions (one per element), each carrying the
; disjoint MI flag. The OR results are matched with FileCheck captures instead
; of fixed virtual register numbers so the test does not depend on vreg numbering.
define amdgpu_ps <2 x i32> @s_or_v2i32_disjoint(<2 x i32> inreg %a, <2 x i32> inreg %b) {
21+
; CHECK-LABEL: name: s_or_v2i32_disjoint
22+
; CHECK: bb.0 (%ir-block.0):
23+
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
24+
; CHECK-NEXT: {{ $}}
25+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3
26+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
27+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
28+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
29+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY3]], [[COPY1]], implicit-def dead $scc
30+
; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY2]], [[COPY]], implicit-def dead $scc
31+
; CHECK-NEXT: $sgpr0 = COPY [[S_OR_B32_]]
32+
; CHECK-NEXT: $sgpr1 = COPY [[S_OR_B32_1]]
33+
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
34+
%result = or disjoint <2 x i32> %a, %b
35+
ret <2 x i32> %result
36+
}
37+
38+
; Vector-register i32 case: the arguments arrive in $vgpr0/$vgpr1 and the
; 'or disjoint' is expected to select to one V_OR_B32_e64 carrying the disjoint
; MI flag. The OR result is matched with a FileCheck capture instead of a fixed
; virtual register number so the test does not depend on vreg numbering.
define i32 @v_or_i32_disjoint(i32 %a, i32 %b) {
39+
; CHECK-LABEL: name: v_or_i32_disjoint
40+
; CHECK: bb.0 (%ir-block.0):
41+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
42+
; CHECK-NEXT: {{ $}}
43+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
44+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
45+
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY1]], [[COPY]], implicit $exec
46+
; CHECK-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
47+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
48+
%result = or disjoint i32 %a, %b
49+
ret i32 %result
50+
}
51+
52+
; Vector-register <2 x i32> case: the vector 'or disjoint' is expected to become
; two V_OR_B32_e64 instructions (one per element), each carrying the disjoint
; MI flag. The OR results are matched with FileCheck captures instead of fixed
; virtual register numbers so the test does not depend on vreg numbering.
define <2 x i32> @v_or_v2i32_disjoint(<2 x i32> %a, <2 x i32> %b) {
53+
; CHECK-LABEL: name: v_or_v2i32_disjoint
54+
; CHECK: bb.0 (%ir-block.0):
55+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
58+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
59+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
60+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
61+
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY3]], [[COPY1]], implicit $exec
62+
; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY2]], [[COPY]], implicit $exec
63+
; CHECK-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
64+
; CHECK-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]]
65+
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
66+
%result = or disjoint <2 x i32> %a, %b
67+
ret <2 x i32> %result
68+
}
69+
70+
; Scalar i64 case: each inreg i64 operand is assembled from two SGPRs with
; REG_SEQUENCE, and the 'or disjoint' is expected to select to one S_OR_B64
; carrying the disjoint MI flag; its sub0/sub1 halves are then copied out to
; $sgpr0/$sgpr1. The OR result is matched with a FileCheck capture instead of a
; fixed virtual register number so the test does not depend on vreg numbering.
define amdgpu_ps i64 @s_or_i64_disjoint(i64 inreg %a, i64 inreg %b) {
71+
; CHECK-LABEL: name: s_or_i64_disjoint
72+
; CHECK: bb.0 (%ir-block.0):
73+
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
74+
; CHECK-NEXT: {{ $}}
75+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3
76+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
77+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
78+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
79+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
80+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
81+
; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = disjoint S_OR_B64 killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], implicit-def dead $scc
82+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1
83+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub0
84+
; CHECK-NEXT: $sgpr0 = COPY [[COPY5]]
85+
; CHECK-NEXT: $sgpr1 = COPY [[COPY4]]
86+
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
87+
%result = or disjoint i64 %a, %b
88+
ret i64 %result
89+
}
90+
91+
; Vector-register i64 case: the expected output contains two V_OR_B32_e64
; instructions, one per 32-bit half of the REG_SEQUENCE-built operands, whose
; results are recombined with REG_SEQUENCE and copied to $vgpr0/$vgpr1.
; Unlike the other functions in this file, neither V_OR_B32_e64 carries the
; disjoint flag in the expected output below.
; NOTE(review): presumably the 64-bit OR is split into halves after initial
; selection and the flag is not propagated to the pieces -- confirm whether
; that loss is intended or a follow-up is needed.
define i64 @v_or_i64_disjoint(i64 %a, i64 %b) {
92+
; CHECK-LABEL: name: v_or_i64_disjoint
93+
; CHECK: bb.0 (%ir-block.0):
94+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
95+
; CHECK-NEXT: {{ $}}
96+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
97+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
98+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
99+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
100+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
101+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
102+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
103+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
104+
; CHECK-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
105+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
106+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
107+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
108+
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed [[COPY5]], killed [[COPY4]], implicit $exec
109+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
110+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
111+
; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed [[COPY7]], killed [[COPY6]], implicit $exec
112+
; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_OR_B32_e64_1]], %subreg.sub0, killed [[V_OR_B32_e64_]], %subreg.sub1
113+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
114+
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
115+
; CHECK-NEXT: $vgpr0 = COPY [[COPY9]]
116+
; CHECK-NEXT: $vgpr1 = COPY [[COPY8]]
117+
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
118+
%result = or disjoint i64 %a, %b
119+
ret i64 %result
120+
}

llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
; 64BIT-DAG: STD renamable $x11, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2)
6464
; 64BIT-DAG: renamable $x6 = LD 0, %stack.1.arg2 :: (load (s64) from %ir.arg2)
6565
; 64BIT-DAG: renamable $x9 = ADDI8 renamable $x6, 4
66-
; 64BIT-DAG: renamable $x7 = ADDI8 %fixed-stack.0, 4
66+
; 64BIT-DAG: renamable $x7 = disjoint ADDI8 %fixed-stack.0, 4
6767
; 64BIT-DAG: renamable $r8 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 8)
6868
; 64BIT-DAG: STD killed renamable $x11, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1)
6969
; 64BIT-DAG: STD killed renamable $x7, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1)

llvm/test/CodeGen/SPARC/fp128-split.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,16 @@ define fp128 @testcase(fp128 %0) {
1313
; CHECK-NEXT: [[COPY:%[0-9]+]]:qfpregs = COPY $q0
1414
; CHECK-NEXT: [[COPY1:%[0-9]+]]:dfpregs = COPY [[COPY]].sub_odd64
1515
; CHECK-NEXT: [[ADDri:%[0-9]+]]:i64regs = ADDri %stack.0, 0
16-
; CHECK-NEXT: [[ORri:%[0-9]+]]:i64regs = ORri killed [[ADDri]], 8
17-
; CHECK-NEXT: STDFrr [[ORri]], $g0, killed [[COPY1]] :: (store (s64) into %stack.0 + 8)
16+
; CHECK-NEXT: %3:i64regs = disjoint ORri killed [[ADDri]], 8
17+
; CHECK-NEXT: STDFrr %3, $g0, killed [[COPY1]] :: (store (s64) into %stack.0 + 8)
1818
; CHECK-NEXT: [[COPY2:%[0-9]+]]:dfpregs = COPY [[COPY]].sub_even64
1919
; CHECK-NEXT: STDFri %stack.0, 0, killed [[COPY2]] :: (store (s64) into %stack.0, align 16)
20-
; CHECK-NEXT: [[LDXrr:%[0-9]+]]:i64regs = LDXrr [[ORri]], $g0 :: (load (s64) from %stack.0 + 8)
20+
; CHECK-NEXT: [[LDXrr:%[0-9]+]]:i64regs = LDXrr %3, $g0 :: (load (s64) from %stack.0 + 8)
2121
; CHECK-NEXT: [[LDXri:%[0-9]+]]:i64regs = LDXri %stack.0, 0 :: (load (s64) from %stack.0, align 16)
2222
; CHECK-NEXT: [[ADDri1:%[0-9]+]]:i64regs = ADDri %stack.1, 0
23-
; CHECK-NEXT: [[ORri1:%[0-9]+]]:i64regs = ORri killed [[ADDri1]], 8
23+
; CHECK-NEXT: %8:i64regs = disjoint ORri killed [[ADDri1]], 8
2424
; CHECK-NEXT: [[ADDri2:%[0-9]+]]:i64regs = ADDri [[LDXrr]], -1
25-
; CHECK-NEXT: STXrr [[ORri1]], $g0, killed [[ADDri2]] :: (store (s64) into %stack.1 + 8, basealign 16)
25+
; CHECK-NEXT: STXrr %8, $g0, killed [[ADDri2]] :: (store (s64) into %stack.1 + 8, basealign 16)
2626
; CHECK-NEXT: [[COPY3:%[0-9]+]]:intregs = COPY $g0
2727
; CHECK-NEXT: [[MOVRri:%[0-9]+]]:intregs = MOVRri [[LDXrr]], 1, [[COPY3]], 49
2828
; CHECK-NEXT: [[SRLri:%[0-9]+]]:i64regs = SRLri killed [[MOVRri]], 0
@@ -31,7 +31,7 @@ define fp128 @testcase(fp128 %0) {
3131
; CHECK-NEXT: [[LDDFri:%[0-9]+]]:dfpregs = LDDFri %stack.1, 0 :: (load (s64) from %stack.1, align 16)
3232
; CHECK-NEXT: [[DEF:%[0-9]+]]:qfpregs = IMPLICIT_DEF
3333
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:qfpregs = INSERT_SUBREG [[DEF]], killed [[LDDFri]], %subreg.sub_even64
34-
; CHECK-NEXT: [[LDDFrr:%[0-9]+]]:dfpregs = LDDFrr [[ORri1]], $g0 :: (load (s64) from %stack.1 + 8)
34+
; CHECK-NEXT: [[LDDFrr:%[0-9]+]]:dfpregs = LDDFrr %8, $g0 :: (load (s64) from %stack.1 + 8)
3535
; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:qfpregs = INSERT_SUBREG [[INSERT_SUBREG]], killed [[LDDFrr]], %subreg.sub_odd64
3636
; CHECK-NEXT: $q0 = COPY [[INSERT_SUBREG1]]
3737
; CHECK-NEXT: RETL 8, implicit $q0

0 commit comments

Comments
 (0)