-
Notifications
You must be signed in to change notification settings - Fork 14.3k
DAG: Preserve disjoint flag when emitting final instructions #110795
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAG: Preserve disjoint flag when emitting final instructions #110795
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-llvm-selectiondag
Author: Matt Arsenault (arsenm)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/110795.diff
4 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 12a48ab06f1c08..281d1578d0173a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1102,6 +1102,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasNoFPExcept())
MI->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
+ if (Flags.hasDisjoint())
+ MI->setFlag(MachineInstr::MIFlag::Disjoint);
}
// Emit all of the actual operands of this instruction, adding them to the
diff --git a/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll b/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll
new file mode 100644
index 00000000000000..96ed74867a279f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -stop-after=finalize-isel -simplify-mir -o - %s | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -stop-after=finalize-isel -simplify-mir -o - %s | FileCheck %s
+
+; Make sure disjoint flag is preserved on or instructions through selection
+
+define amdgpu_ps i32 @s_or_i32_disjoint(i32 inreg %a, i32 inreg %b) {
+ ; CHECK-LABEL: name: s_or_i32_disjoint
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; CHECK-NEXT: %3:sreg_32 = disjoint S_OR_B32 [[COPY1]], [[COPY]], implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr0 = COPY %3
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0
+ %result = or disjoint i32 %a, %b
+ ret i32 %result
+}
+
+define amdgpu_ps <2 x i32> @s_or_v2i32_disjoint(<2 x i32> inreg %a, <2 x i32> inreg %b) {
+ ; CHECK-LABEL: name: s_or_v2i32_disjoint
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; CHECK-NEXT: %5:sreg_32 = disjoint S_OR_B32 [[COPY3]], [[COPY1]], implicit-def dead $scc
+ ; CHECK-NEXT: %6:sreg_32 = disjoint S_OR_B32 [[COPY2]], [[COPY]], implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr0 = COPY %5
+ ; CHECK-NEXT: $sgpr1 = COPY %6
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ %result = or disjoint <2 x i32> %a, %b
+ ret <2 x i32> %result
+}
+
+define i32 @v_or_i32_disjoint(i32 %a, i32 %b) {
+ ; CHECK-LABEL: name: v_or_i32_disjoint
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: %9:vgpr_32 = disjoint V_OR_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY %9
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %result = or disjoint i32 %a, %b
+ ret i32 %result
+}
+
+define <2 x i32> @v_or_v2i32_disjoint(<2 x i32> %a, <2 x i32> %b) {
+ ; CHECK-LABEL: name: v_or_v2i32_disjoint
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: %11:vgpr_32 = disjoint V_OR_B32_e64 [[COPY3]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: %12:vgpr_32 = disjoint V_OR_B32_e64 [[COPY2]], [[COPY]], implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY %11
+ ; CHECK-NEXT: $vgpr1 = COPY %12
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %result = or disjoint <2 x i32> %a, %b
+ ret <2 x i32> %result
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll
index ccf89aac2d5408..5ffa852d3d57fc 100644
--- a/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll
+++ b/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll
@@ -63,7 +63,7 @@
; 64BIT-DAG: STD renamable $x11, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2)
; 64BIT-DAG: renamable $x6 = LD 0, %stack.1.arg2 :: (load (s64) from %ir.arg2)
; 64BIT-DAG: renamable $x9 = ADDI8 renamable $x6, 4
-; 64BIT-DAG: renamable $x7 = ADDI8 %fixed-stack.0, 4
+; 64BIT-DAG: renamable $x7 = disjoint ADDI8 %fixed-stack.0, 4
; 64BIT-DAG: renamable $r8 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 8)
; 64BIT-DAG: STD killed renamable $x11, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1)
; 64BIT-DAG: STD killed renamable $x7, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1)
diff --git a/llvm/test/CodeGen/SPARC/fp128-split.ll b/llvm/test/CodeGen/SPARC/fp128-split.ll
index 8a127c9c28cc71..ef3e9deb98d4d7 100644
--- a/llvm/test/CodeGen/SPARC/fp128-split.ll
+++ b/llvm/test/CodeGen/SPARC/fp128-split.ll
@@ -13,16 +13,16 @@ define fp128 @testcase(fp128 %0) {
; CHECK-NEXT: [[COPY:%[0-9]+]]:qfpregs = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:dfpregs = COPY [[COPY]].sub_odd64
; CHECK-NEXT: [[ADDri:%[0-9]+]]:i64regs = ADDri %stack.0, 0
- ; CHECK-NEXT: [[ORri:%[0-9]+]]:i64regs = ORri killed [[ADDri]], 8
- ; CHECK-NEXT: STDFrr [[ORri]], $g0, killed [[COPY1]] :: (store (s64) into %stack.0 + 8)
+ ; CHECK-NEXT: %3:i64regs = disjoint ORri killed [[ADDri]], 8
+ ; CHECK-NEXT: STDFrr %3, $g0, killed [[COPY1]] :: (store (s64) into %stack.0 + 8)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:dfpregs = COPY [[COPY]].sub_even64
; CHECK-NEXT: STDFri %stack.0, 0, killed [[COPY2]] :: (store (s64) into %stack.0, align 16)
- ; CHECK-NEXT: [[LDXrr:%[0-9]+]]:i64regs = LDXrr [[ORri]], $g0 :: (load (s64) from %stack.0 + 8)
+ ; CHECK-NEXT: [[LDXrr:%[0-9]+]]:i64regs = LDXrr %3, $g0 :: (load (s64) from %stack.0 + 8)
; CHECK-NEXT: [[LDXri:%[0-9]+]]:i64regs = LDXri %stack.0, 0 :: (load (s64) from %stack.0, align 16)
; CHECK-NEXT: [[ADDri1:%[0-9]+]]:i64regs = ADDri %stack.1, 0
- ; CHECK-NEXT: [[ORri1:%[0-9]+]]:i64regs = ORri killed [[ADDri1]], 8
+ ; CHECK-NEXT: %8:i64regs = disjoint ORri killed [[ADDri1]], 8
; CHECK-NEXT: [[ADDri2:%[0-9]+]]:i64regs = ADDri [[LDXrr]], -1
- ; CHECK-NEXT: STXrr [[ORri1]], $g0, killed [[ADDri2]] :: (store (s64) into %stack.1 + 8, basealign 16)
+ ; CHECK-NEXT: STXrr %8, $g0, killed [[ADDri2]] :: (store (s64) into %stack.1 + 8, basealign 16)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:intregs = COPY $g0
; CHECK-NEXT: [[MOVRri:%[0-9]+]]:intregs = MOVRri [[LDXrr]], 1, [[COPY3]], 49
; CHECK-NEXT: [[SRLri:%[0-9]+]]:i64regs = SRLri killed [[MOVRri]], 0
@@ -31,7 +31,7 @@ define fp128 @testcase(fp128 %0) {
; CHECK-NEXT: [[LDDFri:%[0-9]+]]:dfpregs = LDDFri %stack.1, 0 :: (load (s64) from %stack.1, align 16)
; CHECK-NEXT: [[DEF:%[0-9]+]]:qfpregs = IMPLICIT_DEF
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:qfpregs = INSERT_SUBREG [[DEF]], killed [[LDDFri]], %subreg.sub_even64
- ; CHECK-NEXT: [[LDDFrr:%[0-9]+]]:dfpregs = LDDFrr [[ORri1]], $g0 :: (load (s64) from %stack.1 + 8)
+ ; CHECK-NEXT: [[LDDFrr:%[0-9]+]]:dfpregs = LDDFrr %8, $g0 :: (load (s64) from %stack.1 + 8)
; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:qfpregs = INSERT_SUBREG [[INSERT_SUBREG]], killed [[LDDFrr]], %subreg.sub_odd64
; CHECK-NEXT: $q0 = COPY [[INSERT_SUBREG1]]
; CHECK-NEXT: RETL 8, implicit $q0
|
43f06e4
to
4093b22
Compare
4093b22
to
75f0a3e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
No description provided.