Skip to content

DAG: Preserve disjoint flag when emitting final instructions #110795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,

if (Flags.hasNoFPExcept())
MI->setFlag(MachineInstr::MIFlag::NoFPExcept);

if (Flags.hasDisjoint())
MI->setFlag(MachineInstr::MIFlag::Disjoint);
}

// Emit all of the actual operands of this instruction, adding them to the
Expand Down
120 changes: 120 additions & 0 deletions llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -stop-after=finalize-isel -simplify-mir -o - %s | FileCheck %s

; Make sure the disjoint flag on IR 'or' instructions is preserved on the selected machine instructions.

; Scalar i32 case: both operands are marked inreg and the expected MIR reads
; them from $sgpr0/$sgpr1. The single S_OR_B32 produced for the IR
; 'or disjoint' is expected to carry the disjoint flag.
define amdgpu_ps i32 @s_or_i32_disjoint(i32 inreg %a, i32 inreg %b) {
; CHECK-LABEL: name: s_or_i32_disjoint
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr0, $sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; CHECK-NEXT: %3:sreg_32 = disjoint S_OR_B32 [[COPY1]], [[COPY]], implicit-def dead $scc
; CHECK-NEXT: $sgpr0 = COPY %3
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0
%result = or disjoint i32 %a, %b
ret i32 %result
}

; Scalar <2 x i32> case: operands are marked inreg and arrive in
; $sgpr0..$sgpr3. The expected MIR contains one S_OR_B32 per vector element,
; and each of them is expected to carry the disjoint flag.
define amdgpu_ps <2 x i32> @s_or_v2i32_disjoint(<2 x i32> inreg %a, <2 x i32> inreg %b) {
; CHECK-LABEL: name: s_or_v2i32_disjoint
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; CHECK-NEXT: %5:sreg_32 = disjoint S_OR_B32 [[COPY3]], [[COPY1]], implicit-def dead $scc
; CHECK-NEXT: %6:sreg_32 = disjoint S_OR_B32 [[COPY2]], [[COPY]], implicit-def dead $scc
; CHECK-NEXT: $sgpr0 = COPY %5
; CHECK-NEXT: $sgpr1 = COPY %6
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%result = or disjoint <2 x i32> %a, %b
ret <2 x i32> %result
}

; Vector-register i32 case: operands arrive in $vgpr0/$vgpr1 (see liveins).
; The single V_OR_B32_e64 produced for the IR 'or disjoint' is expected to
; carry the disjoint flag.
define i32 @v_or_i32_disjoint(i32 %a, i32 %b) {
; CHECK-LABEL: name: v_or_i32_disjoint
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: %9:vgpr_32 = disjoint V_OR_B32_e64 [[COPY1]], [[COPY]], implicit $exec
; CHECK-NEXT: $vgpr0 = COPY %9
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%result = or disjoint i32 %a, %b
ret i32 %result
}

; Vector-register <2 x i32> case: operands arrive in $vgpr0..$vgpr3. The
; expected MIR contains one V_OR_B32_e64 per vector element, and each of them
; is expected to carry the disjoint flag.
define <2 x i32> @v_or_v2i32_disjoint(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: name: v_or_v2i32_disjoint
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: %11:vgpr_32 = disjoint V_OR_B32_e64 [[COPY3]], [[COPY1]], implicit $exec
; CHECK-NEXT: %12:vgpr_32 = disjoint V_OR_B32_e64 [[COPY2]], [[COPY]], implicit $exec
; CHECK-NEXT: $vgpr0 = COPY %11
; CHECK-NEXT: $vgpr1 = COPY %12
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%result = or disjoint <2 x i32> %a, %b
ret <2 x i32> %result
}

; Scalar i64 case: each inreg i64 operand is rebuilt from two 32-bit SGPR
; copies with REG_SEQUENCE, and a single S_OR_B64 performs the operation.
; That S_OR_B64 is expected to carry the disjoint flag.
define amdgpu_ps i64 @s_or_i64_disjoint(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: name: s_or_i64_disjoint
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; CHECK-NEXT: %7:sreg_64 = disjoint S_OR_B64 killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], implicit-def dead $scc
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %7.sub1
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY %7.sub0
; CHECK-NEXT: $sgpr0 = COPY [[COPY5]]
; CHECK-NEXT: $sgpr1 = COPY [[COPY4]]
; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%result = or disjoint i64 %a, %b
ret i64 %result
}

; Vector-register i64 case: the expected MIR performs the operation as two
; V_OR_B32_e64 instructions on the sub1 and sub0 halves of the operands and
; recombines the results with REG_SEQUENCE.
; NOTE(review): unlike the other functions in this file, the two V_OR_B32_e64
; lines below are recorded WITHOUT the disjoint flag. Presumably the flag is
; lost when the 64-bit operation is split into 32-bit halves; confirm whether
; this is intended or should be addressed in a follow-up.
define i64 @v_or_i64_disjoint(i64 %a, i64 %b) {
; CHECK-LABEL: name: v_or_i64_disjoint
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
; CHECK-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed [[COPY5]], killed [[COPY4]], implicit $exec
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed [[COPY7]], killed [[COPY6]], implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_OR_B32_e64_1]], %subreg.sub0, killed [[V_OR_B32_e64_]], %subreg.sub1
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0
; CHECK-NEXT: $vgpr0 = COPY [[COPY9]]
; CHECK-NEXT: $vgpr1 = COPY [[COPY8]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%result = or disjoint i64 %a, %b
ret i64 %result
}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
; 64BIT-DAG: STD renamable $x11, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2)
; 64BIT-DAG: renamable $x6 = LD 0, %stack.1.arg2 :: (load (s64) from %ir.arg2)
; 64BIT-DAG: renamable $x9 = ADDI8 renamable $x6, 4
; 64BIT-DAG: renamable $x7 = ADDI8 %fixed-stack.0, 4
; 64BIT-DAG: renamable $x7 = disjoint ADDI8 %fixed-stack.0, 4
; 64BIT-DAG: renamable $r8 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 8)
; 64BIT-DAG: STD killed renamable $x11, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1)
; 64BIT-DAG: STD killed renamable $x7, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1)
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/SPARC/fp128-split.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ define fp128 @testcase(fp128 %0) {
; CHECK-NEXT: [[COPY:%[0-9]+]]:qfpregs = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:dfpregs = COPY [[COPY]].sub_odd64
; CHECK-NEXT: [[ADDri:%[0-9]+]]:i64regs = ADDri %stack.0, 0
; CHECK-NEXT: [[ORri:%[0-9]+]]:i64regs = ORri killed [[ADDri]], 8
; CHECK-NEXT: STDFrr [[ORri]], $g0, killed [[COPY1]] :: (store (s64) into %stack.0 + 8)
; CHECK-NEXT: %3:i64regs = disjoint ORri killed [[ADDri]], 8
; CHECK-NEXT: STDFrr %3, $g0, killed [[COPY1]] :: (store (s64) into %stack.0 + 8)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:dfpregs = COPY [[COPY]].sub_even64
; CHECK-NEXT: STDFri %stack.0, 0, killed [[COPY2]] :: (store (s64) into %stack.0, align 16)
; CHECK-NEXT: [[LDXrr:%[0-9]+]]:i64regs = LDXrr [[ORri]], $g0 :: (load (s64) from %stack.0 + 8)
; CHECK-NEXT: [[LDXrr:%[0-9]+]]:i64regs = LDXrr %3, $g0 :: (load (s64) from %stack.0 + 8)
; CHECK-NEXT: [[LDXri:%[0-9]+]]:i64regs = LDXri %stack.0, 0 :: (load (s64) from %stack.0, align 16)
; CHECK-NEXT: [[ADDri1:%[0-9]+]]:i64regs = ADDri %stack.1, 0
; CHECK-NEXT: [[ORri1:%[0-9]+]]:i64regs = ORri killed [[ADDri1]], 8
; CHECK-NEXT: %8:i64regs = disjoint ORri killed [[ADDri1]], 8
; CHECK-NEXT: [[ADDri2:%[0-9]+]]:i64regs = ADDri [[LDXrr]], -1
; CHECK-NEXT: STXrr [[ORri1]], $g0, killed [[ADDri2]] :: (store (s64) into %stack.1 + 8, basealign 16)
; CHECK-NEXT: STXrr %8, $g0, killed [[ADDri2]] :: (store (s64) into %stack.1 + 8, basealign 16)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:intregs = COPY $g0
; CHECK-NEXT: [[MOVRri:%[0-9]+]]:intregs = MOVRri [[LDXrr]], 1, [[COPY3]], 49
; CHECK-NEXT: [[SRLri:%[0-9]+]]:i64regs = SRLri killed [[MOVRri]], 0
Expand All @@ -31,7 +31,7 @@ define fp128 @testcase(fp128 %0) {
; CHECK-NEXT: [[LDDFri:%[0-9]+]]:dfpregs = LDDFri %stack.1, 0 :: (load (s64) from %stack.1, align 16)
; CHECK-NEXT: [[DEF:%[0-9]+]]:qfpregs = IMPLICIT_DEF
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:qfpregs = INSERT_SUBREG [[DEF]], killed [[LDDFri]], %subreg.sub_even64
; CHECK-NEXT: [[LDDFrr:%[0-9]+]]:dfpregs = LDDFrr [[ORri1]], $g0 :: (load (s64) from %stack.1 + 8)
; CHECK-NEXT: [[LDDFrr:%[0-9]+]]:dfpregs = LDDFrr %8, $g0 :: (load (s64) from %stack.1 + 8)
; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:qfpregs = INSERT_SUBREG [[INSERT_SUBREG]], killed [[LDDFrr]], %subreg.sub_odd64
; CHECK-NEXT: $q0 = COPY [[INSERT_SUBREG1]]
; CHECK-NEXT: RETL 8, implicit $q0
Expand Down
Loading