Skip to content

Commit 9980507

Browse files
committed
[AMDGPU] Add intrinsics and MIs for ds_bvh_stack_* llvm#130007
1 parent 179ebc0 commit 9980507

File tree

10 files changed

+234
-12
lines changed

10 files changed

+234
-12
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2779,7 +2779,7 @@ def int_amdgcn_ds_sub_gs_reg_rtn :
27792779
[ImmArg<ArgIndex<1>>, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree],
27802780
"", [SDNPMemOperand]>;
27812781

2782-
def int_amdgcn_ds_bvh_stack_rtn :
2782+
class IntDSBVHStackRtn :
27832783
Intrinsic<
27842784
[llvm_i32_ty, llvm_i32_ty], // %vdst, %addr
27852785
[
@@ -2791,6 +2791,8 @@ def int_amdgcn_ds_bvh_stack_rtn :
27912791
[ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
27922792
>;
27932793

2794+
def int_amdgcn_ds_bvh_stack_rtn : IntDSBVHStackRtn;
2795+
27942796
def int_amdgcn_s_wait_event_export_ready :
27952797
ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">,
27962798
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]
@@ -2866,6 +2868,32 @@ def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty
28662868
// GFX12 Intrinsics
28672869
//===----------------------------------------------------------------------===//
28682870

2871+
def int_amdgcn_ds_bvh_stack_push4_pop1_rtn : IntDSBVHStackRtn;
2872+
2873+
def int_amdgcn_ds_bvh_stack_push8_pop1_rtn :
2874+
Intrinsic<
2875+
[llvm_i32_ty, llvm_i32_ty], // %vdst, %addr
2876+
[
2877+
llvm_i32_ty, // %addr
2878+
llvm_i32_ty, // %data0
2879+
llvm_v8i32_ty, // %data1
2880+
llvm_i32_ty, // %offset
2881+
],
2882+
[ImmArg<ArgIndex<3>>, IntrWillReturn]
2883+
>;
2884+
2885+
def int_amdgcn_ds_bvh_stack_push8_pop2_rtn :
2886+
Intrinsic<
2887+
[llvm_i64_ty, llvm_i32_ty], // %vdst, %addr
2888+
[
2889+
llvm_i32_ty, // %addr
2890+
llvm_i32_ty, // %data0
2891+
llvm_v8i32_ty, // %data1
2892+
llvm_i32_ty, // %offset
2893+
],
2894+
[ImmArg<ArgIndex<3>>, IntrWillReturn]
2895+
>;
2896+
28692897
// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
28702898
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
28712899
Intrinsic<[llvm_i32_ty],

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2665,8 +2665,20 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
26652665

26662666
// We need to handle this here because tablegen doesn't support matching
26672667
// instructions with multiple outputs.
2668-
void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
2669-
unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2668+
void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N, unsigned IntrID) {
2669+
unsigned Opc;
2670+
switch (IntrID) {
2671+
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2672+
case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2673+
Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2674+
break;
2675+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2676+
Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
2677+
break;
2678+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2679+
Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
2680+
break;
2681+
}
26702682
SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
26712683
N->getOperand(5), N->getOperand(0)};
26722684

@@ -2830,7 +2842,10 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
28302842
return;
28312843
}
28322844
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2833-
SelectDSBvhStackIntrinsic(N);
2845+
case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2846+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2847+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2848+
SelectDSBvhStackIntrinsic(N, IntrID);
28342849
return;
28352850
case Intrinsic::amdgcn_init_whole_wave:
28362851
CurDAG->getMachineFunction()

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
267267
void SelectFMAD_FMA(SDNode *N);
268268
void SelectFP_EXTEND(SDNode *N);
269269
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
270-
void SelectDSBvhStackIntrinsic(SDNode *N);
270+
void SelectDSBvhStackIntrinsic(SDNode *N, unsigned IntrID);
271271
void SelectDS_GWS(SDNode *N, unsigned IntrID);
272272
void SelectInterpP1F16(SDNode *N);
273273
void SelectINTRINSIC_W_CHAIN(SDNode *N);

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2260,7 +2260,21 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
22602260
Register Data1 = MI.getOperand(5).getReg();
22612261
unsigned Offset = MI.getOperand(6).getImm();
22622262

2263-
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
2263+
unsigned Opc;
2264+
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
2265+
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2266+
case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2267+
Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2268+
break;
2269+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2270+
Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
2271+
break;
2272+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2273+
Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
2274+
break;
2275+
}
2276+
2277+
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)
22642278
.addDef(Dst1)
22652279
.addUse(Addr)
22662280
.addUse(Data0)
@@ -2315,6 +2329,9 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
23152329
}
23162330
break;
23172331
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2332+
case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2333+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2334+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
23182335
return selectDSBvhStackIntrinsic(I);
23192336
case Intrinsic::amdgcn_s_barrier_init:
23202337
case Intrinsic::amdgcn_s_barrier_signal_var:

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5291,7 +5291,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
52915291
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
52925292
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
52935293
break;
5294-
case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
5294+
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
5295+
case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
5296+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
5297+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn: {
52955298
OpdsMapping[0] =
52965299
getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); // %vdst
52975300
OpdsMapping[1] =

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,13 @@ multiclass DS_1A2D_Off8_RET_mc<string opName,
258258
}
259259
}
260260

261-
class DS_BVH_STACK<string opName>
261+
class DS_BVH_STACK<string opName,
262+
RegisterClass rc,
263+
RegisterClass src>
262264
: DS_Pseudo<opName,
263-
(outs getLdStRegisterOperand<VGPR_32>.ret:$vdst, VGPR_32:$addr),
264-
(ins VGPR_32:$addr_in, getLdStRegisterOperand<VGPR_32>.ret:$data0, VReg_128:$data1, Offset:$offset),
265+
(outs getLdStRegisterOperand<rc>.ret:$vdst, VGPR_32:$addr),
266+
(ins VGPR_32:$addr_in, getLdStRegisterOperand<VGPR_32>.ret:$data0,
267+
src:$data1, Offset:$offset),
265268
" $vdst, $addr, $data0, $data1$offset"> {
266269
let Constraints = "$addr = $addr_in";
267270
let DisableEncoding = "$addr_in";
@@ -722,7 +725,8 @@ def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;
722725
let SubtargetPredicate = isGFX11Plus in {
723726

724727
let OtherPredicates = [HasImageInsts] in
725-
def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32">;
728+
def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32",
729+
VGPR_32, VReg_128> ;
726730

727731
} // let SubtargetPredicate = isGFX11Plus
728732

@@ -732,6 +736,13 @@ def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32">;
732736

733737
let SubtargetPredicate = isGFX12Plus in {
734738

739+
let OtherPredicates = [HasImageInsts] in {
740+
def DS_BVH_STACK_PUSH8_POP1_RTN_B32 : DS_BVH_STACK<
741+
"ds_bvh_stack_push8_pop1_rtn_b32", VGPR_32, VReg_256>;
742+
def DS_BVH_STACK_PUSH8_POP2_RTN_B64 : DS_BVH_STACK<
743+
"ds_bvh_stack_push8_pop2_rtn_b64", VReg_64, VReg_256>;
744+
} // End OtherPredicates = [HasImageInsts].
745+
735746
defm DS_COND_SUB_U32 : DS_1A1D_NORET_mc<"ds_cond_sub_u32">;
736747
defm DS_COND_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_cond_sub_rtn_u32", VGPR_32>;
737748
defm DS_SUB_CLAMP_U32 : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">;
@@ -1268,6 +1279,11 @@ defm DS_PK_ADD_BF16 : DS_Real_gfx12<0x09b>;
12681279
defm DS_PK_ADD_RTN_BF16 : DS_Real_gfx12<0x0ab>;
12691280
defm DS_BPERMUTE_FI_B32 : DS_Real_gfx12<0x0cd>;
12701281

1282+
defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx12<0x0e0,
1283+
"ds_bvh_stack_push4_pop1_rtn_b32", true>;
1284+
defm DS_BVH_STACK_PUSH8_POP1_RTN_B32 : DS_Real_gfx12<0x0e1>;
1285+
defm DS_BVH_STACK_PUSH8_POP2_RTN_B64 : DS_Real_gfx12<0x0e2>;
1286+
12711287
// New aliases added in GFX12 without renaming the instructions.
12721288
let AssemblerPredicate = isGFX12Plus in {
12731289
def : AMDGPUMnemonicAlias<"ds_subrev_u32", "ds_rsub_u32">;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1451,7 +1451,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
14511451
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
14521452
return true;
14531453
}
1454-
case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
1454+
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
1455+
case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
1456+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
1457+
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn: {
14551458
Info.opc = ISD::INTRINSIC_W_CHAIN;
14561459

14571460
const GCNTargetMachine &TM =
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
3+
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
4+
5+
declare { i32, i32 } @llvm.amdgcn.ds.bvh.stack.push4.pop1.rtn(i32, i32, <4 x i32>, i32 immarg)
6+
declare { i32, i32 } @llvm.amdgcn.ds.bvh.stack.push8.pop1.rtn(i32, i32, <8 x i32>, i32 immarg)
7+
declare { i64, i32 } @llvm.amdgcn.ds.bvh.stack.push8.pop2.rtn(i32, i32, <8 x i32>, i32 immarg)
8+
declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1)
9+
10+
define amdgpu_gs void @test_ds_bvh_stack_push4_pop1(i32 %addr, i32 %data0, <4 x i32> %data1) {
11+
; CHECK-LABEL: test_ds_bvh_stack_push4_pop1:
12+
; CHECK: ; %bb.0:
13+
; CHECK-NEXT: ds_bvh_stack_push4_pop1_rtn_b32 v1, v0, v1, v[2:5]
14+
; CHECK-NEXT: s_wait_dscnt 0x0
15+
; CHECK-NEXT: export prim v1, off, off, off done
16+
; CHECK-NEXT: s_endpgm
17+
%pair = call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.push4.pop1.rtn(i32 %addr, i32 %data0, <4 x i32> %data1, i32 0)
18+
%vdst = extractvalue { i32, i32 } %pair, 0
19+
%newaddr = extractvalue { i32, i32 } %pair, 1
20+
call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %vdst, i32 %newaddr, i32 poison, i32 poison, i1 true, i1 false)
21+
ret void
22+
}
23+
24+
define amdgpu_gs void @test_ds_bvh_stack_push4_pop1_1(i32 %addr, i32 %data0, <4 x i32> %data1) {
25+
; CHECK-LABEL: test_ds_bvh_stack_push4_pop1_1:
26+
; CHECK: ; %bb.0:
27+
; CHECK-NEXT: ds_bvh_stack_push4_pop1_rtn_b32 v1, v0, v1, v[2:5] offset:1
28+
; CHECK-NEXT: s_wait_dscnt 0x0
29+
; CHECK-NEXT: export prim v1, off, off, off done
30+
; CHECK-NEXT: s_endpgm
31+
%pair = call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.push4.pop1.rtn(i32 %addr, i32 %data0, <4 x i32> %data1, i32 1)
32+
%vdst = extractvalue { i32, i32 } %pair, 0
33+
%newaddr = extractvalue { i32, i32 } %pair, 1
34+
call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %vdst, i32 %newaddr, i32 poison, i32 poison, i1 true, i1 false)
35+
ret void
36+
}
37+
38+
define amdgpu_gs void @test_ds_bvh_stack_push8_pop1(i32 %addr, i32 %data0, <8 x i32> %data1) {
39+
; CHECK-LABEL: test_ds_bvh_stack_push8_pop1:
40+
; CHECK: ; %bb.0:
41+
; CHECK-NEXT: ds_bvh_stack_push8_pop1_rtn_b32 v1, v0, v1, v[2:9]
42+
; CHECK-NEXT: s_wait_dscnt 0x0
43+
; CHECK-NEXT: export prim v1, off, off, off done
44+
; CHECK-NEXT: s_endpgm
45+
%pair = call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.push8.pop1.rtn(i32 %addr, i32 %data0, <8 x i32> %data1, i32 0)
46+
%vdst = extractvalue { i32, i32 } %pair, 0
47+
%newaddr = extractvalue { i32, i32 } %pair, 1
48+
call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %vdst, i32 %newaddr, i32 poison, i32 poison, i1 true, i1 false)
49+
ret void
50+
}
51+
52+
define amdgpu_gs void @test_ds_bvh_stack_push8_pop1_1(i32 %addr, i32 %data0, <8 x i32> %data1) {
53+
; CHECK-LABEL: test_ds_bvh_stack_push8_pop1_1:
54+
; CHECK: ; %bb.0:
55+
; CHECK-NEXT: ds_bvh_stack_push8_pop1_rtn_b32 v1, v0, v1, v[2:9] offset:1
56+
; CHECK-NEXT: s_wait_dscnt 0x0
57+
; CHECK-NEXT: export prim v1, off, off, off done
58+
; CHECK-NEXT: s_endpgm
59+
%pair = call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.push8.pop1.rtn(i32 %addr, i32 %data0, <8 x i32> %data1, i32 1)
60+
%vdst = extractvalue { i32, i32 } %pair, 0
61+
%newaddr = extractvalue { i32, i32 } %pair, 1
62+
call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %vdst, i32 %newaddr, i32 poison, i32 poison, i1 true, i1 false)
63+
ret void
64+
}
65+
66+
define amdgpu_gs void @test_ds_bvh_stack_push8_pop2(i32 %addr, i32 %data0, <8 x i32> %data1, ptr addrspace(1) %out1, ptr addrspace(1) %out2) {
67+
; CHECK-LABEL: test_ds_bvh_stack_push8_pop2:
68+
; CHECK: ; %bb.0:
69+
; CHECK-NEXT: ds_bvh_stack_push8_pop2_rtn_b64 v[1:2], v0, v1, v[2:9]
70+
; CHECK-NEXT: s_wait_dscnt 0x0
71+
; CHECK-NEXT: export prim v1, off, off, off done
72+
; CHECK-NEXT: s_endpgm
73+
%pair = call { i64, i32 } @llvm.amdgcn.ds.bvh.stack.push8.pop2.rtn(i32 %addr, i32 %data0, <8 x i32> %data1, i32 0)
74+
%vdst = extractvalue { i64, i32 } %pair, 0
75+
%newaddr = extractvalue { i64, i32 } %pair, 1
76+
%vdst.v2i32 = bitcast i64 %vdst to <2 x i32>
77+
%vdst.lo = extractelement <2 x i32> %vdst.v2i32, i32 0
78+
%vdst.hi = extractelement <2 x i32> %vdst.v2i32, i32 1
79+
call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %vdst.lo, i32 %vdst.hi, i32 %newaddr, i32 poison, i1 true, i1 false)
80+
ret void
81+
}
82+
83+
define amdgpu_gs void @test_ds_bvh_stack_push8_pop2_1(i32 %addr, i32 %data0, <8 x i32> %data1, ptr addrspace(1) %out1, ptr addrspace(1) %out2) {
84+
; CHECK-LABEL: test_ds_bvh_stack_push8_pop2_1:
85+
; CHECK: ; %bb.0:
86+
; CHECK-NEXT: ds_bvh_stack_push8_pop2_rtn_b64 v[1:2], v0, v1, v[2:9] offset:1
87+
; CHECK-NEXT: s_wait_dscnt 0x0
88+
; CHECK-NEXT: export prim v1, off, off, off done
89+
; CHECK-NEXT: s_endpgm
90+
%pair = call { i64, i32 } @llvm.amdgcn.ds.bvh.stack.push8.pop2.rtn(i32 %addr, i32 %data0, <8 x i32> %data1, i32 1)
91+
%vdst = extractvalue { i64, i32 } %pair, 0
92+
%newaddr = extractvalue { i64, i32 } %pair, 1
93+
%vdst.v2i32 = bitcast i64 %vdst to <2 x i32>
94+
%vdst.lo = extractelement <2 x i32> %vdst.v2i32, i32 0
95+
%vdst.hi = extractelement <2 x i32> %vdst.v2i32, i32 1
96+
call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %vdst.lo, i32 %vdst.hi, i32 %newaddr, i32 poison, i1 true, i1 false)
97+
ret void
98+
}

llvm/test/MC/AMDGPU/gfx12_asm_ds.s

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1922,3 +1922,21 @@ ds_bpermute_fi_b32 v5, v1, v2 offset:0
19221922

19231923
ds_bpermute_fi_b32 v255, v255, v255 offset:4
19241924
// GFX12: encoding: [0x04,0x00,0x34,0xdb,0xff,0xff,0x00,0xff]
1925+
1926+
ds_bvh_stack_push4_pop1_rtn_b32 v1, v0, v1, v[2:5]
1927+
// GFX12: encoding: [0x00,0x00,0x80,0xdb,0x00,0x01,0x02,0x01]
1928+
1929+
ds_bvh_stack_push4_pop1_rtn_b32 v1, v0, v1, v[2:5] offset:1
1930+
// GFX12: encoding: [0x01,0x00,0x80,0xdb,0x00,0x01,0x02,0x01]
1931+
1932+
ds_bvh_stack_push8_pop1_rtn_b32 v1, v0, v1, v[2:9]
1933+
// GFX12: encoding: [0x00,0x00,0x84,0xdb,0x00,0x01,0x02,0x01]
1934+
1935+
ds_bvh_stack_push8_pop1_rtn_b32 v1, v0, v1, v[2:9] offset:1
1936+
// GFX12: encoding: [0x01,0x00,0x84,0xdb,0x00,0x01,0x02,0x01]
1937+
1938+
ds_bvh_stack_push8_pop2_rtn_b64 v[254:255], v253, v252, v[244:251]
1939+
// GFX12: encoding: [0x00,0x00,0x88,0xdb,0xfd,0xfc,0xf4,0xfe]
1940+
1941+
ds_bvh_stack_push8_pop2_rtn_b64 v[254:255], v253, v252, v[244:251] offset:127
1942+
// GFX12: encoding: [0x7f,0x00,0x88,0xdb,0xfd,0xfc,0xf4,0xfe]

llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_ds.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3242,3 +3242,27 @@
32423242

32433243
# GFX12: ds_bpermute_fi_b32 v255, v255, v255 offset:4 ; encoding: [0x04,0x00,0x34,0xdb,0xff,0xff,0x00,0xff]
32443244
0x04,0x00,0x34,0xdb,0xff,0xff,0x00,0xff
3245+
3246+
# GFX12: ds_bvh_stack_push4_pop1_rtn_b32 v1, v0, v1, v[2:5] ; encoding: [0x00,0x00,0x80,0xdb,0x00,0x01,0x02,0x01]
3247+
0x00,0x00,0x80,0xdb,0x00,0x01,0x02,0x01
3248+
3249+
# GFX12: ds_bvh_stack_push4_pop1_rtn_b32 v1, v0, v1, v[2:5] offset:1 ; encoding: [0x01,0x00,0x80,0xdb,0x00,0x01,0x02,0x01]
3250+
0x01,0x00,0x80,0xdb,0x00,0x01,0x02,0x01
3251+
3252+
# GFX12: ds_bvh_stack_push8_pop1_rtn_b32 v1, v0, v1, v[2:9] ; encoding: [0x00,0x00,0x84,0xdb,0x00,0x01,0x02,0x01]
3253+
0x00,0x00,0x84,0xdb,0x00,0x01,0x02,0x01
3254+
3255+
# GFX12: ds_bvh_stack_push8_pop1_rtn_b32 v1, v0, v1, v[2:9] offset:1 ; encoding: [0x01,0x00,0x84,0xdb,0x00,0x01,0x02,0x01]
3256+
0x01,0x00,0x84,0xdb,0x00,0x01,0x02,0x01
3257+
3258+
# GFX12: ds_bvh_stack_push8_pop2_rtn_b64 v[254:255], v253, v252, v[244:251] ; encoding: [0x00,0x00,0x88,0xdb,0xfd,0xfc,0xf4,0xfe]
3259+
0x00,0x00,0x88,0xdb,0xfd,0xfc,0xf4,0xfe
3260+
3261+
# GFX12: ds_bvh_stack_push8_pop2_rtn_b64 v[254:255], v253, v252, v[244:251] offset:127 ; encoding: [0x7f,0x00,0x88,0xdb,0xfd,0xfc,0xf4,0xfe]
3262+
0x7f,0x00,0x88,0xdb,0xfd,0xfc,0xf4,0xfe
3263+
3264+
# GFX12: ds_bvh_stack_push8_pop2_rtn_b64 v[1:2], v3, v4, v[5:12] offset:127 ; encoding: [0x7f,0x00,0x88,0xdb,0x03,0x04,0x05,0x01]
3265+
0x7f,0x00,0x88,0xdb,0x03,0x04,0x05,0x01
3266+
3267+
# GFX12: ds_bvh_stack_push8_pop2_rtn_b64 v[1:2], v3, v4, v[5:12] ; encoding: [0x00,0x00,0x88,0xdb,0x03,0x04,0x05,0x01]
3268+
0x00,0x00,0x88,0xdb,0x03,0x04,0x05,0x01

0 commit comments

Comments
 (0)