Skip to content

Commit 4dbaf29

Browse files
arsenm authored and pravinjagtap committed
AMDGPU: Add v_prng_b32 instruction for gfx950 (llvm#116310)
Random number instruction for stochastic rounding. Change-Id: I0dd1f2a1fd72084273b463a53378fa44af88e78f
1 parent ae04462 commit 4dbaf29

File tree

16 files changed

+263
-2
lines changed

16 files changed

+263
-2
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,5 +520,7 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs",
520520
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
521521
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
522522

523+
TARGET_BUILTIN(__builtin_amdgcn_prng_b32, "UiUi", "nc", "prng-inst")
524+
523525
#undef BUILTIN
524526
#undef TARGET_BUILTIN

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@
8989
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
9090
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
9191
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
92-
// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
92+
// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
9393
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
9494
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
9595
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -emit-llvm \
2+
// RUN: -verify -o - %s
3+
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -emit-llvm \
4+
// RUN: -verify -o - %s
5+
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 -emit-llvm \
6+
// RUN: -verify -o - %s
7+
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 -emit-llvm \
8+
// RUN: -verify -o - %s
9+
10+
11+
// REQUIRES: amdgpu-registered-target
12+
13+
typedef unsigned int uint;
14+
void test_prng_b32(global uint* out, uint a) {
15+
*out = __builtin_amdgcn_prng_b32(a); // expected-error{{'__builtin_amdgcn_prng_b32' needs target feature prng-inst}}
16+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 -cl-std=CL1.2 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s
3+
// REQUIRES: amdgpu-registered-target
4+
5+
typedef unsigned int uint;
6+
7+
// CHECK-LABEL: @test_prng_b32(
8+
// CHECK-NEXT: entry:
9+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
10+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
11+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8
12+
// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4
13+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
14+
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]])
15+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
16+
// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
17+
// CHECK-NEXT: ret void
18+
//
19+
void test_prng_b32(global uint* out, uint a) {
20+
*out = __builtin_amdgcn_prng_b32(a);
21+
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,10 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
560560
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
561561
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
562562

563+
def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<
564+
[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]
565+
>, ClangBuiltin<"__builtin_amdgcn_prng_b32">;
566+
563567
} // TargetPrefix = "amdgcn"
564568

565569
// New-style image intrinsics

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,24 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
972972
"Has restricted SOffset (immediate not supported)."
973973
>;
974974

975+
def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
976+
"HasRequiredExportPriority",
977+
"true",
978+
"Export priority must be explicitly manipulated on GFX11.5"
979+
>;
980+
981+
def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order",
982+
"HasVmemWriteVgprInOrder",
983+
"true",
984+
"VMEM instructions of the same type write VGPR results in order"
985+
>;
986+
987+
def FeaturePrngInst : SubtargetFeature<"prng-inst",
988+
"HasPrngInst",
989+
"true",
990+
"Has v_prng_b32 instruction"
991+
>;
992+
975993
//===------------------------------------------------------------===//
976994
// Subtarget Features (options and debugging)
977995
//===------------------------------------------------------------===//
@@ -1488,6 +1506,7 @@ def FeatureISAVersion9_5_Common : FeatureSet<
14881506
FeatureFP8ConversionInsts,
14891507
FeatureCvtFP8VOP1Bug,
14901508
FeatureGFX950Insts,
1509+
FeaturePrngInst
14911510
])>;
14921511

14931512
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -2339,6 +2358,9 @@ def HasVGPRSingleUseHintInsts : Predicate<"Subtarget->hasVGPRSingleUseHintInsts(
23392358
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
23402359
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;
23412360

2361+
def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
2362+
AssemblerPredicate<(all_of FeaturePrngInst)>;
2363+
23422364
def HasGDS : Predicate<"Subtarget->hasGDS()">;
23432365

23442366
def HasGWS : Predicate<"Subtarget->hasGWS()">;

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,6 +1276,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
12761276

12771277
break;
12781278
}
1279+
case Intrinsic::amdgcn_prng_b32: {
1280+
auto *Src = II.getArgOperand(0);
1281+
if (isa<UndefValue>(Src)) // Undef or poison input: fold the call to its own operand.
1282+
return IC.replaceInstUsesWith(II, Src);
1283+
break; // No other fold applies; leave the switch explicitly instead of falling off the case.
1284+
}
12791285
}
12801286
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
12811287
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4492,6 +4492,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
44924492
case Intrinsic::amdgcn_cvt_pk_u8_f32:
44934493
case Intrinsic::amdgcn_alignbyte:
44944494
case Intrinsic::amdgcn_perm:
4495+
case Intrinsic::amdgcn_prng_b32:
44954496
case Intrinsic::amdgcn_fdot2:
44964497
case Intrinsic::amdgcn_sdot2:
44974498
case Intrinsic::amdgcn_udot2:

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
221221
bool HasVGPRSingleUseHintInsts = false;
222222
bool HasPseudoScalarTrans = false;
223223
bool HasRestrictedSOffset = false;
224-
224+
bool HasPrngInst = false;
225225
bool HasVcmpxPermlaneHazard = false;
226226
bool HasVMEMtoScalarWriteHazard = false;
227227
bool HasSMEMtoVectorWriteHazard = false;
@@ -1318,6 +1318,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13181318
/// instruction.
13191319
unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
13201320

1321+
bool hasPrngInst() const { return HasPrngInst; }
1322+
13211323
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
13221324
/// SGPRs
13231325
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,9 @@ let SubtargetPredicate = isGFX11Plus in {
745745
defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;
746746
} // End SubtargetPredicate = isGFX11Plus
747747

748+
let SubtargetPredicate = HasPrngInst in
749+
defm V_PRNG_B32 : VOP1Inst <"v_prng_b32", VOP_I32_I32, int_amdgcn_prng_b32>;
750+
748751
foreach vt = Reg32Types.types in {
749752
def : GCNPat<(int_amdgcn_permlane64 (vt VRegSrc_32:$src0)),
750753
(vt (V_PERMLANE64_B32 (vt VRegSrc_32:$src0)))
@@ -1455,6 +1458,8 @@ defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
14551458
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
14561459
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
14571460

1461+
defm V_PRNG_B32 : VOP1_Real_gfx9 <0x58>;
1462+
14581463
class MovDPP8Pattern<Predicate Pred, Instruction Inst> : GCNPat <
14591464
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
14601465
(Inst VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))> {

llvm/lib/TargetParser/TargetParser.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
470470
Features["gws"] = true;
471471
break;
472472
case GK_GFX950:
473+
Features["prng-inst"] = true;
473474
Features["gfx950-insts"] = true;
474475
[[fallthrough]];
475476
case GK_GFX942:
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3+
4+
declare i32 @llvm.amdgcn.prng.b32(i32) #0
5+
6+
; GCN-LABEL: {{^}}prng_b32:
7+
; GCN: v_prng_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
8+
define amdgpu_kernel void @prng_b32(ptr addrspace(1) %out, i32 %src) #1 {
9+
%prng = call i32 @llvm.amdgcn.prng.b32(i32 %src) #0
10+
store i32 %prng, ptr addrspace(1) %out, align 4
11+
ret void
12+
}
13+
14+
; GCN-LABEL: {{^}}prng_b32_constant_4:
15+
; GCN: v_prng_b32_e32 {{v[0-9]+}}, 4
16+
define amdgpu_kernel void @prng_b32_constant_4(ptr addrspace(1) %out) #1 {
17+
%prng = call i32 @llvm.amdgcn.prng.b32(i32 4) #0
18+
store i32 %prng, ptr addrspace(1) %out, align 4
19+
ret void
20+
}
21+
22+
; GCN-LABEL: {{^}}prng_b32_constant_100:
23+
; GCN: v_prng_b32_e32 {{v[0-9]+}}, 0x64
24+
define amdgpu_kernel void @prng_b32_constant_100(ptr addrspace(1) %out) #1 {
25+
%prng = call i32 @llvm.amdgcn.prng.b32(i32 100) #0
26+
store i32 %prng, ptr addrspace(1) %out, align 4
27+
ret void
28+
}
29+
30+
31+
attributes #0 = { nounwind readnone }
32+
attributes #1 = { nounwind }

llvm/test/MC/AMDGPU/gfx950_asm_vop1.s

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
2+
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940-ERR %s
3+
v_prng_b32 v5, v1
4+
// GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e]
5+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
6+
7+
v_prng_b32 v5, v255
8+
// GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e]
9+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
10+
11+
v_prng_b32 v5, s1
12+
// GFX950: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e]
13+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
14+
15+
v_prng_b32 v5, s101
16+
// GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e]
17+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
18+
19+
v_prng_b32 v5, vcc_lo
20+
// GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e]
21+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
22+
23+
v_prng_b32 v5, vcc_hi
24+
// GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e]
25+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
26+
27+
v_prng_b32 v5, ttmp15
28+
// GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e]
29+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
30+
31+
v_prng_b32 v5, m0
32+
// GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e]
33+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
34+
35+
v_prng_b32 v5, exec_lo
36+
// GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e]
37+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
38+
39+
v_prng_b32 v5, exec_hi
40+
// GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e]
41+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
42+
43+
v_prng_b32 v5, -1
44+
// GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e]
45+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
46+
47+
v_prng_b32 v5, 0.5
48+
// GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e]
49+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
50+
51+
v_prng_b32 v5, src_scc
52+
// GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e]
53+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
54+
55+
v_prng_b32 v255, 0xaf123456
56+
// GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
57+
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefixes=GFX950 %s
2+
3+
v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
4+
// GFX950: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff]
5+
6+
v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
7+
// GFX950: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0xe4,0x00,0xff]
8+
9+
v_prng_b32 v5, v1 row_mirror
10+
// GFX950: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x40,0x01,0xff]
11+
12+
v_prng_b32 v5, v1 row_half_mirror
13+
// GFX950: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x41,0x01,0xff]
14+
15+
v_prng_b32 v5, v1 row_shl:1
16+
// GFX950: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x01,0x01,0xff]
17+
18+
v_prng_b32 v5, v1 row_shl:15
19+
// GFX950: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x0f,0x01,0xff]
20+
21+
v_prng_b32 v5, v1 row_shr:1
22+
// GFX950: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x11,0x01,0xff]
23+
24+
v_prng_b32 v5, v1 row_shr:15
25+
// GFX950: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1f,0x01,0xff]
26+
27+
v_prng_b32 v5, v1 row_ror:1
28+
// GFX950: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x21,0x01,0xff]
29+
30+
v_prng_b32 v5, v1 row_ror:15
31+
// GFX950: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x2f,0x01,0xff]
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX950 %s
2+
3+
# GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e]
4+
0x01,0xb1,0x0a,0x7e
5+
6+
# GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e]
7+
0xff,0xb1,0x0a,0x7e
8+
9+
# GFX950: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e]
10+
0x01,0xb0,0x0a,0x7e
11+
12+
# GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e]
13+
0x65,0xb0,0x0a,0x7e
14+
15+
# GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e]
16+
0x6a,0xb0,0x0a,0x7e
17+
18+
# GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e]
19+
0x6b,0xb0,0x0a,0x7e
20+
21+
# GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e]
22+
0x7b,0xb0,0x0a,0x7e
23+
24+
# GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e]
25+
0x7c,0xb0,0x0a,0x7e
26+
27+
# GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e]
28+
0x7e,0xb0,0x0a,0x7e
29+
30+
# GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e]
31+
0x7f,0xb0,0x0a,0x7e
32+
33+
# GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e]
34+
0xc1,0xb0,0x0a,0x7e
35+
36+
# GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e]
37+
0xf0,0xb0,0x0a,0x7e
38+
39+
# GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e]
40+
0xfd,0xb0,0x0a,0x7e
41+
42+
# GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
43+
0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6453,3 +6453,21 @@ define half @test_constant_fold_exp2_f16_neg_denorm() {
64536453
%val = call half @llvm.amdgcn.exp2.f16(half 0xH83ff)
64546454
ret half %val
64556455
}
6456+
6457+
; --------------------------------------------------------------------
6458+
; llvm.amdgcn.prng.b32
6459+
; --------------------------------------------------------------------
6460+
declare i32 @llvm.amdgcn.prng.b32(i32)
6461+
define i32 @prng_undef_i32() {
6462+
; CHECK-LABEL: @prng_undef_i32(
6463+
; CHECK-NEXT: ret i32 undef
6464+
%prng = call i32 @llvm.amdgcn.prng.b32(i32 undef)
6465+
ret i32 %prng
6466+
}
6467+
6468+
define i32 @prng_poison_i32() {
6469+
; CHECK-LABEL: @prng_poison_i32(
6470+
; CHECK-NEXT: ret i32 poison
6471+
%prng = call i32 @llvm.amdgcn.prng.b32(i32 poison)
6472+
ret i32 %prng
6473+
}

0 commit comments

Comments
 (0)