Skip to content

Commit 9449e63

Browse files
committed
Feedback, move comments to header, add more subtarget tests
1 parent 8885b13 commit 9449e63

File tree

8 files changed

+738
-20
lines changed

8 files changed

+738
-20
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -435,35 +435,37 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
435435
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
436436
const Function &F = MF.getFunction();
437437
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
438-
439-
auto CreateExpr = [&MF](int64_t Value) {
440-
return MCConstantExpr::create(Value, MF.getContext());
441-
};
438+
MCContext &Ctx = MF.getContext();
442439

443440
MCKernelDescriptor KernelDescriptor;
444441

445442
assert(isUInt<32>(PI.ScratchSize));
446443
assert(isUInt<32>(PI.getComputePGMRSrc1(STM)));
447444
assert(isUInt<32>(PI.getComputePGMRSrc2()));
448445

449-
KernelDescriptor.group_segment_fixed_size = CreateExpr(PI.LDSSize);
450-
KernelDescriptor.private_segment_fixed_size = CreateExpr(PI.ScratchSize);
446+
KernelDescriptor.group_segment_fixed_size =
447+
MCConstantExpr::create(PI.LDSSize, Ctx);
448+
KernelDescriptor.private_segment_fixed_size =
449+
MCConstantExpr::create(PI.ScratchSize, Ctx);
451450

452451
Align MaxKernArgAlign;
453-
KernelDescriptor.kernarg_size =
454-
CreateExpr(STM.getKernArgSegmentSize(F, MaxKernArgAlign));
452+
KernelDescriptor.kernarg_size = MCConstantExpr::create(
453+
STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx);
455454

456-
KernelDescriptor.compute_pgm_rsrc1 = CreateExpr(PI.getComputePGMRSrc1(STM));
457-
KernelDescriptor.compute_pgm_rsrc2 = CreateExpr(PI.getComputePGMRSrc2());
455+
KernelDescriptor.compute_pgm_rsrc1 =
456+
MCConstantExpr::create(PI.getComputePGMRSrc1(STM), Ctx);
457+
KernelDescriptor.compute_pgm_rsrc2 =
458+
MCConstantExpr::create(PI.getComputePGMRSrc2(), Ctx);
458459
KernelDescriptor.kernel_code_properties =
459-
CreateExpr(getAmdhsaKernelCodeProperties(MF));
460+
MCConstantExpr::create(getAmdhsaKernelCodeProperties(MF), Ctx);
460461

461462
assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
462-
KernelDescriptor.compute_pgm_rsrc3 = CreateExpr(
463-
STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0);
463+
KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create(
464+
STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0, Ctx);
464465

465-
KernelDescriptor.kernarg_preload = CreateExpr(
466-
AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0);
466+
KernelDescriptor.kernarg_preload = MCConstantExpr::create(
467+
AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
468+
Ctx);
467469

468470
return KernelDescriptor;
469471
}

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
using namespace llvm;
1414
using namespace llvm::AMDGPU;
1515

16-
// MCExpr for:
17-
// Dst = Dst & ~Mask
18-
// Dst = Dst | (Value << Shift)
1916
void MCKernelDescriptor::bits_set(const MCExpr *&Dst, const MCExpr *Value,
2017
uint32_t Shift, uint32_t Mask,
2118
MCContext &Ctx) {
@@ -26,8 +23,6 @@ void MCKernelDescriptor::bits_set(const MCExpr *&Dst, const MCExpr *Value,
2623
Ctx);
2724
}
2825

29-
// MCExpr for:
30-
// return (Src & Mask) >> Shift
3126
const MCExpr *MCKernelDescriptor::bits_get(const MCExpr *Src, uint32_t Shift,
3227
uint32_t Mask, MCContext &Ctx) {
3328
auto Sft = MCConstantExpr::create(Shift, Ctx);

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,14 @@ struct MCKernelDescriptor {
3333
const MCExpr *kernel_code_properties = nullptr;
3434
const MCExpr *kernarg_preload = nullptr;
3535

36+
// MCExpr for:
37+
// Dst = Dst & ~Mask
38+
// Dst = Dst | (Value << Shift)
3639
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift,
3740
uint32_t Mask, MCContext &Ctx);
41+
42+
// MCExpr for:
43+
// return (Src & Mask) >> Shift
3844
static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift,
3945
uint32_t Mask, MCContext &Ctx);
4046
};

llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s

Lines changed: 190 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s

Lines changed: 186 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefix=ASM %s
2+
// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj < %s > %t
3+
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
4+
5+
// When going from asm -> asm, the expressions should remain the same (i.e., symbolic).
6+
// When going from asm -> obj, the expressions should get resolved (through fixups).
7+
8+
// OBJDUMP: Contents of section .rodata
9+
// expr_defined_later
10+
// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
11+
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
12+
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
13+
// OBJDUMP-NEXT: 0030 00f0af00 801f007f 00080000 00000000
14+
// expr_defined
15+
// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000
16+
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
17+
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
18+
// OBJDUMP-NEXT: 0070 00f0af00 801f007f 00080000 00000000
19+
20+
.text
21+
// ASM: .text
22+
23+
.amdhsa_code_object_version 4
24+
// ASM: .amdhsa_code_object_version 4
25+
26+
.p2align 8
27+
.type expr_defined_later,@function
28+
expr_defined_later:
29+
s_endpgm
30+
31+
.p2align 8
32+
.type expr_defined,@function
33+
expr_defined:
34+
s_endpgm
35+
36+
.rodata
37+
// ASM: .rodata
38+
39+
.p2align 6
40+
.amdhsa_kernel expr_defined_later
41+
.amdhsa_group_segment_fixed_size defined_value+2
42+
.amdhsa_private_segment_fixed_size defined_value+3
43+
.amdhsa_system_vgpr_workitem_id defined_2_bits
44+
.amdhsa_float_round_mode_32 defined_2_bits
45+
.amdhsa_float_round_mode_16_64 defined_2_bits
46+
.amdhsa_float_denorm_mode_32 defined_2_bits
47+
.amdhsa_float_denorm_mode_16_64 defined_2_bits
48+
.amdhsa_system_sgpr_workgroup_id_x defined_boolean
49+
.amdhsa_system_sgpr_workgroup_id_y defined_boolean
50+
.amdhsa_system_sgpr_workgroup_id_z defined_boolean
51+
.amdhsa_system_sgpr_workgroup_info defined_boolean
52+
.amdhsa_exception_fp_ieee_invalid_op defined_boolean
53+
.amdhsa_exception_fp_denorm_src defined_boolean
54+
.amdhsa_exception_fp_ieee_div_zero defined_boolean
55+
.amdhsa_exception_fp_ieee_overflow defined_boolean
56+
.amdhsa_exception_fp_ieee_underflow defined_boolean
57+
.amdhsa_exception_fp_ieee_inexact defined_boolean
58+
.amdhsa_exception_int_div_zero defined_boolean
59+
.amdhsa_uses_dynamic_stack defined_boolean
60+
.amdhsa_next_free_vgpr 0
61+
.amdhsa_next_free_sgpr 0
62+
.end_amdhsa_kernel
63+
64+
.set defined_value, 41
65+
.set defined_2_bits, 3
66+
.set defined_boolean, 1
67+
68+
.p2align 6
69+
.amdhsa_kernel expr_defined
70+
.amdhsa_group_segment_fixed_size defined_value+1
71+
.amdhsa_private_segment_fixed_size defined_value+2
72+
.amdhsa_system_vgpr_workitem_id defined_2_bits
73+
.amdhsa_float_round_mode_32 defined_2_bits
74+
.amdhsa_float_round_mode_16_64 defined_2_bits
75+
.amdhsa_float_denorm_mode_32 defined_2_bits
76+
.amdhsa_float_denorm_mode_16_64 defined_2_bits
77+
.amdhsa_system_sgpr_workgroup_id_x defined_boolean
78+
.amdhsa_system_sgpr_workgroup_id_y defined_boolean
79+
.amdhsa_system_sgpr_workgroup_id_z defined_boolean
80+
.amdhsa_system_sgpr_workgroup_info defined_boolean
81+
.amdhsa_exception_fp_ieee_invalid_op defined_boolean
82+
.amdhsa_exception_fp_denorm_src defined_boolean
83+
.amdhsa_exception_fp_ieee_div_zero defined_boolean
84+
.amdhsa_exception_fp_ieee_overflow defined_boolean
85+
.amdhsa_exception_fp_ieee_underflow defined_boolean
86+
.amdhsa_exception_fp_ieee_inexact defined_boolean
87+
.amdhsa_exception_int_div_zero defined_boolean
88+
.amdhsa_uses_dynamic_stack defined_boolean
89+
.amdhsa_next_free_vgpr 0
90+
.amdhsa_next_free_sgpr 0
91+
.end_amdhsa_kernel
92+
93+
// ASM: .amdhsa_kernel expr_defined_later
94+
// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
95+
// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
96+
// ASM-NEXT: .amdhsa_kernarg_size 0
97+
// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1
98+
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((0&(~2048))|(defined_boolean<<11))&1)>>0
99+
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1
100+
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2
101+
// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3
102+
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4
103+
// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5
104+
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6
105+
// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0
106+
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7
107+
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8
108+
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9
109+
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10
110+
// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11
111+
// ASM-NEXT: .amdhsa_next_free_vgpr 0
112+
// ASM-NEXT: .amdhsa_next_free_sgpr 0
113+
// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
114+
// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14
115+
// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16
116+
// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18
117+
// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21
118+
// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23
119+
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24
120+
// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25
121+
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26
122+
// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27
123+
// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28
124+
// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29
125+
// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30
126+
// ASM-NEXT: .end_amdhsa_kernel
127+
128+
// ASM: .set defined_value, 41
129+
// ASM-NEXT: .no_dead_strip defined_value
130+
// ASM-NEXT: .set defined_2_bits, 3
131+
// ASM-NEXT: .no_dead_strip defined_2_bits
132+
// ASM-NEXT: .set defined_boolean, 1
133+
// ASM-NEXT: .no_dead_strip defined_boolean
134+
135+
// ASM: .amdhsa_kernel expr_defined
136+
// ASM-NEXT: .amdhsa_group_segment_fixed_size 42
137+
// ASM-NEXT: .amdhsa_private_segment_fixed_size 43
138+
// ASM-NEXT: .amdhsa_kernarg_size 0
139+
// ASM-NEXT: .amdhsa_user_sgpr_count 0
140+
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
141+
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
142+
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
143+
// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
144+
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
145+
// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
146+
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
147+
// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
148+
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
149+
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
150+
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
151+
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
152+
// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3
153+
// ASM-NEXT: .amdhsa_next_free_vgpr 0
154+
// ASM-NEXT: .amdhsa_next_free_sgpr 0
155+
// ASM-NEXT: .amdhsa_float_round_mode_32 3
156+
// ASM-NEXT: .amdhsa_float_round_mode_16_64 3
157+
// ASM-NEXT: .amdhsa_float_denorm_mode_32 3
158+
// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3
159+
// ASM-NEXT: .amdhsa_dx10_clamp 1
160+
// ASM-NEXT: .amdhsa_ieee_mode 1
161+
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
162+
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
163+
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
164+
// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
165+
// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
166+
// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
167+
// ASM-NEXT: .amdhsa_exception_int_div_zero 1
168+
// ASM-NEXT: .end_amdhsa_kernel

0 commit comments

Comments
 (0)