|
| 1 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s |
| 2 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s |
| 3 | + |
| 4 | +; OBJDUMP: Contents of section .rodata: |
| 5 | +; OBJDUMP-NEXT: 0000 00000000 00000000 10010000 00000000 ................ |
| 6 | +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 ................ |
| 7 | +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 ................ |
| 8 | +; OBJDUMP-NOT: 0030 0000af00 94130000 1a000400 00000000 ................ |
| 9 | +; OBJDUMP-NEXT: 0030 4000af00 94130000 1a000400 00000000 @............... |
| 10 | + |
| 11 | +; ASM-LABEL: amdhsa_kernarg_preload_4_implicit_6: |
| 12 | +; ASM: .amdhsa_user_sgpr_count 10 |
| 13 | +; ASM: .amdhsa_next_free_sgpr 10 |
| 14 | +; ASM: ; NumSgprs: 16 |
| 15 | +; ASM: ; NumSGPRsForWavesPerEU: 16 |
| 16 | + |
| 17 | +; Test that we include preloaded SGPRs in the GRANULATED_WAVEFRONT_SGPR_COUNT |
| 18 | +; field that are not explicitly referenced in the kernel. This test has 6 implicit |
| 19 | +; user SGPRs enabled, 4 preloaded kernarg SGPRs, plus 6 extra SGPRs allocated |
| 20 | +; for flat scratch, etc. The total number of allocated SGPRs encoded in the |
| 21 | +; kernel descriptor should be 16. That's a 1 in the KD field since the granule |
| 22 | +; size is 8 and it's NumGranules - 1. The encoding for that looks like '40'. |
| 23 | + |
| 24 | +define amdgpu_kernel void @amdhsa_kernarg_preload_4_implicit_6(i128 inreg) { ret void } |
| 25 | + |
| 26 | +; OBJDUMP-NEXT: 0040 00000000 00000000 20010000 00000000 ........ ....... |
| 27 | +; OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 ................ |
| 28 | +; OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 ................ |
| 29 | +; OBJDUMP-NEXT: 0070 4000af00 94000000 08000800 00000000 @............... |
| 30 | + |
| 31 | +; ASM-LABEL: amdhsa_kernarg_preload_8_implicit_2: |
| 32 | +; ASM: .amdhsa_user_sgpr_count 10 |
| 33 | +; ASM: .amdhsa_next_free_sgpr 10 |
| 34 | +; ASM: ; NumSgprs: 16 |
| 35 | +; ASM: ; NumSGPRsForWavesPerEU: 16 |
| 36 | + |
| 37 | +; Only the kernarg_ptr is enabled so we should have 8 preload kernarg SGPRs, 2 |
| 38 | +; implicit, and 6 extra. |
| 39 | + |
| 40 | +define amdgpu_kernel void @amdhsa_kernarg_preload_8_implicit_2(i256 inreg) #0 { ret void } |
| 41 | + |
| 42 | +; OBJDUMP-NEXT: 0080 00000000 00000000 08010000 00000000 ................ |
| 43 | +; OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 ................ |
| 44 | +; OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000 ................ |
| 45 | +; OBJDUMP-NEXT: 00b0 4000af00 86000000 08000100 00000000 @............... |
| 46 | + |
| 47 | +; ASM-LABEL: amdhsa_kernarg_preload_1_implicit_2: |
| 48 | +; ASM: .amdhsa_user_sgpr_count 3 |
| 49 | +; ASM: .amdhsa_next_free_sgpr 3 |
| 50 | +; ASM: ; NumSgprs: 9 |
| 51 | +; ASM: ; NumSGPRsForWavesPerEU: 9 |
| 52 | + |
| 53 | +; 1 preload, 2 implicit, 6 extra. Rounds up to 16 SGPRs in the KD. |
| 54 | + |
| 55 | +define amdgpu_kernel void @amdhsa_kernarg_preload_1_implicit_2(i32 inreg) #0 { ret void } |
| 56 | + |
| 57 | +; OBJDUMP-NEXT: 00c0 00000000 00000000 08010000 00000000 ................ |
| 58 | +; OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000 ................ |
| 59 | +; OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000 ................ |
| 60 | +; OBJDUMP-NEXT: 00f0 0000af00 84000000 08000000 00000000 ................ |
| 61 | + |
| 62 | +; ASM-LABEL: amdhsa_kernarg_preload_0_implicit_2: |
| 63 | +; ASM: .amdhsa_user_sgpr_count 2 |
| 64 | +; ASM: .amdhsa_next_free_sgpr 0 |
| 65 | +; ASM: ; NumSgprs: 6 |
| 66 | +; ASM: ; NumSGPRsForWavesPerEU: 6 |
| 67 | + |
| 68 | +; 0 preload kernarg SGPRs, 2 implicit, 6 extra. Rounds up to 8 SGPRs in the KD. |
| 69 | +; Encoded like '00'. |
| 70 | + |
| 71 | +define amdgpu_kernel void @amdhsa_kernarg_preload_0_implicit_2(i32) #0 { ret void } |
| 72 | + |
| 73 | +attributes #0 = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } |
0 commit comments