|
1 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE64 %s |
2 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE32 %s |
3 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE64 %s |
4 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE32 %s |
5 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE64 %s |
| 1 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SCRATCH128K %s |
| 2 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SCRATCH128K %s |
| 3 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SCRATCH256K %s |
| 4 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SCRATCH128K %s |
| 5 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SCRATCH256K %s |
| 6 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SCRATCH1024K %s |
| 7 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SCRATCH2048K %s |
6 | 8 |
|
7 |
| -; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo14: |
| 9 | +; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo16: |
8 | 10 | ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
|
9 |
| -; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x3ffc, [[FI]] |
10 |
| -; GCN: {{flat|global}}_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] |
11 |
| -define amdgpu_kernel void @scratch_buffer_known_high_masklo14() #0 { |
| 11 | +; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xfffc, [[FI]] |
| 12 | +; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}], |
| 13 | +define amdgpu_kernel void @scratch_buffer_known_high_masklo16() { |
12 | 14 | %alloca = alloca i32, align 4, addrspace(5)
|
13 | 15 | store volatile i32 0, ptr addrspace(5) %alloca
|
14 | 16 | %toint = ptrtoint ptr addrspace(5) %alloca to i32
|
15 |
| - %masked = and i32 %toint, 16383 |
| 17 | + %masked = and i32 %toint, 65535 |
16 | 18 | store volatile i32 %masked, ptr addrspace(1) undef
|
17 | 19 | ret void
|
18 | 20 | }
|
19 | 21 |
|
20 |
| -; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo16: |
| 22 | +; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo17: |
21 | 23 | ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
|
22 |
| -; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xfffc, [[FI]] |
23 |
| -; GCN: {{flat|global}}_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] |
24 |
| -define amdgpu_kernel void @scratch_buffer_known_high_masklo16() #0 { |
| 24 | +; SCRATCH128K-NOT: v_and_b32 |
| 25 | +; SCRATCH256K: v_and_b32_e32 v{{[0-9]+}}, 0x1fffc, [[FI]] |
| 26 | +; SCRATCH1024K: v_and_b32_e32 v{{[0-9]+}}, 0x1fffc, [[FI]] |
| 27 | +; SCRATCH2048K: v_and_b32_e32 v{{[0-9]+}}, 0x1fffc, [[FI]] |
| 28 | +; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}], |
| 29 | +define amdgpu_kernel void @scratch_buffer_known_high_masklo17() { |
25 | 30 | %alloca = alloca i32, align 4, addrspace(5)
|
26 | 31 | store volatile i32 0, ptr addrspace(5) %alloca
|
27 | 32 | %toint = ptrtoint ptr addrspace(5) %alloca to i32
|
28 |
| - %masked = and i32 %toint, 65535 |
| 33 | + %masked = and i32 %toint, 131071 |
29 | 34 | store volatile i32 %masked, ptr addrspace(1) undef
|
30 | 35 | ret void
|
31 | 36 | }
|
32 | 37 |
|
33 |
| -; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo17: |
| 38 | +; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo18: |
34 | 39 | ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
|
35 |
| -; WAVE64-NOT: [[FI]] |
36 |
| -; WAVE64: {{flat|global}}_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[FI]] |
37 |
| - |
38 |
| -; WAVE32: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1fffc, [[FI]] |
39 |
| -; WAVE32: {{flat|global}}_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] |
40 |
| -define amdgpu_kernel void @scratch_buffer_known_high_masklo17() #0 { |
| 40 | +; SCRATCH128K-NOT: v_and_b32 |
| 41 | +; SCRATCH256K-NOT: v_and_b32 |
| 42 | +; SCRATCH1024K: v_and_b32_e32 v{{[0-9]+}}, 0x3fffc, [[FI]] |
| 43 | +; SCRATCH2048K: v_and_b32_e32 v{{[0-9]+}}, 0x3fffc, [[FI]] |
| 44 | +; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}], |
| 45 | +define amdgpu_kernel void @scratch_buffer_known_high_masklo18() { |
41 | 46 | %alloca = alloca i32, align 4, addrspace(5)
|
42 | 47 | store volatile i32 0, ptr addrspace(5) %alloca
|
43 | 48 | %toint = ptrtoint ptr addrspace(5) %alloca to i32
|
44 |
| - %masked = and i32 %toint, 131071 |
| 49 | + %masked = and i32 %toint, 262143 |
45 | 50 | store volatile i32 %masked, ptr addrspace(1) undef
|
46 | 51 | ret void
|
47 | 52 | }
|
48 | 53 |
|
49 |
| -; GCN-LABEL: {{^}}scratch_buffer_known_high_mask18: |
| 54 | +; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo20: |
50 | 55 | ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
|
51 |
| -; GCN-NOT: [[FI]] |
52 |
| -; GCN: {{flat|global}}_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[FI]] |
53 |
| -define amdgpu_kernel void @scratch_buffer_known_high_mask18() #0 { |
| 56 | +; SCRATCH128K-NOT: v_and_b32 |
| 57 | +; SCRATCH256K-NOT: v_and_b32 |
| 58 | +; SCRATCH1024K-NOT: v_and_b32 |
| 59 | +; SCRATCH2048K: v_and_b32_e32 v{{[0-9]+}}, 0xffffc, [[FI]] |
| 60 | +; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}], |
| 61 | +define amdgpu_kernel void @scratch_buffer_known_high_masklo20() { |
54 | 62 | %alloca = alloca i32, align 4, addrspace(5)
|
55 | 63 | store volatile i32 0, ptr addrspace(5) %alloca
|
56 | 64 | %toint = ptrtoint ptr addrspace(5) %alloca to i32
|
57 |
| - %masked = and i32 %toint, 262143 |
| 65 | + %masked = and i32 %toint, 1048575 |
58 | 66 | store volatile i32 %masked, ptr addrspace(1) undef
|
59 | 67 | ret void
|
60 | 68 | }
|
61 | 69 |
|
62 |
| -attributes #0 = { nounwind } |
| 70 | +; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo21: |
| 71 | +; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4 |
| 72 | +; GCN-NOT: v_and_b32 |
| 73 | +; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}], |
| 74 | +define amdgpu_kernel void @scratch_buffer_known_high_masklo21() { |
| 75 | + %alloca = alloca i32, align 4, addrspace(5) |
| 76 | + store volatile i32 0, ptr addrspace(5) %alloca |
| 77 | + %toint = ptrtoint ptr addrspace(5) %alloca to i32 |
| 78 | + %masked = and i32 %toint, 2097151 |
| 79 | + store volatile i32 %masked, ptr addrspace(1) undef |
| 80 | + ret void |
| 81 | +} |
0 commit comments