5
5
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
6
6
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
7
7
; A2V-NOT: SCRATCH_RSRC
8
- ; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a0
8
+ ; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a0 ; Reload Reuse
9
9
; A2M: buffer_store_dword v[[VSPILL]], off, s[{{[0-9:]+}}], 0 offset:[[FI:[0-9]+]] ; 4-byte Folded Spill
10
10
; A2M: buffer_load_dword v[[VSPILL:[0-9]+]], off, s[{{[0-9:]+}}], 0 offset:[[FI]] ; 4-byte Folded Reload
11
- ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
11
+ ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]] ; Reload Reuse
12
12
; A2V: ScratchSize: 0
13
13
define amdgpu_kernel void @max_24regs_32a_used (<16 x float > addrspace (1 )* %arg , float addrspace (1 )* %out ) #0 {
14
14
bb:
34
34
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
35
35
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
36
36
; A2V-NOT: SCRATCH_RSRC
37
- ; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a{{[0-9]+}}
37
+ ; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a{{[0-9]+}} ; Reload Reuse
38
38
; A2M: buffer_store_dword v[[VSPILL]], off, s[{{[0-9:]+}}], 0 offset:[[FI:[0-9]+]] ; 4-byte Folded Spill
39
39
; A2M: buffer_load_dword v[[VSPILL:[0-9]+]], off, s[{{[0-9:]+}}], 0 offset:[[FI]] ; 4-byte Folded Reload
40
- ; A2V: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
40
+ ; A2V: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]] ; Reload Reuse
41
41
; A2V: ScratchSize: 0
42
42
define amdgpu_kernel void @max_12regs_13a_used (i32 %cond , <4 x float > addrspace (1 )* %arg , <4 x float > addrspace (1 )* %out ) #2 {
43
43
bb:
55
55
st:
56
56
%gep1 = getelementptr <4 x float >, <4 x float > addrspace (1 )* %out , i64 16
57
57
%gep2 = getelementptr <4 x float >, <4 x float > addrspace (1 )* %out , i64 32
58
- store <4 x float > %mai.1 , <4 x float > addrspace (1 )* %gep1
59
- store <4 x float > %mai.2 , <4 x float > addrspace (1 )* %gep2
58
+ call void asm sideeffect "" , "a,a" (<4 x float > %mai.1 , <4 x float > %mai.2 )
60
59
ret void
61
60
}
62
61
65
64
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
66
65
; A2V-NOT: SCRATCH_RSRC
67
66
68
- ; A2V: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a{{[0-9]+}}
69
- ; A2V: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
67
+ ; A2V: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a{{[0-9]+}} ; Reload Reuse
68
+ ; A2V: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]] ; Reload Reuse
70
69
; A2V: ScratchSize: 0
71
70
72
71
; A2M: buffer_store_dword v[[VSPILLSTORE:[0-9]+]], off, s[{{[0-9:]+}}], 0 offset:[[FI:[0-9]+]] ; 4-byte Folded Spill
73
72
; A2M: buffer_load_dword v[[VSPILL_RELOAD:[0-9]+]], off, s[{{[0-9:]+}}], 0 offset:[[FI]] ; 4-byte Folded Reload
74
- ; A2M: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL_RELOAD]]
73
+ ; A2M: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL_RELOAD]] ; Reload Reuse
75
74
define amdgpu_kernel void @max_10_vgprs_used_9a () #1 {
76
- %v0 = load volatile i32 , i32 addrspace (3 )* undef
77
- %v1 = load volatile i32 , i32 addrspace (3 )* undef
78
- %v2 = load volatile i32 , i32 addrspace (3 )* undef
79
- %v3 = load volatile i32 , i32 addrspace (3 )* undef
80
- %v4 = load volatile i32 , i32 addrspace (3 )* undef
81
- %v5 = load volatile i32 , i32 addrspace (3 )* undef
82
- %v6 = load volatile i32 , i32 addrspace (3 )* undef
83
- %v7 = load volatile i32 , i32 addrspace (3 )* undef
84
- call void asm sideeffect "" , "a,a,a,a,~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6}" (i32 %v0 , i32 %v1 , i32 %v2 , i32 %v3 )
85
- %v8 = load volatile i32 , i32 addrspace (3 )* undef
86
- call void asm sideeffect "" , "a,a,a,a,a" (i32 %v4 , i32 %v5 , i32 %v6 , i32 %v7 , i32 %v8 )
75
+ %a1 = call <4 x i32 > asm sideeffect "" , "=a" ()
76
+ %a2 = call <4 x i32 > asm sideeffect "" , "=a" ()
77
+ %a3 = call i32 asm sideeffect "" , "=a" ()
78
+ %a4 = call <2 x i32 > asm sideeffect "" , "=a" ()
79
+ call void asm sideeffect "" , "a,a,a" (<4 x i32 > %a1 , <4 x i32 > %a2 , i32 %a3 )
80
+ call void asm sideeffect "" , "a" (<2 x i32 > %a4 )
87
81
ret void
88
82
}
89
83
90
84
; GCN-LABEL: {{^}}max_32regs_mfma32:
91
85
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
92
86
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
93
87
; A2V-NOT: SCRATCH_RSRC
94
- ; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a0
88
+ ; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a0 ; Reload Reuse
95
89
; A2M: buffer_store_dword v[[VSPILL]], off, s[{{[0-9:]+}}], 0 offset:[[FI:[0-9]+]] ; 4-byte Folded Spill
96
90
; A2M: buffer_load_dword v[[VSPILL:[0-9]+]], off, s[{{[0-9:]+}}], 0 offset:[[FI]] ; 4-byte Folded Reload
97
- ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
91
+ ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]] ; Reload Reuse
98
92
; A2V: ScratchSize: 0
99
93
define amdgpu_kernel void @max_32regs_mfma32 (float addrspace (1 )* %arg ) #3 {
100
94
bb:
@@ -115,6 +109,6 @@ declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i3
115
109
declare <32 x float > @llvm.amdgcn.mfma.f32.32x32x1f32 (float , float , <32 x float >, i32 , i32 , i32 )
116
110
117
111
attributes #0 = { nounwind "amdgpu-num-vgpr" ="24" }
118
- attributes #1 = { nounwind "amdgpu-num-vgpr" ="8 " }
112
+ attributes #1 = { nounwind "amdgpu-num-vgpr" ="10 " }
119
113
attributes #2 = { nounwind "amdgpu-num-vgpr" ="12" }
120
114
attributes #3 = { nounwind "amdgpu-num-vgpr" ="32" }
0 commit comments