16
16
;
17
17
; GCN-LABEL: {{^}}ps_main:
18
18
19
- ; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2
20
- ; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0
19
+ ; GFX9-FLATSCR-DAG: s_add_u32 flat_scratch_lo, s0, s2
20
+ ; GFX9-FLATSCR-DAG: s_addc_u32 flat_scratch_hi, s1, 0
21
+ ; GFX9-FLATSCR-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
21
22
22
23
; GFX10-FLATSCR: s_add_u32 s0, s0, s2
23
24
; GFX10-FLATSCR: s_addc_u32 s1, s1, 0
36
37
; FLATSCR-NOT: SCRATCH_RSRC_DWORD
37
38
38
39
; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
39
- ; GFX9-FLATSCR: scratch_store_dword off, v2, [[SP]] offset:
40
- ; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
41
- ; GFX9-FLATSCR: scratch_store_dword off, v2, [[SP]] offset:
40
+ ; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset:
42
41
43
- ; GFX10-FLATSCR: scratch_store_dword off, v2, off offset:
44
- ; GFX10-FLATSCR: scratch_store_dword off, v2, off offset:
42
+ ; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:
45
43
46
- ; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
47
- ; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
44
+ ; MUBUF-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
45
+ ; MUBUF-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
46
+ ; GFX10-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0
48
47
; GCN-NOT: s_mov_b32 s0
49
48
50
49
; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[HI_OFF:v[0-9]+]],{{.*}} 0x280, [[CLAMP_IDX]]
53
52
; MUBUF: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
54
53
; MUBUF: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen
55
54
; FLATSCR: scratch_load_dword {{v[0-9]+}}, [[LO_OFF]], off
56
- ; FLATSCR: scratch_load_dword {{v[0-9]+}}, [[HI_OFF]], off
57
55
define amdgpu_ps float @ps_main (i32 %idx ) {
58
56
%v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
59
57
%v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -79,9 +77,7 @@ define amdgpu_ps float @ps_main(i32 %idx) {
79
77
; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen
80
78
81
79
; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
82
- ; GFX9-FLATSCR: scratch_store_dword off, v2, [[SP]] offset:
83
- ; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0
84
- ; GFX9-FLATSCR: scratch_store_dword off, v2, [[SP]] offset:
80
+ ; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset:
85
81
86
82
; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
87
83
; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off
0 commit comments