Skip to content

Commit 805bab4

Browse files
committed
Fix test case: the targeted architectures allow an immediate operand for v_pk_add (the v_pk_mul and v_pk_fma checks are updated the same way)
1 parent 8efad09 commit 805bab4

File tree

1 file changed

+33
-95
lines changed

1 file changed

+33
-95
lines changed

llvm/test/CodeGen/AMDGPU/packed-fp32.ll

Lines changed: 33 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -394,31 +394,17 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
394394
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
395395
; GFX900-NEXT: s_endpgm
396396
;
397-
; PACKED-SDAG-LABEL: fadd_v2_v_lit_splat:
398-
; PACKED-SDAG: ; %bb.0:
399-
; PACKED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
400-
; PACKED-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
401-
; PACKED-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0
402-
; PACKED-SDAG-NEXT: s_waitcnt lgkmcnt(0)
403-
; PACKED-SDAG-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
404-
; PACKED-SDAG-NEXT: s_waitcnt vmcnt(0)
405-
; PACKED-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], 1.0 op_sel_hi:[1,0]
406-
; PACKED-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
407-
; PACKED-SDAG-NEXT: s_endpgm
408-
;
409-
; PACKED-GISEL-LABEL: fadd_v2_v_lit_splat:
410-
; PACKED-GISEL: ; %bb.0:
411-
; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
412-
; PACKED-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
413-
; PACKED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0
414-
; PACKED-GISEL-NEXT: s_mov_b32 s2, 1.0
415-
; PACKED-GISEL-NEXT: s_mov_b32 s3, s2
416-
; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
417-
; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
418-
; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0)
419-
; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3]
420-
; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
421-
; PACKED-GISEL-NEXT: s_endpgm
397+
; PACKED-LABEL: fadd_v2_v_lit_splat:
398+
; PACKED: ; %bb.0:
399+
; PACKED-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
400+
; PACKED-NEXT: v_and_b32_e32 v0, 0x3ff, v0
401+
; PACKED-NEXT: v_lshlrev_b32_e32 v2, 3, v0
402+
; PACKED-NEXT: s_waitcnt lgkmcnt(0)
403+
; PACKED-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
404+
; PACKED-NEXT: s_waitcnt vmcnt(0)
405+
; PACKED-NEXT: v_pk_add_f32 v[0:1], v[0:1], 1.0 op_sel_hi:[1,0]
406+
; PACKED-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
407+
; PACKED-NEXT: s_endpgm
422408
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
423409
%gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
424410
%load = load <2 x float>, ptr addrspace(1) %gep, align 8
@@ -1171,31 +1157,17 @@ define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
11711157
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
11721158
; GFX900-NEXT: s_endpgm
11731159
;
1174-
; PACKED-SDAG-LABEL: fmul_v2_v_lit_splat:
1175-
; PACKED-SDAG: ; %bb.0:
1176-
; PACKED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1177-
; PACKED-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1178-
; PACKED-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1179-
; PACKED-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1180-
; PACKED-SDAG-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1181-
; PACKED-SDAG-NEXT: s_waitcnt vmcnt(0)
1182-
; PACKED-SDAG-NEXT: v_pk_mul_f32 v[0:1], v[0:1], 4.0 op_sel_hi:[1,0]
1183-
; PACKED-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1184-
; PACKED-SDAG-NEXT: s_endpgm
1185-
;
1186-
; PACKED-GISEL-LABEL: fmul_v2_v_lit_splat:
1187-
; PACKED-GISEL: ; %bb.0:
1188-
; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1189-
; PACKED-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1190-
; PACKED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1191-
; PACKED-GISEL-NEXT: s_mov_b32 s2, 4.0
1192-
; PACKED-GISEL-NEXT: s_mov_b32 s3, s2
1193-
; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1194-
; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1195-
; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0)
1196-
; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], v[0:1], s[2:3]
1197-
; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1198-
; PACKED-GISEL-NEXT: s_endpgm
1160+
; PACKED-LABEL: fmul_v2_v_lit_splat:
1161+
; PACKED: ; %bb.0:
1162+
; PACKED-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1163+
; PACKED-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1164+
; PACKED-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1165+
; PACKED-NEXT: s_waitcnt lgkmcnt(0)
1166+
; PACKED-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1167+
; PACKED-NEXT: s_waitcnt vmcnt(0)
1168+
; PACKED-NEXT: v_pk_mul_f32 v[0:1], v[0:1], 4.0 op_sel_hi:[1,0]
1169+
; PACKED-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1170+
; PACKED-NEXT: s_endpgm
11991171
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
12001172
%gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
12011173
%load = load <2 x float>, ptr addrspace(1) %gep, align 8
@@ -1701,51 +1673,17 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
17011673
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
17021674
; GFX900-NEXT: s_endpgm
17031675
;
1704-
; PACKED-SDAG-LABEL: fma_v2_v_lit_splat:
1705-
; PACKED-SDAG: ; %bb.0:
1706-
; PACKED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1707-
; PACKED-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1708-
; PACKED-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1709-
; PACKED-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1710-
; PACKED-SDAG-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1711-
; PACKED-SDAG-NEXT: s_waitcnt vmcnt(0)
1712-
; PACKED-SDAG-NEXT: v_pk_fma_f32 v[0:1], v[0:1], 4.0, 1.0 op_sel_hi:[1,0,0]
1713-
; PACKED-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1714-
; PACKED-SDAG-NEXT: s_endpgm
1715-
;
1716-
; GFX90A-GISEL-LABEL: fma_v2_v_lit_splat:
1717-
; GFX90A-GISEL: ; %bb.0:
1718-
; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1719-
; GFX90A-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1720-
; GFX90A-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0
1721-
; GFX90A-GISEL-NEXT: s_mov_b32 s4, 1.0
1722-
; GFX90A-GISEL-NEXT: s_mov_b32 s2, 4.0
1723-
; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1724-
; GFX90A-GISEL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
1725-
; GFX90A-GISEL-NEXT: s_mov_b32 s5, s4
1726-
; GFX90A-GISEL-NEXT: s_mov_b32 s3, s2
1727-
; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
1728-
; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0)
1729-
; GFX90A-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3]
1730-
; GFX90A-GISEL-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
1731-
; GFX90A-GISEL-NEXT: s_endpgm
1732-
;
1733-
; GFX942-GISEL-LABEL: fma_v2_v_lit_splat:
1734-
; GFX942-GISEL: ; %bb.0:
1735-
; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1736-
; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1737-
; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0
1738-
; GFX942-GISEL-NEXT: s_mov_b32 s4, 1.0
1739-
; GFX942-GISEL-NEXT: s_mov_b32 s2, 4.0
1740-
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1741-
; GFX942-GISEL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
1742-
; GFX942-GISEL-NEXT: s_mov_b32 s5, s4
1743-
; GFX942-GISEL-NEXT: s_mov_b32 s3, s2
1744-
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[4:5]
1745-
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
1746-
; GFX942-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3]
1747-
; GFX942-GISEL-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
1748-
; GFX942-GISEL-NEXT: s_endpgm
1676+
; PACKED-LABEL: fma_v2_v_lit_splat:
1677+
; PACKED: ; %bb.0:
1678+
; PACKED-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1679+
; PACKED-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1680+
; PACKED-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1681+
; PACKED-NEXT: s_waitcnt lgkmcnt(0)
1682+
; PACKED-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1683+
; PACKED-NEXT: s_waitcnt vmcnt(0)
1684+
; PACKED-NEXT: v_pk_fma_f32 v[0:1], v[0:1], 4.0, 1.0 op_sel_hi:[1,0,0]
1685+
; PACKED-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1686+
; PACKED-NEXT: s_endpgm
17491687
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
17501688
%gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
17511689
%load = load <2 x float>, ptr addrspace(1) %gep, align 8

0 commit comments

Comments
 (0)