@@ -394,31 +394,17 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
394
394
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
395
395
; GFX900-NEXT: s_endpgm
396
396
;
397
- ; PACKED-SDAG-LABEL: fadd_v2_v_lit_splat:
398
- ; PACKED-SDAG: ; %bb.0:
399
- ; PACKED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
400
- ; PACKED-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
401
- ; PACKED-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0
402
- ; PACKED-SDAG-NEXT: s_waitcnt lgkmcnt(0)
403
- ; PACKED-SDAG-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
404
- ; PACKED-SDAG-NEXT: s_waitcnt vmcnt(0)
405
- ; PACKED-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], 1.0 op_sel_hi:[1,0]
406
- ; PACKED-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
407
- ; PACKED-SDAG-NEXT: s_endpgm
408
- ;
409
- ; PACKED-GISEL-LABEL: fadd_v2_v_lit_splat:
410
- ; PACKED-GISEL: ; %bb.0:
411
- ; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
412
- ; PACKED-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
413
- ; PACKED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0
414
- ; PACKED-GISEL-NEXT: s_mov_b32 s2, 1.0
415
- ; PACKED-GISEL-NEXT: s_mov_b32 s3, s2
416
- ; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
417
- ; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
418
- ; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0)
419
- ; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3]
420
- ; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
421
- ; PACKED-GISEL-NEXT: s_endpgm
397
+ ; PACKED-LABEL: fadd_v2_v_lit_splat:
398
+ ; PACKED: ; %bb.0:
399
+ ; PACKED-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
400
+ ; PACKED-NEXT: v_and_b32_e32 v0, 0x3ff, v0
401
+ ; PACKED-NEXT: v_lshlrev_b32_e32 v2, 3, v0
402
+ ; PACKED-NEXT: s_waitcnt lgkmcnt(0)
403
+ ; PACKED-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
404
+ ; PACKED-NEXT: s_waitcnt vmcnt(0)
405
+ ; PACKED-NEXT: v_pk_add_f32 v[0:1], v[0:1], 1.0 op_sel_hi:[1,0]
406
+ ; PACKED-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
407
+ ; PACKED-NEXT: s_endpgm
422
408
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
423
409
%gep = getelementptr inbounds <2 x float >, ptr addrspace (1 ) %a , i32 %id
424
410
%load = load <2 x float >, ptr addrspace (1 ) %gep , align 8
@@ -1171,31 +1157,17 @@ define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
1171
1157
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1172
1158
; GFX900-NEXT: s_endpgm
1173
1159
;
1174
- ; PACKED-SDAG-LABEL: fmul_v2_v_lit_splat:
1175
- ; PACKED-SDAG: ; %bb.0:
1176
- ; PACKED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1177
- ; PACKED-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1178
- ; PACKED-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1179
- ; PACKED-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1180
- ; PACKED-SDAG-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1181
- ; PACKED-SDAG-NEXT: s_waitcnt vmcnt(0)
1182
- ; PACKED-SDAG-NEXT: v_pk_mul_f32 v[0:1], v[0:1], 4.0 op_sel_hi:[1,0]
1183
- ; PACKED-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1184
- ; PACKED-SDAG-NEXT: s_endpgm
1185
- ;
1186
- ; PACKED-GISEL-LABEL: fmul_v2_v_lit_splat:
1187
- ; PACKED-GISEL: ; %bb.0:
1188
- ; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1189
- ; PACKED-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1190
- ; PACKED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1191
- ; PACKED-GISEL-NEXT: s_mov_b32 s2, 4.0
1192
- ; PACKED-GISEL-NEXT: s_mov_b32 s3, s2
1193
- ; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1194
- ; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1195
- ; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0)
1196
- ; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], v[0:1], s[2:3]
1197
- ; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1198
- ; PACKED-GISEL-NEXT: s_endpgm
1160
+ ; PACKED-LABEL: fmul_v2_v_lit_splat:
1161
+ ; PACKED: ; %bb.0:
1162
+ ; PACKED-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1163
+ ; PACKED-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1164
+ ; PACKED-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1165
+ ; PACKED-NEXT: s_waitcnt lgkmcnt(0)
1166
+ ; PACKED-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1167
+ ; PACKED-NEXT: s_waitcnt vmcnt(0)
1168
+ ; PACKED-NEXT: v_pk_mul_f32 v[0:1], v[0:1], 4.0 op_sel_hi:[1,0]
1169
+ ; PACKED-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1170
+ ; PACKED-NEXT: s_endpgm
1199
1171
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
1200
1172
%gep = getelementptr inbounds <2 x float >, ptr addrspace (1 ) %a , i32 %id
1201
1173
%load = load <2 x float >, ptr addrspace (1 ) %gep , align 8
@@ -1701,51 +1673,17 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
1701
1673
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1702
1674
; GFX900-NEXT: s_endpgm
1703
1675
;
1704
- ; PACKED-SDAG-LABEL: fma_v2_v_lit_splat:
1705
- ; PACKED-SDAG: ; %bb.0:
1706
- ; PACKED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1707
- ; PACKED-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1708
- ; PACKED-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1709
- ; PACKED-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1710
- ; PACKED-SDAG-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1711
- ; PACKED-SDAG-NEXT: s_waitcnt vmcnt(0)
1712
- ; PACKED-SDAG-NEXT: v_pk_fma_f32 v[0:1], v[0:1], 4.0, 1.0 op_sel_hi:[1,0,0]
1713
- ; PACKED-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1714
- ; PACKED-SDAG-NEXT: s_endpgm
1715
- ;
1716
- ; GFX90A-GISEL-LABEL: fma_v2_v_lit_splat:
1717
- ; GFX90A-GISEL: ; %bb.0:
1718
- ; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1719
- ; GFX90A-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1720
- ; GFX90A-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0
1721
- ; GFX90A-GISEL-NEXT: s_mov_b32 s4, 1.0
1722
- ; GFX90A-GISEL-NEXT: s_mov_b32 s2, 4.0
1723
- ; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1724
- ; GFX90A-GISEL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
1725
- ; GFX90A-GISEL-NEXT: s_mov_b32 s5, s4
1726
- ; GFX90A-GISEL-NEXT: s_mov_b32 s3, s2
1727
- ; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
1728
- ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0)
1729
- ; GFX90A-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3]
1730
- ; GFX90A-GISEL-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
1731
- ; GFX90A-GISEL-NEXT: s_endpgm
1732
- ;
1733
- ; GFX942-GISEL-LABEL: fma_v2_v_lit_splat:
1734
- ; GFX942-GISEL: ; %bb.0:
1735
- ; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1736
- ; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1737
- ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v0
1738
- ; GFX942-GISEL-NEXT: s_mov_b32 s4, 1.0
1739
- ; GFX942-GISEL-NEXT: s_mov_b32 s2, 4.0
1740
- ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1741
- ; GFX942-GISEL-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
1742
- ; GFX942-GISEL-NEXT: s_mov_b32 s5, s4
1743
- ; GFX942-GISEL-NEXT: s_mov_b32 s3, s2
1744
- ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[4:5]
1745
- ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
1746
- ; GFX942-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[2:3], v[2:3]
1747
- ; GFX942-GISEL-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
1748
- ; GFX942-GISEL-NEXT: s_endpgm
1676
+ ; PACKED-LABEL: fma_v2_v_lit_splat:
1677
+ ; PACKED: ; %bb.0:
1678
+ ; PACKED-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1679
+ ; PACKED-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1680
+ ; PACKED-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1681
+ ; PACKED-NEXT: s_waitcnt lgkmcnt(0)
1682
+ ; PACKED-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1683
+ ; PACKED-NEXT: s_waitcnt vmcnt(0)
1684
+ ; PACKED-NEXT: v_pk_fma_f32 v[0:1], v[0:1], 4.0, 1.0 op_sel_hi:[1,0,0]
1685
+ ; PACKED-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1686
+ ; PACKED-NEXT: s_endpgm
1749
1687
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
1750
1688
%gep = getelementptr inbounds <2 x float >, ptr addrspace (1 ) %a , i32 %id
1751
1689
%load = load <2 x float >, ptr addrspace (1 ) %gep , align 8
0 commit comments