@@ -108,15 +108,12 @@ define amdgpu_kernel void @v_pack_b32_v2f16(ptr addrspace(1) %in0, ptr addrspace
108
108
; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
109
109
; GFX11-GISEL-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
110
110
; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
111
- ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v0 , 1, v0
111
+ ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v1 , 1, v0
112
112
; GFX11-GISEL-REAL16-NEXT: s_waitcnt lgkmcnt(0)
113
- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v1, v0 , s[0:1] glc dlc
113
+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_b16 v0, v1 , s[0:1] glc dlc
114
114
; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
115
- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v2, v0 , s[2:3] glc dlc
115
+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_hi_b16 v0, v1 , s[2:3] glc dlc
116
116
; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
117
- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.l, v1.l
118
- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.h, v2.l
119
- ; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
120
117
; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l
121
118
; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h
122
119
; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -240,15 +237,12 @@ define amdgpu_kernel void @v_pack_b32_v2f16_sub(ptr addrspace(1) %in0, ptr addrs
240
237
; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
241
238
; GFX11-GISEL-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
242
239
; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
243
- ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v0 , 1, v0
240
+ ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v1 , 1, v0
244
241
; GFX11-GISEL-REAL16-NEXT: s_waitcnt lgkmcnt(0)
245
- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v1, v0 , s[0:1] glc dlc
242
+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_b16 v0, v1 , s[0:1] glc dlc
246
243
; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
247
- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v2, v0 , s[2:3] glc dlc
244
+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_hi_b16 v0, v1 , s[2:3] glc dlc
248
245
; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
249
- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.l, v1.l
250
- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.h, v2.l
251
- ; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
252
246
; GFX11-GISEL-REAL16-NEXT: v_subrev_f16_e32 v0.l, 2.0, v0.l
253
247
; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h
254
248
; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -486,15 +480,12 @@ define amdgpu_kernel void @v_pack_b32.fabs(ptr addrspace(1) %in0, ptr addrspace(
486
480
; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
487
481
; GFX11-GISEL-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
488
482
; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
489
- ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v0 , 1, v0
483
+ ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v1 , 1, v0
490
484
; GFX11-GISEL-REAL16-NEXT: s_waitcnt lgkmcnt(0)
491
- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v1, v0 , s[0:1] glc dlc
485
+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_b16 v0, v1 , s[0:1] glc dlc
492
486
; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
493
- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v2, v0 , s[2:3] glc dlc
487
+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_hi_b16 v0, v1 , s[2:3] glc dlc
494
488
; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
495
- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.l, v1.l
496
- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.h, v2.l
497
- ; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
498
489
; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l
499
490
; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h
500
491
; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -620,15 +611,12 @@ define amdgpu_kernel void @v_pack_b32.fneg(ptr addrspace(1) %in0, ptr addrspace(
620
611
; GFX11-GISEL-REAL16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
621
612
; GFX11-GISEL-REAL16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
622
613
; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
623
- ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v0 , 1, v0
614
+ ; GFX11-GISEL-REAL16-NEXT: v_lshlrev_b32_e32 v1 , 1, v0
624
615
; GFX11-GISEL-REAL16-NEXT: s_waitcnt lgkmcnt(0)
625
- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v1, v0 , s[0:1] glc dlc
616
+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_b16 v0, v1 , s[0:1] glc dlc
626
617
; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
627
- ; GFX11-GISEL-REAL16-NEXT: global_load_u16 v2, v0 , s[2:3] glc dlc
618
+ ; GFX11-GISEL-REAL16-NEXT: global_load_d16_hi_b16 v0, v1 , s[2:3] glc dlc
628
619
; GFX11-GISEL-REAL16-NEXT: s_waitcnt vmcnt(0)
629
- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.l, v1.l
630
- ; GFX11-GISEL-REAL16-NEXT: v_mov_b16_e32 v0.h, v2.l
631
- ; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
632
620
; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l
633
621
; GFX11-GISEL-REAL16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h
634
622
; GFX11-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
0 commit comments