@@ -587,34 +587,63 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i
587
587
; FLATSCR_GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
588
588
; FLATSCR_GFX10-NEXT: s_endpgm
589
589
;
590
- ; GFX11-LABEL: vload2_private:
591
- ; GFX11: ; %bb.0: ; %entry
592
- ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
593
- ; GFX11-NEXT: v_mov_b32_e32 v2, 0
594
- ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
595
- ; GFX11-NEXT: global_load_u16 v0, v2, s[0:1]
596
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
597
- ; GFX11-NEXT: scratch_store_b16 off, v0, off dlc
598
- ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
599
- ; GFX11-NEXT: global_load_u16 v0, v2, s[0:1] offset:2
600
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
601
- ; GFX11-NEXT: scratch_store_b16 off, v0, off offset:2 dlc
602
- ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
603
- ; GFX11-NEXT: global_load_u16 v0, v2, s[0:1] offset:4
604
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
605
- ; GFX11-NEXT: scratch_store_b16 off, v0, off offset:4 dlc
606
- ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
607
- ; GFX11-NEXT: s_clause 0x1
608
- ; GFX11-NEXT: scratch_load_u16 v0, off, off offset:2
609
- ; GFX11-NEXT: scratch_load_u16 v3, off, off
610
- ; GFX11-NEXT: s_waitcnt vmcnt(1)
611
- ; GFX11-NEXT: v_mov_b32_e32 v1, v0
612
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
613
- ; GFX11-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
614
- ; GFX11-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4
615
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
616
- ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3]
617
- ; GFX11-NEXT: s_endpgm
590
+ ; GFX11-TRUE16-LABEL: vload2_private:
591
+ ; GFX11-TRUE16: ; %bb.0: ; %entry
592
+ ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
593
+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
594
+ ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
595
+ ; GFX11-TRUE16-NEXT: global_load_u16 v0, v2, s[0:1]
596
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
597
+ ; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off dlc
598
+ ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
599
+ ; GFX11-TRUE16-NEXT: global_load_u16 v0, v2, s[0:1] offset:2
600
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
601
+ ; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off offset:2 dlc
602
+ ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
603
+ ; GFX11-TRUE16-NEXT: global_load_u16 v0, v2, s[0:1] offset:4
604
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
605
+ ; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off offset:4 dlc
606
+ ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
607
+ ; GFX11-TRUE16-NEXT: s_clause 0x1
608
+ ; GFX11-TRUE16-NEXT: scratch_load_u16 v3, off, off offset:2
609
+ ; GFX11-TRUE16-NEXT: scratch_load_u16 v0, off, off
610
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
611
+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v3
612
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
613
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
614
+ ; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4
615
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
616
+ ; GFX11-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[2:3]
617
+ ; GFX11-TRUE16-NEXT: s_endpgm
618
+ ;
619
+ ; GFX11-FAKE16-LABEL: vload2_private:
620
+ ; GFX11-FAKE16: ; %bb.0: ; %entry
621
+ ; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
622
+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0
623
+ ; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
624
+ ; GFX11-FAKE16-NEXT: global_load_u16 v0, v2, s[0:1]
625
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
626
+ ; GFX11-FAKE16-NEXT: scratch_store_b16 off, v0, off dlc
627
+ ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
628
+ ; GFX11-FAKE16-NEXT: global_load_u16 v0, v2, s[0:1] offset:2
629
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
630
+ ; GFX11-FAKE16-NEXT: scratch_store_b16 off, v0, off offset:2 dlc
631
+ ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
632
+ ; GFX11-FAKE16-NEXT: global_load_u16 v0, v2, s[0:1] offset:4
633
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
634
+ ; GFX11-FAKE16-NEXT: scratch_store_b16 off, v0, off offset:4 dlc
635
+ ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
636
+ ; GFX11-FAKE16-NEXT: s_clause 0x1
637
+ ; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, off offset:2
638
+ ; GFX11-FAKE16-NEXT: scratch_load_u16 v3, off, off
639
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
640
+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v0
641
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
642
+ ; GFX11-FAKE16-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
643
+ ; GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4
644
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
645
+ ; GFX11-FAKE16-NEXT: global_store_b64 v2, v[0:1], s[2:3]
646
+ ; GFX11-FAKE16-NEXT: s_endpgm
618
647
entry:
619
648
%loc = alloca [3 x i16 ], align 2 , addrspace (5 )
620
649
%tmp = load i16 , ptr addrspace (1 ) %in , align 2
@@ -968,16 +997,27 @@ define <2 x i16> @chain_hi_to_lo_group_may_alias_store(ptr addrspace(3) %ptr, pt
968
997
; GFX10-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
969
998
; GFX10-NEXT: s_setpc_b64 s[30:31]
970
999
;
971
- ; GFX11-LABEL: chain_hi_to_lo_group_may_alias_store:
972
- ; GFX11: ; %bb.0: ; %bb
973
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
974
- ; GFX11-NEXT: v_mov_b32_e32 v2, 0x7b
975
- ; GFX11-NEXT: ds_load_u16 v3, v0
976
- ; GFX11-NEXT: ds_store_b16 v1, v2
977
- ; GFX11-NEXT: ds_load_u16 v0, v0 offset:2
978
- ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
979
- ; GFX11-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
980
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
1000
+ ; GFX11-TRUE16-LABEL: chain_hi_to_lo_group_may_alias_store:
1001
+ ; GFX11-TRUE16: ; %bb.0: ; %bb
1002
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1003
+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0x7b
1004
+ ; GFX11-TRUE16-NEXT: ds_load_u16 v3, v0
1005
+ ; GFX11-TRUE16-NEXT: ds_store_b16 v1, v2
1006
+ ; GFX11-TRUE16-NEXT: ds_load_u16 v0, v0 offset:2
1007
+ ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1008
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
1009
+ ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
1010
+ ;
1011
+ ; GFX11-FAKE16-LABEL: chain_hi_to_lo_group_may_alias_store:
1012
+ ; GFX11-FAKE16: ; %bb.0: ; %bb
1013
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1014
+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0x7b
1015
+ ; GFX11-FAKE16-NEXT: ds_load_u16 v3, v0
1016
+ ; GFX11-FAKE16-NEXT: ds_store_b16 v1, v2
1017
+ ; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:2
1018
+ ; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1019
+ ; GFX11-FAKE16-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
1020
+ ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
981
1021
bb:
982
1022
%gep_lo = getelementptr inbounds i16 , ptr addrspace (3 ) %ptr , i64 1
983
1023
%load_hi = load i16 , ptr addrspace (3 ) %ptr
0 commit comments