Skip to content

Commit 4eec26b

Browse files
committed
[AMDGPU] Handle min/max in isNarrowingProfitable
This introduces a slight regression in some cases, but it will even out once we disable the promotion in CodeGenPrepare (CGP).
1 parent 780054d commit 4eec26b

File tree

3 files changed

+99
-136
lines changed

3 files changed

+99
-136
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,6 +1042,10 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(SDNode *N, EVT SrcVT,
10421042
case ISD::MUL:
10431043
case ISD::SETCC:
10441044
case ISD::SELECT:
1045+
case ISD::SMIN:
1046+
case ISD::SMAX:
1047+
case ISD::UMIN:
1048+
case ISD::UMAX:
10451049
if (Subtarget->has16BitInsts() &&
10461050
(!DestVT.isVector() || !Subtarget->hasVOP3PInsts())) {
10471051
// Don't narrow back down to i16 if promoted to i32 already.

llvm/test/CodeGen/AMDGPU/min.ll

Lines changed: 84 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -649,36 +649,35 @@ define amdgpu_kernel void @s_test_imin_sle_v4i8(ptr addrspace(1) %out, [8 x i32]
649649
;
650650
; GFX9-LABEL: s_test_imin_sle_v4i8:
651651
; GFX9: ; %bb.0:
652-
; GFX9-NEXT: s_load_dword s3, s[8:9], 0x4c
653652
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x28
653+
; GFX9-NEXT: s_load_dword s3, s[8:9], 0x4c
654654
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
655655
; GFX9-NEXT: v_mov_b32_e32 v0, 0
656656
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
657-
; GFX9-NEXT: s_lshr_b32 s5, s2, 16
658-
; GFX9-NEXT: s_lshr_b32 s8, s3, 16
659-
; GFX9-NEXT: s_ashr_i32 s9, s3, 24
660-
; GFX9-NEXT: s_ashr_i32 s6, s2, 24
661-
; GFX9-NEXT: s_bfe_i32 s8, s8, 0x80000
662-
; GFX9-NEXT: v_mov_b32_e32 v1, s9
663-
; GFX9-NEXT: s_bfe_i32 s5, s5, 0x80000
657+
; GFX9-NEXT: s_sext_i32_i16 s5, s2
664658
; GFX9-NEXT: s_sext_i32_i16 s7, s3
665-
; GFX9-NEXT: v_min_i16_e32 v1, s6, v1
666-
; GFX9-NEXT: v_mov_b32_e32 v2, s8
667-
; GFX9-NEXT: s_sext_i32_i16 s4, s2
668-
; GFX9-NEXT: s_lshr_b32 s7, s7, 8
669-
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1
670-
; GFX9-NEXT: v_min_i16_e32 v2, s5, v2
671-
; GFX9-NEXT: s_lshr_b32 s4, s4, 8
672-
; GFX9-NEXT: s_bfe_i32 s3, s3, 0x80000
673-
; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
674-
; GFX9-NEXT: v_mov_b32_e32 v2, s7
675-
; GFX9-NEXT: s_bfe_i32 s2, s2, 0x80000
676-
; GFX9-NEXT: v_min_i16_e32 v2, s4, v2
677-
; GFX9-NEXT: v_mov_b32_e32 v3, s3
678-
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2
679-
; GFX9-NEXT: v_min_i16_e32 v3, s2, v3
680-
; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
681-
; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
659+
; GFX9-NEXT: s_ashr_i32 s7, s7, 8
660+
; GFX9-NEXT: s_ashr_i32 s5, s5, 8
661+
; GFX9-NEXT: s_ashr_i32 s4, s2, 24
662+
; GFX9-NEXT: s_ashr_i32 s6, s3, 24
663+
; GFX9-NEXT: s_min_i32 s5, s5, s7
664+
; GFX9-NEXT: s_sext_i32_i8 s7, s3
665+
; GFX9-NEXT: s_sext_i32_i8 s8, s2
666+
; GFX9-NEXT: s_bfe_i32 s3, s3, 0x80010
667+
; GFX9-NEXT: s_bfe_i32 s2, s2, 0x80010
668+
; GFX9-NEXT: s_min_i32 s7, s8, s7
669+
; GFX9-NEXT: s_min_i32 s4, s4, s6
670+
; GFX9-NEXT: s_min_i32 s2, s2, s3
671+
; GFX9-NEXT: s_lshl_b32 s5, s5, 8
672+
; GFX9-NEXT: s_and_b32 s7, s7, 0xff
673+
; GFX9-NEXT: s_lshl_b32 s4, s4, 8
674+
; GFX9-NEXT: s_and_b32 s2, s2, 0xff
675+
; GFX9-NEXT: s_or_b32 s5, s7, s5
676+
; GFX9-NEXT: s_or_b32 s2, s2, s4
677+
; GFX9-NEXT: s_and_b32 s5, s5, 0xffff
678+
; GFX9-NEXT: s_lshl_b32 s2, s2, 16
679+
; GFX9-NEXT: s_or_b32 s2, s5, s2
680+
; GFX9-NEXT: v_mov_b32_e32 v1, s2
682681
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
683682
; GFX9-NEXT: s_endpgm
684683
;
@@ -688,111 +687,70 @@ define amdgpu_kernel void @s_test_imin_sle_v4i8(ptr addrspace(1) %out, [8 x i32]
688687
; GFX10-NEXT: s_load_dword s2, s[8:9], 0x28
689688
; GFX10-NEXT: s_load_dword s3, s[8:9], 0x4c
690689
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
690+
; GFX10-NEXT: v_mov_b32_e32 v0, 0
691691
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
692-
; GFX10-NEXT: s_sext_i32_i16 s4, s2
692+
; GFX10-NEXT: s_sext_i32_i16 s5, s2
693693
; GFX10-NEXT: s_sext_i32_i16 s7, s3
694-
; GFX10-NEXT: s_ashr_i32 s6, s2, 24
695-
; GFX10-NEXT: s_ashr_i32 s9, s3, 24
696-
; GFX10-NEXT: s_lshr_b32 s4, s4, 8
697-
; GFX10-NEXT: s_lshr_b32 s7, s7, 8
698-
; GFX10-NEXT: v_min_i16 v0, s6, s9
699-
; GFX10-NEXT: v_min_i16 v1, s4, s7
700-
; GFX10-NEXT: s_lshr_b32 s5, s2, 16
701-
; GFX10-NEXT: s_lshr_b32 s8, s3, 16
702-
; GFX10-NEXT: s_bfe_i32 s2, s2, 0x80000
703-
; GFX10-NEXT: s_bfe_i32 s5, s5, 0x80000
704-
; GFX10-NEXT: s_bfe_i32 s4, s8, 0x80000
705-
; GFX10-NEXT: s_bfe_i32 s3, s3, 0x80000
706-
; GFX10-NEXT: v_min_i16 v2, s5, s4
707-
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 8, v0
708-
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 8, v1
709-
; GFX10-NEXT: v_min_i16 v3, s2, s3
710-
; GFX10-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
711-
; GFX10-NEXT: v_mov_b32_e32 v2, 0
712-
; GFX10-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
713-
; GFX10-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
714-
; GFX10-NEXT: global_store_dword v2, v0, s[0:1]
694+
; GFX10-NEXT: s_ashr_i32 s4, s2, 24
695+
; GFX10-NEXT: s_ashr_i32 s6, s3, 24
696+
; GFX10-NEXT: s_sext_i32_i8 s8, s3
697+
; GFX10-NEXT: s_sext_i32_i8 s9, s2
698+
; GFX10-NEXT: s_bfe_i32 s3, s3, 0x80010
699+
; GFX10-NEXT: s_bfe_i32 s2, s2, 0x80010
700+
; GFX10-NEXT: s_ashr_i32 s7, s7, 8
701+
; GFX10-NEXT: s_ashr_i32 s5, s5, 8
702+
; GFX10-NEXT: s_min_i32 s8, s9, s8
703+
; GFX10-NEXT: s_min_i32 s4, s4, s6
704+
; GFX10-NEXT: s_min_i32 s2, s2, s3
705+
; GFX10-NEXT: s_min_i32 s3, s5, s7
706+
; GFX10-NEXT: s_and_b32 s5, s8, 0xff
707+
; GFX10-NEXT: s_lshl_b32 s4, s4, 8
708+
; GFX10-NEXT: s_lshl_b32 s3, s3, 8
709+
; GFX10-NEXT: s_and_b32 s2, s2, 0xff
710+
; GFX10-NEXT: s_or_b32 s3, s5, s3
711+
; GFX10-NEXT: s_or_b32 s2, s2, s4
712+
; GFX10-NEXT: s_and_b32 s3, s3, 0xffff
713+
; GFX10-NEXT: s_lshl_b32 s2, s2, 16
714+
; GFX10-NEXT: s_or_b32 s2, s3, s2
715+
; GFX10-NEXT: v_mov_b32_e32 v1, s2
716+
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
715717
; GFX10-NEXT: s_endpgm
716718
;
717-
; GFX11-TRUE16-LABEL: s_test_imin_sle_v4i8:
718-
; GFX11-TRUE16: ; %bb.0:
719-
; GFX11-TRUE16-NEXT: s_clause 0x1
720-
; GFX11-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x28
721-
; GFX11-TRUE16-NEXT: s_load_b32 s1, s[4:5], 0x4c
722-
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
723-
; GFX11-TRUE16-NEXT: s_sext_i32_i16 s2, s0
724-
; GFX11-TRUE16-NEXT: s_lshr_b32 s3, s0, 16
725-
; GFX11-TRUE16-NEXT: s_sext_i32_i16 s7, s1
726-
; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s1, 16
727-
; GFX11-TRUE16-NEXT: s_ashr_i32 s6, s0, 24
728-
; GFX11-TRUE16-NEXT: s_ashr_i32 s9, s1, 24
729-
; GFX11-TRUE16-NEXT: s_lshr_b32 s2, s2, 8
730-
; GFX11-TRUE16-NEXT: s_bfe_i32 s3, s3, 0x80000
731-
; GFX11-TRUE16-NEXT: s_bfe_i32 s0, s0, 0x80000
732-
; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s7, 8
733-
; GFX11-TRUE16-NEXT: s_bfe_i32 s8, s8, 0x80000
734-
; GFX11-TRUE16-NEXT: s_bfe_i32 s1, s1, 0x80000
735-
; GFX11-TRUE16-NEXT: v_min_i16 v0.l, s6, s9
736-
; GFX11-TRUE16-NEXT: v_min_i16 v1.l, s3, s8
737-
; GFX11-TRUE16-NEXT: v_min_i16 v2.l, s2, s7
738-
; GFX11-TRUE16-NEXT: v_min_i16 v3.l, s0, s1
739-
; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
740-
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 8, v0
741-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
742-
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 8, v2
743-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3
744-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
745-
; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v1, v0
746-
; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v3, v2
747-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
748-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
749-
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
750-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
751-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
752-
; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v1, v0
753-
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
754-
; GFX11-TRUE16-NEXT: global_store_b32 v2, v0, s[0:1]
755-
; GFX11-TRUE16-NEXT: s_endpgm
756-
;
757-
; GFX11-FAKE16-LABEL: s_test_imin_sle_v4i8:
758-
; GFX11-FAKE16: ; %bb.0:
759-
; GFX11-FAKE16-NEXT: s_clause 0x1
760-
; GFX11-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x28
761-
; GFX11-FAKE16-NEXT: s_load_b32 s1, s[4:5], 0x4c
762-
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
763-
; GFX11-FAKE16-NEXT: s_sext_i32_i16 s2, s0
764-
; GFX11-FAKE16-NEXT: s_lshr_b32 s3, s0, 16
765-
; GFX11-FAKE16-NEXT: s_sext_i32_i16 s7, s1
766-
; GFX11-FAKE16-NEXT: s_lshr_b32 s8, s1, 16
767-
; GFX11-FAKE16-NEXT: s_ashr_i32 s6, s0, 24
768-
; GFX11-FAKE16-NEXT: s_bfe_i32 s0, s0, 0x80000
769-
; GFX11-FAKE16-NEXT: s_ashr_i32 s9, s1, 24
770-
; GFX11-FAKE16-NEXT: s_bfe_i32 s1, s1, 0x80000
771-
; GFX11-FAKE16-NEXT: s_lshr_b32 s2, s2, 8
772-
; GFX11-FAKE16-NEXT: s_bfe_i32 s3, s3, 0x80000
773-
; GFX11-FAKE16-NEXT: s_lshr_b32 s7, s7, 8
774-
; GFX11-FAKE16-NEXT: s_bfe_i32 s8, s8, 0x80000
775-
; GFX11-FAKE16-NEXT: v_min_i16 v0, s6, s9
776-
; GFX11-FAKE16-NEXT: v_min_i16 v1, s0, s1
777-
; GFX11-FAKE16-NEXT: v_min_i16 v2, s3, s8
778-
; GFX11-FAKE16-NEXT: v_min_i16 v3, s2, s7
779-
; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
780-
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 8, v0
781-
; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
782-
; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
783-
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 8, v3
784-
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
785-
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v2, v0
786-
; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v3
787-
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0
788-
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
789-
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
790-
; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
791-
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
792-
; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v1, v0
793-
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
794-
; GFX11-FAKE16-NEXT: global_store_b32 v2, v0, s[0:1]
795-
; GFX11-FAKE16-NEXT: s_endpgm
719+
; GFX11-LABEL: s_test_imin_sle_v4i8:
720+
; GFX11: ; %bb.0:
721+
; GFX11-NEXT: s_clause 0x2
722+
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x28
723+
; GFX11-NEXT: s_load_b32 s3, s[4:5], 0x4c
724+
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
725+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
726+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
727+
; GFX11-NEXT: s_sext_i32_i16 s5, s2
728+
; GFX11-NEXT: s_sext_i32_i16 s7, s3
729+
; GFX11-NEXT: s_ashr_i32 s4, s2, 24
730+
; GFX11-NEXT: s_ashr_i32 s6, s3, 24
731+
; GFX11-NEXT: s_sext_i32_i8 s8, s3
732+
; GFX11-NEXT: s_sext_i32_i8 s9, s2
733+
; GFX11-NEXT: s_bfe_i32 s3, s3, 0x80010
734+
; GFX11-NEXT: s_bfe_i32 s2, s2, 0x80010
735+
; GFX11-NEXT: s_ashr_i32 s7, s7, 8
736+
; GFX11-NEXT: s_ashr_i32 s5, s5, 8
737+
; GFX11-NEXT: s_min_i32 s8, s9, s8
738+
; GFX11-NEXT: s_min_i32 s4, s4, s6
739+
; GFX11-NEXT: s_min_i32 s2, s2, s3
740+
; GFX11-NEXT: s_min_i32 s3, s5, s7
741+
; GFX11-NEXT: s_and_b32 s5, s8, 0xff
742+
; GFX11-NEXT: s_lshl_b32 s4, s4, 8
743+
; GFX11-NEXT: s_lshl_b32 s3, s3, 8
744+
; GFX11-NEXT: s_and_b32 s2, s2, 0xff
745+
; GFX11-NEXT: s_or_b32 s3, s5, s3
746+
; GFX11-NEXT: s_or_b32 s2, s2, s4
747+
; GFX11-NEXT: s_and_b32 s3, s3, 0xffff
748+
; GFX11-NEXT: s_lshl_b32 s2, s2, 16
749+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
750+
; GFX11-NEXT: s_or_b32 s2, s3, s2
751+
; GFX11-NEXT: v_mov_b32_e32 v1, s2
752+
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
753+
; GFX11-NEXT: s_endpgm
796754
%cmp = icmp sle <4 x i8> %a, %b
797755
%val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
798756
store <4 x i8> %val, ptr addrspace(1) %out

llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,11 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
162162
; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
163163
; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0xff
164164
; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
165-
; SDAG-VI-NEXT: v_max_i16_e64 v1, s2, 0
166-
; SDAG-VI-NEXT: v_max_i16_e64 v2, s3, 0
167-
; SDAG-VI-NEXT: v_min_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
168-
; SDAG-VI-NEXT: v_min_i16_e32 v1, 0xff, v1
165+
; SDAG-VI-NEXT: s_sext_i32_i16 s2, s2
166+
; SDAG-VI-NEXT: s_sext_i32_i16 s3, s3
167+
; SDAG-VI-NEXT: v_med3_i32 v1, s2, 0, v0
168+
; SDAG-VI-NEXT: v_med3_i32 v0, s3, 0, v0
169+
; SDAG-VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
169170
; SDAG-VI-NEXT: v_or_b32_e32 v2, v1, v0
170171
; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
171172
; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
@@ -689,12 +690,12 @@ define amdgpu_kernel void @vec_smax_smin_sgpr(ptr addrspace(1) %out, <2 x i16> i
689690
; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
690691
; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0xff
691692
; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
692-
; SDAG-VI-NEXT: s_lshr_b32 s3, s2, 16
693-
; SDAG-VI-NEXT: v_max_i16_e64 v1, s2, 0
694-
; SDAG-VI-NEXT: v_max_i16_e64 v2, s3, 0
695-
; SDAG-VI-NEXT: v_min_i16_e32 v1, 0xff, v1
696-
; SDAG-VI-NEXT: v_min_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
697-
; SDAG-VI-NEXT: v_or_b32_e32 v2, v1, v0
693+
; SDAG-VI-NEXT: s_ashr_i32 s3, s2, 16
694+
; SDAG-VI-NEXT: s_sext_i32_i16 s2, s2
695+
; SDAG-VI-NEXT: v_med3_i32 v1, s2, 0, v0
696+
; SDAG-VI-NEXT: v_med3_i32 v0, s3, 0, v0
697+
; SDAG-VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
698+
; SDAG-VI-NEXT: v_or_b32_sdwa v2, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
698699
; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
699700
; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
700701
; SDAG-VI-NEXT: flat_store_dword v[0:1], v2

0 commit comments

Comments
 (0)