@@ -480,11 +480,8 @@ define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
480
480
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
481
481
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
482
482
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
483
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
484
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
485
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
486
483
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
487
- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
484
+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
488
485
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
489
486
;
490
487
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
@@ -604,12 +601,9 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
604
601
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
605
602
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
606
603
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
607
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
608
- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
609
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
610
604
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
611
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
612
- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
605
+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
606
+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
613
607
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
614
608
;
615
609
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i16:
@@ -731,13 +725,10 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
731
725
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
732
726
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v5.l, v3.l
733
727
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
734
- ; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, v4, s0, 0x7fff
735
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
736
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
737
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
738
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
739
- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
740
- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
728
+ ; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
729
+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
730
+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
731
+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v2.l
741
732
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
742
733
;
743
734
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
@@ -804,13 +795,13 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
804
795
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v5, 0x7fff
805
796
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
806
797
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
798
+ ; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v4, 0xffff8000, v4, v5
807
799
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v5
808
800
; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v3, 0xffff8000, v3, v5
809
- ; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v4, 0xffff8000, v4, v5
810
- ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
801
+ ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v4.l
811
802
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
803
+ ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
812
804
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v6.l, v3.l
813
- ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v4.l
814
805
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
815
806
;
816
807
; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
@@ -877,12 +868,9 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
877
868
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
878
869
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
879
870
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
880
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
871
+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
881
872
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v5.l, v4.l
882
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
883
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
884
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
885
- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
873
+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
886
874
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
887
875
;
888
876
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i16:
@@ -937,8 +925,8 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
937
925
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
938
926
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
939
927
; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
940
- ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
941
928
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
929
+ ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
942
930
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
943
931
; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v4.l, v5.l
944
932
; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -1016,27 +1004,21 @@ define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
1016
1004
; GFX11-SDAG-TRUE16: ; %bb.0:
1017
1005
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1018
1006
; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
1019
- ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1007
+ ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v1
1020
1008
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
1021
1009
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
1010
+ ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
1022
1011
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
1023
1012
; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
1024
- ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1013
+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v6.l, v5.l
1025
1014
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1026
- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v6.l, v3.l
1015
+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v7.l, v3.l
1027
1016
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
1028
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1017
+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
1029
1018
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v4.l
1030
- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v7.l, v5.l
1031
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1032
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
1033
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
1034
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
1035
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
1036
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
1037
- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
1019
+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
1038
1020
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
1039
- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3, v1
1021
+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l, v1.h
1040
1022
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
1041
1023
;
1042
1024
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
@@ -1209,20 +1191,14 @@ define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
1209
1191
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1210
1192
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1211
1193
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1194
+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
1212
1195
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
1213
- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v1.l, v3.l
1214
1196
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1215
- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v6.l, v5.l
1197
+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v6.l, v5.l
1216
1198
; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v7.l, v4.l
1217
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1218
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
1219
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
1220
- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1221
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
1222
- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
1223
1199
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1224
- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
1225
- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3, v1
1200
+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
1201
+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l, v1.h
1226
1202
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
1227
1203
;
1228
1204
; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i16:
0 commit comments