@@ -818,6 +818,71 @@ define <2 x i16> @test_trunc_2xi32(<2 x i32> %a) #0 {
818
818
ret <2 x i16 > %r
819
819
}
820
820
821
+ define <2 x i16 > @test_trunc_2xi32_muliple_use0 (<2 x i32 > %a , ptr %p ) #0 {
822
+ ; I16x2-LABEL: test_trunc_2xi32_muliple_use0(
823
+ ; I16x2: {
824
+ ; I16x2-NEXT: .reg .b32 %r<7>;
825
+ ; I16x2-NEXT: .reg .b64 %rd<2>;
826
+ ; I16x2-EMPTY:
827
+ ; I16x2-NEXT: // %bb.0:
828
+ ; I16x2-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_trunc_2xi32_muliple_use0_param_0];
829
+ ; I16x2-NEXT: ld.param.u64 %rd1, [test_trunc_2xi32_muliple_use0_param_1];
830
+ ; I16x2-NEXT: prmt.b32 %r3, %r1, %r2, 0x5410U;
831
+ ; I16x2-NEXT: mov.b32 %r5, 65537;
832
+ ; I16x2-NEXT: add.s16x2 %r6, %r3, %r5;
833
+ ; I16x2-NEXT: st.u32 [%rd1], %r6;
834
+ ; I16x2-NEXT: st.param.b32 [func_retval0], %r3;
835
+ ; I16x2-NEXT: ret;
836
+ ;
837
+ ; NO-I16x2-LABEL: test_trunc_2xi32_muliple_use0(
838
+ ; NO-I16x2: {
839
+ ; NO-I16x2-NEXT: .reg .b16 %rs<5>;
840
+ ; NO-I16x2-NEXT: .reg .b32 %r<5>;
841
+ ; NO-I16x2-NEXT: .reg .b64 %rd<2>;
842
+ ; NO-I16x2-EMPTY:
843
+ ; NO-I16x2-NEXT: // %bb.0:
844
+ ; NO-I16x2-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_trunc_2xi32_muliple_use0_param_0];
845
+ ; NO-I16x2-NEXT: ld.param.u64 %rd1, [test_trunc_2xi32_muliple_use0_param_1];
846
+ ; NO-I16x2-NEXT: cvt.u16.u32 %rs1, %r2;
847
+ ; NO-I16x2-NEXT: cvt.u16.u32 %rs2, %r1;
848
+ ; NO-I16x2-NEXT: mov.b32 %r3, {%rs2, %rs1};
849
+ ; NO-I16x2-NEXT: add.s16 %rs3, %rs1, 1;
850
+ ; NO-I16x2-NEXT: add.s16 %rs4, %rs2, 1;
851
+ ; NO-I16x2-NEXT: mov.b32 %r4, {%rs4, %rs3};
852
+ ; NO-I16x2-NEXT: st.u32 [%rd1], %r4;
853
+ ; NO-I16x2-NEXT: st.param.b32 [func_retval0], %r3;
854
+ ; NO-I16x2-NEXT: ret;
855
+ %r = trunc <2 x i32 > %a to <2 x i16 >
856
+ ; %r feeds both the add and the ret. Without i16x2 the add is scalarized and reuses
857
+ ; the cvt results, so forming %r with PRMT instead would increase register pressure.
858
+ %s = add <2 x i16 > %r , splat (i16 1 )
859
+ store <2 x i16 > %s , ptr %p
860
+ ret <2 x i16 > %r
861
+ }
862
+
863
+ define <2 x i16 > @test_trunc_2xi32_muliple_use1 (<2 x i32 > %a , ptr %p ) #0 {
864
+ ; COMMON-LABEL: test_trunc_2xi32_muliple_use1(
865
+ ; COMMON: {
866
+ ; COMMON-NEXT: .reg .b32 %r<7>;
867
+ ; COMMON-NEXT: .reg .b64 %rd<2>;
868
+ ; COMMON-EMPTY:
869
+ ; COMMON-NEXT: // %bb.0:
870
+ ; COMMON-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_trunc_2xi32_muliple_use1_param_0];
871
+ ; COMMON-NEXT: ld.param.u64 %rd1, [test_trunc_2xi32_muliple_use1_param_1];
872
+ ; COMMON-NEXT: prmt.b32 %r3, %r1, %r2, 0x5410U;
873
+ ; COMMON-NEXT: add.s32 %r5, %r2, 1;
874
+ ; COMMON-NEXT: add.s32 %r6, %r1, 1;
875
+ ; COMMON-NEXT: st.v2.u32 [%rd1], {%r6, %r5};
876
+ ; COMMON-NEXT: st.param.b32 [func_retval0], %r3;
877
+ ; COMMON-NEXT: ret;
878
+ %r = trunc <2 x i32 > %a to <2 x i16 >
879
+ ; The second use is of the untruncated %a (an i32 add), not of %r, so forming %r
880
+ ; with PRMT does not increase register pressure (the COMMON checks expect prmt.b32).
881
+ %s = add <2 x i32 > %a , splat (i32 1 )
882
+ store <2 x i32 > %s , ptr %p
883
+ ret <2 x i16 > %r
884
+ }
885
+
821
886
define <2 x i16 > @test_trunc_2xi64 (<2 x i64 > %a ) #0 {
822
887
; COMMON-LABEL: test_trunc_2xi64(
823
888
; COMMON: {
0 commit comments