@@ -873,9 +873,8 @@ define <2 x i16> @trunc_packus_v2i64_v2i16(<2 x i64> %a0) {
873
873
; SSE41-NEXT: pand %xmm5, %xmm0
874
874
; SSE41-NEXT: por %xmm4, %xmm0
875
875
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
876
- ; SSE41-NEXT: packusdw %xmm1, %xmm1
877
- ; SSE41-NEXT: packusdw %xmm1, %xmm1
878
- ; SSE41-NEXT: movdqa %xmm1, %xmm0
876
+ ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
877
+ ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
879
878
; SSE41-NEXT: retq
880
879
;
881
880
; AVX1-LABEL: trunc_packus_v2i64_v2i16:
@@ -887,21 +886,32 @@ define <2 x i16> @trunc_packus_v2i64_v2i16(<2 x i64> %a0) {
887
886
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
888
887
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
889
888
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
890
- ; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
891
- ; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
889
+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
890
+ ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
892
891
; AVX1-NEXT: retq
893
892
;
894
- ; AVX2-LABEL: trunc_packus_v2i64_v2i16:
895
- ; AVX2: # %bb.0:
896
- ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
897
- ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
898
- ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
899
- ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
900
- ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
901
- ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
902
- ; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
903
- ; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
904
- ; AVX2-NEXT: retq
893
+ ; AVX2-SLOW-LABEL: trunc_packus_v2i64_v2i16:
894
+ ; AVX2-SLOW: # %bb.0:
895
+ ; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
896
+ ; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
897
+ ; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
898
+ ; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
899
+ ; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
900
+ ; AVX2-SLOW-NEXT: vpand %xmm0, %xmm1, %xmm0
901
+ ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
902
+ ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
903
+ ; AVX2-SLOW-NEXT: retq
904
+ ;
905
+ ; AVX2-FAST-LABEL: trunc_packus_v2i64_v2i16:
906
+ ; AVX2-FAST: # %bb.0:
907
+ ; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
908
+ ; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
909
+ ; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
910
+ ; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
911
+ ; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
912
+ ; AVX2-FAST-NEXT: vpand %xmm0, %xmm1, %xmm0
913
+ ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
914
+ ; AVX2-FAST-NEXT: retq
905
915
;
906
916
; AVX512F-LABEL: trunc_packus_v2i64_v2i16:
907
917
; AVX512F: # %bb.0:
0 commit comments