@@ -891,3 +891,230 @@ define <8 x double> @fdiv_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
891
891
%r = fdiv <8 x double > %x , %s
892
892
ret <8 x double > %r
893
893
}
894
+
895
; sub with a select-of-zero on operand 1: lanes where %b is false subtract 0
; and therefore return %x unchanged; lanes where %b is true return %x - %y.
; The expected code below masks %y first and then performs a plain vpsubd.
define <4 x i32> @sub_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) {
; AVX2-LABEL: sub_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sub_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sub_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z}
; AVX512VL-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: retq
  %sel = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer ; %y where %b is set, else 0
  %res = sub <4 x i32> %x, %sel                                      ; select is the subtrahend
  ret <4 x i32> %res
}
925
+
926
; Negative test: here the select feeds operand 0 of the sub. Subtraction is
; not commutative, and no constant c satisfies c - %x == %x, so there is no
; identity constant for this operand position.
define <8 x i32> @sub_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) {
; AVX2-LABEL: sub_v8i32_commute:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sub_v8i32_commute:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sub_v8i32_commute:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vmovdqa32 %ymm2, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
  %sel = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer ; %y where %b is set, else 0
  %res = sub <8 x i32> %sel, %x                                      ; select is the minuend
  ret <8 x i32> %res
}
960
+
961
; Same shape as the operand-1 case, but with the select arms swapped: zero is
; chosen where %b is true and %y where it is false. The expected code uses the
; inverted-mask forms (vpandn / vptestnmd) before subtracting from %x.
define <16 x i32> @sub_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
; AVX2-LABEL: sub_v16i32_swap:
; AVX2: # %bb.0:
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT: vpslld $31, %ymm5, %ymm5
; AVX2-NEXT: vpsrad $31, %ymm5, %ymm5
; AVX2-NEXT: vpandn %ymm4, %ymm5, %ymm4
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsubd %ymm4, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: sub_v16i32_swap:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z}
; AVX512-NEXT: vpsubd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
  %sel = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y ; 0 where %b is set, else %y
  %res = sub <16 x i32> %x, %sel                                        ; select is the subtrahend
  ret <16 x i32> %res
}
990
+
991
; Negative test: swapped select arms (zero on the true side) combined with the
; select feeding operand 0 of the sub. Subtraction is not commutative, and no
; constant c satisfies c - %x == %x, so there is no identity constant for this
; operand position.
define <16 x i32> @sub_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
; AVX2-LABEL: sub_v16i32_commute_swap:
; AVX2: # %bb.0:
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT: vpslld $31, %ymm5, %ymm5
; AVX2-NEXT: vpsrad $31, %ymm5, %ymm5
; AVX2-NEXT: vpandn %ymm4, %ymm5, %ymm4
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm2, %ymm4, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: sub_v16i32_commute_swap:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z}
; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %sel = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y ; 0 where %b is set, else %y
  %res = sub <16 x i32> %sel, %x                                        ; select is the minuend
  ret <16 x i32> %res
}
1022
+
1023
; The select condition is not a vector argument here: it is produced by
; bitcasting the scalar i8 %pb to <8 x i1>. The select-of-zero is operand 1 of
; the sub, so unselected lanes return %x unchanged. On the AVX512 runs the
; expected code moves %edi straight into a mask register with kmovw.
define <8 x i32> @sub_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) {
; AVX2-LABEL: sub_v8i32_cast_cond:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sub_v8i32_cast_cond:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sub_v8i32_cast_cond:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vmovdqa32 %ymm1, %ymm1 {%k1} {z}
; AVX512VL-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
  %mask = bitcast i8 %pb to <8 x i1>                                    ; one condition bit per lane
  %sel = select <8 x i1> %mask, <8 x i32> %y, <8 x i32> zeroinitializer ; %y where the bit is set, else 0
  %res = sub <8 x i32> %x, %sel                                         ; select is the subtrahend
  ret <8 x i32> %res
}
1055
+
1056
; 64-bit element variant of the cast-condition test: the i8 %pb is bitcast to
; <8 x i1> and selects between %y and zero for operand 1 of the sub.
; The expected AVX2 code extracts the condition one bit at a time
; (shrb / andb / vpinsrb) and subtracts in two 256-bit halves; the AVX512 code
; loads the mask with a single kmovw.
define <8 x i64> @sub_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
; AVX2-LABEL: sub_v8i64_cast_cond:
; AVX2: # %bb.0:
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrb %al
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: movl %edi, %ecx
; AVX2-NEXT: andb $1, %cl
; AVX2-NEXT: movzbl %cl, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm4
; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrb $2, %al
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrb $3, %al
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrb $4, %al
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrb $5, %al
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: shrb $6, %al
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
; AVX2-NEXT: shrb $7, %dil
; AVX2-NEXT: movzbl %dil, %eax
; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3
; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
; AVX2-NEXT: vpsrad $31, %xmm4, %xmm4
; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: sub_v8i64_cast_cond:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw %edi, %k1
; AVX512-NEXT: vmovdqa64 %zmm1, %zmm1 {%k1} {z}
; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %mask = bitcast i8 %pb to <8 x i1>                                    ; one condition bit per lane
  %sel = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> zeroinitializer ; %y where the bit is set, else 0
  %res = sub <8 x i64> %x, %sel                                         ; select is the subtrahend
  ret <8 x i64> %res
}
0 commit comments