@@ -1061,3 +1061,133 @@ bb:
ret <4 x i1> %tmp4
}
+ ; Regression reported on 057db2002bb3d79429db3c5fe436c8cefc50cb25
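+ ; The IR below appears to come from MemorySanitizer-style instrumentation
+ ; (note %_msret and the 0x500000000000 shadow-address xor); it exercises
+ ; constant folding of a pavgb whose result feeds the and/andn mask chain.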
+ @d = external global <2 x i64>, align 16
+ define void @constantfold_andn_mask() nounwind {
+ ; SSE-LABEL: constantfold_andn_mask:
+ ; SSE: # %bb.0: # %entry
+ ; SSE-NEXT: pushq %rax
+ ; SSE-NEXT: callq use@PLT
+ ; SSE-NEXT: movdqu (%rax), %xmm1
+ ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
+ ; SSE-NEXT: pand %xmm2, %xmm0
+ ; SSE-NEXT: pavgb %xmm2, %xmm0
+ ; SSE-NEXT: pandn %xmm1, %xmm0
+ ; SSE-NEXT: pand %xmm2, %xmm1
+ ; SSE-NEXT: pandn %xmm0, %xmm2
+ ; SSE-NEXT: por %xmm1, %xmm2
+ ; SSE-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
+ ; SSE-NEXT: xorq d@GOTPCREL(%rip), %rax
+ ; SSE-NEXT: movdqa %xmm2, (%rax)
+ ; SSE-NEXT: popq %rax
+ ; SSE-NEXT: retq
+ ;
+ ; XOP-LABEL: constantfold_andn_mask:
+ ; XOP: # %bb.0: # %entry
+ ; XOP-NEXT: pushq %rax
+ ; XOP-NEXT: callq use@PLT
+ ; XOP-NEXT: vmovdqu (%rax), %xmm1
+ ; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
+ ; XOP-NEXT: vpand %xmm2, %xmm1, %xmm3
+ ; XOP-NEXT: vpand %xmm2, %xmm0, %xmm0
+ ; XOP-NEXT: vpavgb %xmm2, %xmm0, %xmm0
+ ; XOP-NEXT: vpandn %xmm1, %xmm0, %xmm0
+ ; XOP-NEXT: vpandn %xmm0, %xmm2, %xmm0
+ ; XOP-NEXT: vpor %xmm0, %xmm3, %xmm0
+ ; XOP-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
+ ; XOP-NEXT: xorq d@GOTPCREL(%rip), %rax
+ ; XOP-NEXT: vmovdqa %xmm0, (%rax)
+ ; XOP-NEXT: popq %rax
+ ; XOP-NEXT: retq
+ ;
+ ; AVX1-LABEL: constantfold_andn_mask:
+ ; AVX1: # %bb.0: # %entry
+ ; AVX1-NEXT: pushq %rax
+ ; AVX1-NEXT: callq use@PLT
+ ; AVX1-NEXT: vmovdqu (%rax), %xmm1
+ ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
+ ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
+ ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+ ; AVX1-NEXT: vpavgb %xmm2, %xmm0, %xmm0
+ ; AVX1-NEXT: vpandn %xmm1, %xmm0, %xmm0
+ ; AVX1-NEXT: vpandn %xmm0, %xmm2, %xmm0
+ ; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
+ ; AVX1-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
+ ; AVX1-NEXT: xorq d@GOTPCREL(%rip), %rax
+ ; AVX1-NEXT: vmovdqa %xmm0, (%rax)
+ ; AVX1-NEXT: popq %rax
+ ; AVX1-NEXT: retq
+ ;
+ ; AVX2-LABEL: constantfold_andn_mask:
+ ; AVX2: # %bb.0: # %entry
+ ; AVX2-NEXT: pushq %rax
+ ; AVX2-NEXT: callq use@PLT
+ ; AVX2-NEXT: vmovdqu (%rax), %xmm1
+ ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
+ ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
+ ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+ ; AVX2-NEXT: vpavgb %xmm2, %xmm0, %xmm0
+ ; AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
+ ; AVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0
+ ; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
+ ; AVX2-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
+ ; AVX2-NEXT: xorq d@GOTPCREL(%rip), %rax
+ ; AVX2-NEXT: vmovdqa %xmm0, (%rax)
+ ; AVX2-NEXT: popq %rax
+ ; AVX2-NEXT: retq
+ ;
+ ; AVX512F-LABEL: constantfold_andn_mask:
+ ; AVX512F: # %bb.0: # %entry
+ ; AVX512F-NEXT: pushq %rax
+ ; AVX512F-NEXT: callq use@PLT
+ ; AVX512F-NEXT: vmovdqu (%rax), %xmm1
+ ; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
+ ; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
+ ; AVX512F-NEXT: vpavgb %xmm2, %xmm0, %xmm0
+ ; AVX512F-NEXT: vpandn %xmm1, %xmm0, %xmm0
+ ; AVX512F-NEXT: vpternlogq $184, %zmm1, %zmm2, %zmm0
+ ; AVX512F-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
+ ; AVX512F-NEXT: xorq d@GOTPCREL(%rip), %rax
+ ; AVX512F-NEXT: vmovdqa %xmm0, (%rax)
+ ; AVX512F-NEXT: popq %rax
+ ; AVX512F-NEXT: vzeroupper
+ ; AVX512F-NEXT: retq
+ ;
+ ; AVX512VL-LABEL: constantfold_andn_mask:
+ ; AVX512VL: # %bb.0: # %entry
+ ; AVX512VL-NEXT: pushq %rax
+ ; AVX512VL-NEXT: callq use@PLT
+ ; AVX512VL-NEXT: vmovdqu (%rax), %xmm1
+ ; AVX512VL-NEXT: vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
+ ; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
+ ; AVX512VL-NEXT: vpavgb %xmm2, %xmm0, %xmm0
+ ; AVX512VL-NEXT: vpandn %xmm1, %xmm0, %xmm0
+ ; AVX512VL-NEXT: vpternlogq $216, %xmm2, %xmm1, %xmm0
+ ; AVX512VL-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
+ ; AVX512VL-NEXT: xorq d@GOTPCREL(%rip), %rax
+ ; AVX512VL-NEXT: vmovdqa %xmm0, (%rax)
+ ; AVX512VL-NEXT: popq %rax
+ ; AVX512VL-NEXT: retq
+ entry:
+ %call = call noundef <2 x i64> @use()
+ %_msret = load <2 x i64>, ptr undef, align 8
+ %i = bitcast <2 x i64> %_msret to <16 x i8>
+ %i1 = bitcast <2 x i64> %call to <16 x i8>
+ %i2 = and <16 x i8> %i, <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>
+ %i3 = and <16 x i8> %i1, <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>
+ %i4 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> <i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8, i8 31, i8 -8>, <16 x i8> %i3)
+ %i5 = bitcast <16 x i8> %i2 to <2 x i64>
+ %i6 = bitcast <16 x i8> %i4 to <2 x i64>
+ %i7 = and <2 x i64> %_msret, <i64 567462211834873824, i64 567462211834873824>
+ %i8 = xor <2 x i64> zeroinitializer, <i64 -1, i64 -1>
+ %i9 = xor <2 x i64> %i6, <i64 -1, i64 -1>
+ %i10 = and <2 x i64> %i8, %i5
+ %i11 = and <2 x i64> %i7, %i9
+ %i12 = or <2 x i64> zeroinitializer, %i10
+ %i13 = or <2 x i64> %i12, %i11
+ store <2 x i64> %i13, ptr inttoptr (i64 xor (i64 ptrtoint (ptr @d to i64), i64 87960930222080) to ptr), align 16
+ ret void
+ }
+
+ declare <2 x i64> @use()
+ declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)