@@ -1104,6 +1104,176 @@ entry:
1104
1104
ret <8 x i16 > %conv6
1105
1105
}
1106
1106
1107
+ ; i8 saturate
1108
+
1109
+ define <2 x i8 > @stest_f64i8 (<2 x double > %x ) { ; <2 x double> -> <2 x i8>: signed convert, clamp to [-128, 127], truncate
1110
+ ; CHECK-LABEL: stest_f64i8:
1111
+ ; CHECK: # %bb.0: # %entry
1112
+ ; CHECK-NEXT: cvttpd2dq %xmm0, %xmm0
1113
+ ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <127,127,u,u>
1114
+ ; CHECK-NEXT: movdqa %xmm1, %xmm2
1115
+ ; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
1116
+ ; CHECK-NEXT: pand %xmm2, %xmm0
1117
+ ; CHECK-NEXT: pandn %xmm1, %xmm2
1118
+ ; CHECK-NEXT: por %xmm0, %xmm2
1119
+ ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <4294967168,4294967168,u,u>
1120
+ ; CHECK-NEXT: movdqa %xmm2, %xmm0
1121
+ ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
1122
+ ; CHECK-NEXT: pand %xmm0, %xmm2
1123
+ ; CHECK-NEXT: pandn %xmm1, %xmm0
1124
+ ; CHECK-NEXT: por %xmm2, %xmm0
1125
+ ; CHECK-NEXT: packssdw %xmm0, %xmm0
1126
+ ; CHECK-NEXT: packsswb %xmm0, %xmm0
1127
+ ; CHECK-NEXT: retq
1128
+ entry:
1129
+ %conv = fptosi <2 x double > %x to <2 x i32 > ; convert each double lane to a signed i32
1130
+ %0 = icmp slt <2 x i32 > %conv , <i32 127 , i32 127 > ; per-lane mask: %conv < 127
1131
+ %spec.store.select = select <2 x i1 > %0 , <2 x i32 > %conv , <2 x i32 > <i32 127 , i32 127 > ; signed min(%conv, 127): upper clamp
1132
+ %1 = icmp sgt <2 x i32 > %spec.store.select , <i32 -128 , i32 -128 > ; per-lane mask: upper-clamped value > -128
1133
+ %spec.store.select7 = select <2 x i1 > %1 , <2 x i32 > %spec.store.select , <2 x i32 > <i32 -128 , i32 -128 > ; signed max(value, -128): lower clamp
1134
+ %conv6 = trunc <2 x i32 > %spec.store.select7 to <2 x i8 > ; narrow the clamped i32 lanes to i8
1135
+ ret <2 x i8 > %conv6
1136
+ }
1137
+
1138
+ define <2 x i8 > @utest_f64i8 (<2 x double > %x ) { ; <2 x double> -> <2 x i8>: unsigned convert, cap at 255, truncate
1139
+ ; CHECK-LABEL: utest_f64i8:
1140
+ ; CHECK: # %bb.0: # %entry
1141
+ ; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
1142
+ ; CHECK-NEXT: movapd %xmm1, %xmm2
1143
+ ; CHECK-NEXT: psrad $31, %xmm2
1144
+ ; CHECK-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1145
+ ; CHECK-NEXT: cvttpd2dq %xmm0, %xmm3
1146
+ ; CHECK-NEXT: andpd %xmm2, %xmm3
1147
+ ; CHECK-NEXT: orpd %xmm1, %xmm3
1148
+ ; CHECK-NEXT: movapd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1149
+ ; CHECK-NEXT: xorpd %xmm3, %xmm0
1150
+ ; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1151
+ ; CHECK-NEXT: movdqa %xmm0, %xmm1
1152
+ ; CHECK-NEXT: pandn %xmm3, %xmm1
1153
+ ; CHECK-NEXT: psrld $24, %xmm0
1154
+ ; CHECK-NEXT: por %xmm1, %xmm0
1155
+ ; CHECK-NEXT: packuswb %xmm0, %xmm0
1156
+ ; CHECK-NEXT: packuswb %xmm0, %xmm0
1157
+ ; CHECK-NEXT: retq
1158
+ entry:
1159
+ %conv = fptoui <2 x double > %x to <2 x i32 > ; convert each double lane to an unsigned i32
1160
+ %0 = icmp ult <2 x i32 > %conv , <i32 255 , i32 255 > ; per-lane mask: %conv < 255 (unsigned compare)
1161
+ %spec.store.select = select <2 x i1 > %0 , <2 x i32 > %conv , <2 x i32 > <i32 255 , i32 255 > ; unsigned min(%conv, 255): upper clamp only
1162
+ %conv6 = trunc <2 x i32 > %spec.store.select to <2 x i8 > ; narrow the clamped i32 lanes to i8
1163
+ ret <2 x i8 > %conv6
1164
+ }
1165
+
1166
+ define <2 x i8 > @ustest_f64i8 (<2 x double > %x ) { ; <2 x double> -> <2 x i8>: signed convert, clamp to [0, 255], truncate
1167
+ ; CHECK-LABEL: ustest_f64i8:
1168
+ ; CHECK: # %bb.0: # %entry
1169
+ ; CHECK-NEXT: cvttpd2dq %xmm0, %xmm0
1170
+ ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <255,255,u,u>
1171
+ ; CHECK-NEXT: movdqa %xmm1, %xmm2
1172
+ ; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
1173
+ ; CHECK-NEXT: pand %xmm2, %xmm0
1174
+ ; CHECK-NEXT: pandn %xmm1, %xmm2
1175
+ ; CHECK-NEXT: por %xmm0, %xmm2
1176
+ ; CHECK-NEXT: pxor %xmm1, %xmm1
1177
+ ; CHECK-NEXT: movdqa %xmm2, %xmm0
1178
+ ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
1179
+ ; CHECK-NEXT: pand %xmm2, %xmm0
1180
+ ; CHECK-NEXT: packuswb %xmm0, %xmm0
1181
+ ; CHECK-NEXT: packuswb %xmm0, %xmm0
1182
+ ; CHECK-NEXT: retq
1183
+ entry:
1184
+ %conv = fptosi <2 x double > %x to <2 x i32 > ; convert each double lane to a signed i32
1185
+ %0 = icmp slt <2 x i32 > %conv , <i32 255 , i32 255 > ; per-lane mask: %conv < 255 (signed compare)
1186
+ %spec.store.select = select <2 x i1 > %0 , <2 x i32 > %conv , <2 x i32 > <i32 255 , i32 255 > ; signed min(%conv, 255): upper clamp
1187
+ %1 = icmp sgt <2 x i32 > %spec.store.select , zeroinitializer ; per-lane mask: upper-clamped value > 0
1188
+ %spec.store.select7 = select <2 x i1 > %1 , <2 x i32 > %spec.store.select , <2 x i32 > zeroinitializer ; signed max(value, 0): lower clamp
1189
+ %conv6 = trunc <2 x i32 > %spec.store.select7 to <2 x i8 > ; narrow the clamped i32 lanes to i8
1190
+ ret <2 x i8 > %conv6
1191
+ }
1192
+
1193
+ define <4 x i8 > @stest_f32i8 (<4 x float > %x ) { ; <4 x float> -> <4 x i8>: signed convert, clamp to [-128, 127], truncate
1194
+ ; CHECK-LABEL: stest_f32i8:
1195
+ ; CHECK: # %bb.0: # %entry
1196
+ ; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
1197
+ ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127]
1198
+ ; CHECK-NEXT: movdqa %xmm1, %xmm2
1199
+ ; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
1200
+ ; CHECK-NEXT: pand %xmm2, %xmm0
1201
+ ; CHECK-NEXT: pandn %xmm1, %xmm2
1202
+ ; CHECK-NEXT: por %xmm0, %xmm2
1203
+ ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
1204
+ ; CHECK-NEXT: movdqa %xmm2, %xmm0
1205
+ ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
1206
+ ; CHECK-NEXT: pand %xmm0, %xmm2
1207
+ ; CHECK-NEXT: pandn %xmm1, %xmm0
1208
+ ; CHECK-NEXT: por %xmm2, %xmm0
1209
+ ; CHECK-NEXT: packssdw %xmm0, %xmm0
1210
+ ; CHECK-NEXT: packsswb %xmm0, %xmm0
1211
+ ; CHECK-NEXT: retq
1212
+ entry:
1213
+ %conv = fptosi <4 x float > %x to <4 x i32 > ; convert each float lane to a signed i32
1214
+ %0 = icmp slt <4 x i32 > %conv , <i32 127 , i32 127 , i32 127 , i32 127 > ; per-lane mask: %conv < 127
1215
+ %spec.store.select = select <4 x i1 > %0 , <4 x i32 > %conv , <4 x i32 > <i32 127 , i32 127 , i32 127 , i32 127 > ; signed min(%conv, 127): upper clamp
1216
+ %1 = icmp sgt <4 x i32 > %spec.store.select , <i32 -128 , i32 -128 , i32 -128 , i32 -128 > ; per-lane mask: upper-clamped value > -128
1217
+ %spec.store.select7 = select <4 x i1 > %1 , <4 x i32 > %spec.store.select , <4 x i32 > <i32 -128 , i32 -128 , i32 -128 , i32 -128 > ; signed max(value, -128): lower clamp
1218
+ %conv6 = trunc <4 x i32 > %spec.store.select7 to <4 x i8 > ; narrow the clamped i32 lanes to i8
1219
+ ret <4 x i8 > %conv6
1220
+ }
1221
+
1222
+ define <4 x i8 > @utest_f32i8 (<4 x float > %x ) { ; <4 x float> -> <4 x i8>: unsigned convert, cap at 255, truncate
1223
+ ; CHECK-LABEL: utest_f32i8:
1224
+ ; CHECK: # %bb.0: # %entry
1225
+ ; CHECK-NEXT: cvttps2dq %xmm0, %xmm1
1226
+ ; CHECK-NEXT: movdqa %xmm1, %xmm2
1227
+ ; CHECK-NEXT: psrad $31, %xmm2
1228
+ ; CHECK-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1229
+ ; CHECK-NEXT: cvttps2dq %xmm0, %xmm3
1230
+ ; CHECK-NEXT: pand %xmm2, %xmm3
1231
+ ; CHECK-NEXT: por %xmm1, %xmm3
1232
+ ; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
1233
+ ; CHECK-NEXT: pxor %xmm3, %xmm0
1234
+ ; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1235
+ ; CHECK-NEXT: movdqa %xmm0, %xmm1
1236
+ ; CHECK-NEXT: pandn %xmm3, %xmm1
1237
+ ; CHECK-NEXT: psrld $24, %xmm0
1238
+ ; CHECK-NEXT: por %xmm1, %xmm0
1239
+ ; CHECK-NEXT: packuswb %xmm0, %xmm0
1240
+ ; CHECK-NEXT: packuswb %xmm0, %xmm0
1241
+ ; CHECK-NEXT: retq
1242
+ entry:
1243
+ %conv = fptoui <4 x float > %x to <4 x i32 > ; convert each float lane to an unsigned i32
1244
+ %0 = icmp ult <4 x i32 > %conv , <i32 255 , i32 255 , i32 255 , i32 255 > ; per-lane mask: %conv < 255 (unsigned compare)
1245
+ %spec.store.select = select <4 x i1 > %0 , <4 x i32 > %conv , <4 x i32 > <i32 255 , i32 255 , i32 255 , i32 255 > ; unsigned min(%conv, 255): upper clamp only
1246
+ %conv6 = trunc <4 x i32 > %spec.store.select to <4 x i8 > ; narrow the clamped i32 lanes to i8
1247
+ ret <4 x i8 > %conv6
1248
+ }
1249
+
1250
+ define <4 x i8 > @ustest_f32i8 (<4 x float > %x ) { ; <4 x float> -> <4 x i8>: signed convert, clamp to [0, 255], truncate
1251
+ ; CHECK-LABEL: ustest_f32i8:
1252
+ ; CHECK: # %bb.0: # %entry
1253
+ ; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
1254
+ ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
1255
+ ; CHECK-NEXT: movdqa %xmm1, %xmm2
1256
+ ; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
1257
+ ; CHECK-NEXT: pand %xmm2, %xmm0
1258
+ ; CHECK-NEXT: pandn %xmm1, %xmm2
1259
+ ; CHECK-NEXT: por %xmm0, %xmm2
1260
+ ; CHECK-NEXT: pxor %xmm1, %xmm1
1261
+ ; CHECK-NEXT: movdqa %xmm2, %xmm0
1262
+ ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
1263
+ ; CHECK-NEXT: pand %xmm2, %xmm0
1264
+ ; CHECK-NEXT: packuswb %xmm0, %xmm0
1265
+ ; CHECK-NEXT: packuswb %xmm0, %xmm0
1266
+ ; CHECK-NEXT: retq
1267
+ entry:
1268
+ %conv = fptosi <4 x float > %x to <4 x i32 > ; convert each float lane to a signed i32
1269
+ %0 = icmp slt <4 x i32 > %conv , <i32 255 , i32 255 , i32 255 , i32 255 > ; per-lane mask: %conv < 255 (signed compare)
1270
+ %spec.store.select = select <4 x i1 > %0 , <4 x i32 > %conv , <4 x i32 > <i32 255 , i32 255 , i32 255 , i32 255 > ; signed min(%conv, 255): upper clamp
1271
+ %1 = icmp sgt <4 x i32 > %spec.store.select , zeroinitializer ; per-lane mask: upper-clamped value > 0
1272
+ %spec.store.select7 = select <4 x i1 > %1 , <4 x i32 > %spec.store.select , <4 x i32 > zeroinitializer ; signed max(value, 0): lower clamp
1273
+ %conv6 = trunc <4 x i32 > %spec.store.select7 to <4 x i8 > ; narrow the clamped i32 lanes to i8
1274
+ ret <4 x i8 > %conv6
1275
+ }
1276
+
1107
1277
; i64 saturate
1108
1278
1109
1279
define <2 x i64 > @stest_f64i64 (<2 x double > %x ) {
0 commit comments