@@ -3172,240 +3172,40 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
3172
3172
define <4 x i32 > @umulo_v4i1 (<4 x i1 > %a0 , <4 x i1 > %a1 , <4 x i1 >* %p2 ) nounwind {
3173
3173
; SSE-LABEL: umulo_v4i1:
3174
3174
; SSE: # %bb.0:
3175
- ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
3176
- ; SSE-NEXT: pand %xmm2, %xmm1
3177
- ; SSE-NEXT: pand %xmm2, %xmm0
3178
- ; SSE-NEXT: pmaddwd %xmm1, %xmm0
3179
- ; SSE-NEXT: movdqa %xmm0, %xmm1
3180
- ; SSE-NEXT: psrld $1, %xmm1
3181
- ; SSE-NEXT: pxor %xmm2, %xmm2
3182
- ; SSE-NEXT: pcmpeqd %xmm2, %xmm1
3183
- ; SSE-NEXT: pcmpeqd %xmm2, %xmm2
3184
- ; SSE-NEXT: pxor %xmm2, %xmm1
3175
+ ; SSE-NEXT: pand %xmm1, %xmm0
3185
3176
; SSE-NEXT: pslld $31, %xmm0
3186
3177
; SSE-NEXT: movmskps %xmm0, %eax
3187
3178
; SSE-NEXT: movb %al, (%rdi)
3188
- ; SSE-NEXT: movdqa %xmm1 , %xmm0
3179
+ ; SSE-NEXT: xorps %xmm0 , %xmm0
3189
3180
; SSE-NEXT: retq
3190
3181
;
3191
- ; AVX1-LABEL: umulo_v4i1:
3192
- ; AVX1: # %bb.0:
3193
- ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
3194
- ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
3195
- ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
3196
- ; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm1
3197
- ; AVX1-NEXT: vpsrld $1, %xmm1, %xmm0
3198
- ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
3199
- ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
3200
- ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
3201
- ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
3202
- ; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
3203
- ; AVX1-NEXT: vmovmskps %xmm1, %eax
3204
- ; AVX1-NEXT: movb %al, (%rdi)
3205
- ; AVX1-NEXT: retq
3206
- ;
3207
- ; AVX2-LABEL: umulo_v4i1:
3208
- ; AVX2: # %bb.0:
3209
- ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
3210
- ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
3211
- ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
3212
- ; AVX2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm1
3213
- ; AVX2-NEXT: vpsrld $1, %xmm1, %xmm0
3214
- ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
3215
- ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
3216
- ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
3217
- ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
3218
- ; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
3219
- ; AVX2-NEXT: vmovmskps %xmm1, %eax
3220
- ; AVX2-NEXT: movb %al, (%rdi)
3221
- ; AVX2-NEXT: retq
3182
+ ; AVX-LABEL: umulo_v4i1:
3183
+ ; AVX: # %bb.0:
3184
+ ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
3185
+ ; AVX-NEXT: vpslld $31, %xmm0, %xmm0
3186
+ ; AVX-NEXT: vmovmskps %xmm0, %eax
3187
+ ; AVX-NEXT: movb %al, (%rdi)
3188
+ ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
3189
+ ; AVX-NEXT: retq
3222
3190
;
3223
3191
; AVX512F-LABEL: umulo_v4i1:
3224
3192
; AVX512F: # %bb.0:
3225
- ; AVX512F-NEXT: pushq %rbx
3193
+ ; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
3226
3194
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
3227
3195
; AVX512F-NEXT: vptestmd %xmm0, %xmm0, %k0
3228
- ; AVX512F-NEXT: kshiftrw $3, %k0, %k1
3229
- ; AVX512F-NEXT: kmovw %k1, %r8d
3230
- ; AVX512F-NEXT: andb $1, %r8b
3231
- ; AVX512F-NEXT: vpslld $31, %xmm1, %xmm0
3232
- ; AVX512F-NEXT: vptestmd %xmm0, %xmm0, %k1
3233
- ; AVX512F-NEXT: kshiftrw $3, %k1, %k2
3234
- ; AVX512F-NEXT: kmovw %k2, %r9d
3235
- ; AVX512F-NEXT: andb $1, %r9b
3236
- ; AVX512F-NEXT: kshiftrw $2, %k0, %k2
3237
- ; AVX512F-NEXT: kmovw %k2, %r10d
3238
- ; AVX512F-NEXT: andb $1, %r10b
3239
- ; AVX512F-NEXT: kshiftrw $2, %k1, %k2
3240
- ; AVX512F-NEXT: kmovw %k2, %r11d
3241
- ; AVX512F-NEXT: andb $1, %r11b
3242
- ; AVX512F-NEXT: kshiftrw $1, %k0, %k2
3243
- ; AVX512F-NEXT: kmovw %k2, %ecx
3244
- ; AVX512F-NEXT: andb $1, %cl
3245
- ; AVX512F-NEXT: kshiftrw $1, %k1, %k2
3246
- ; AVX512F-NEXT: kmovw %k2, %edx
3247
- ; AVX512F-NEXT: andb $1, %dl
3248
- ; AVX512F-NEXT: kmovw %k0, %eax
3249
- ; AVX512F-NEXT: andb $1, %al
3250
- ; AVX512F-NEXT: kmovw %k1, %esi
3251
- ; AVX512F-NEXT: andb $1, %sil
3252
- ; AVX512F-NEXT: movw $-3, %bx
3253
- ; AVX512F-NEXT: kmovw %ebx, %k0
3254
- ; AVX512F-NEXT: # kill: def $al killed $al killed $eax
3255
- ; AVX512F-NEXT: mulb %sil
3256
- ; AVX512F-NEXT: movl %eax, %esi
3257
- ; AVX512F-NEXT: testb $2, %al
3258
- ; AVX512F-NEXT: setne %al
3259
- ; AVX512F-NEXT: kmovw %eax, %k1
3260
- ; AVX512F-NEXT: kandw %k0, %k1, %k1
3261
- ; AVX512F-NEXT: movl %ecx, %eax
3262
- ; AVX512F-NEXT: mulb %dl
3263
- ; AVX512F-NEXT: movl %eax, %ecx
3264
- ; AVX512F-NEXT: testb $2, %al
3265
- ; AVX512F-NEXT: setne %al
3266
- ; AVX512F-NEXT: kmovw %eax, %k2
3267
- ; AVX512F-NEXT: kshiftlw $15, %k2, %k2
3268
- ; AVX512F-NEXT: kshiftrw $14, %k2, %k2
3269
- ; AVX512F-NEXT: korw %k2, %k1, %k2
3270
- ; AVX512F-NEXT: movw $-5, %ax
3271
- ; AVX512F-NEXT: kmovw %eax, %k1
3272
- ; AVX512F-NEXT: kandw %k1, %k2, %k2
3273
- ; AVX512F-NEXT: movl %r10d, %eax
3274
- ; AVX512F-NEXT: mulb %r11b
3275
- ; AVX512F-NEXT: movl %eax, %edx
3276
- ; AVX512F-NEXT: testb $2, %al
3277
- ; AVX512F-NEXT: setne %al
3278
- ; AVX512F-NEXT: kmovw %eax, %k3
3279
- ; AVX512F-NEXT: kshiftlw $2, %k3, %k3
3280
- ; AVX512F-NEXT: korw %k3, %k2, %k2
3281
- ; AVX512F-NEXT: kshiftlw $13, %k2, %k2
3282
- ; AVX512F-NEXT: kshiftrw $13, %k2, %k2
3283
- ; AVX512F-NEXT: movl %r8d, %eax
3284
- ; AVX512F-NEXT: mulb %r9b
3285
- ; AVX512F-NEXT: # kill: def $al killed $al def $eax
3286
- ; AVX512F-NEXT: testb $2, %al
3287
- ; AVX512F-NEXT: setne %bl
3288
- ; AVX512F-NEXT: kmovw %ebx, %k3
3289
- ; AVX512F-NEXT: kshiftlw $3, %k3, %k3
3290
- ; AVX512F-NEXT: korw %k3, %k2, %k2
3291
- ; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
3292
- ; AVX512F-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z}
3293
- ; AVX512F-NEXT: andl $1, %esi
3294
- ; AVX512F-NEXT: kmovw %esi, %k2
3295
- ; AVX512F-NEXT: kandw %k0, %k2, %k0
3296
- ; AVX512F-NEXT: kmovw %ecx, %k2
3297
- ; AVX512F-NEXT: kshiftlw $15, %k2, %k2
3298
- ; AVX512F-NEXT: kshiftrw $14, %k2, %k2
3299
- ; AVX512F-NEXT: korw %k2, %k0, %k0
3300
- ; AVX512F-NEXT: kandw %k1, %k0, %k0
3301
- ; AVX512F-NEXT: kmovw %edx, %k1
3302
- ; AVX512F-NEXT: kshiftlw $15, %k1, %k1
3303
- ; AVX512F-NEXT: kshiftrw $13, %k1, %k1
3304
- ; AVX512F-NEXT: korw %k1, %k0, %k0
3305
- ; AVX512F-NEXT: movw $-9, %cx
3306
- ; AVX512F-NEXT: kmovw %ecx, %k1
3307
- ; AVX512F-NEXT: kandw %k1, %k0, %k0
3308
- ; AVX512F-NEXT: kmovw %eax, %k1
3309
- ; AVX512F-NEXT: kshiftlw $15, %k1, %k1
3310
- ; AVX512F-NEXT: kshiftrw $12, %k1, %k1
3311
- ; AVX512F-NEXT: korw %k1, %k0, %k0
3312
3196
; AVX512F-NEXT: kmovw %k0, %eax
3313
3197
; AVX512F-NEXT: movb %al, (%rdi)
3314
- ; AVX512F-NEXT: popq %rbx
3198
+ ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
3315
3199
; AVX512F-NEXT: retq
3316
3200
;
3317
3201
; AVX512BW-LABEL: umulo_v4i1:
3318
3202
; AVX512BW: # %bb.0:
3319
- ; AVX512BW-NEXT: pushq %rbx
3203
+ ; AVX512BW-NEXT: vpand %xmm1, %xmm0, %xmm0
3320
3204
; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
3321
3205
; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k0
3322
- ; AVX512BW-NEXT: kshiftrw $3, %k0, %k1
3323
- ; AVX512BW-NEXT: kmovd %k1, %r8d
3324
- ; AVX512BW-NEXT: andb $1, %r8b
3325
- ; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0
3326
- ; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k1
3327
- ; AVX512BW-NEXT: kshiftrw $3, %k1, %k2
3328
- ; AVX512BW-NEXT: kmovd %k2, %r9d
3329
- ; AVX512BW-NEXT: andb $1, %r9b
3330
- ; AVX512BW-NEXT: kshiftrw $2, %k0, %k2
3331
- ; AVX512BW-NEXT: kmovd %k2, %r10d
3332
- ; AVX512BW-NEXT: andb $1, %r10b
3333
- ; AVX512BW-NEXT: kshiftrw $2, %k1, %k2
3334
- ; AVX512BW-NEXT: kmovd %k2, %r11d
3335
- ; AVX512BW-NEXT: andb $1, %r11b
3336
- ; AVX512BW-NEXT: kshiftrw $1, %k0, %k2
3337
- ; AVX512BW-NEXT: kmovd %k2, %ecx
3338
- ; AVX512BW-NEXT: andb $1, %cl
3339
- ; AVX512BW-NEXT: kshiftrw $1, %k1, %k2
3340
- ; AVX512BW-NEXT: kmovd %k2, %edx
3341
- ; AVX512BW-NEXT: andb $1, %dl
3342
- ; AVX512BW-NEXT: kmovd %k0, %eax
3343
- ; AVX512BW-NEXT: andb $1, %al
3344
- ; AVX512BW-NEXT: kmovd %k1, %esi
3345
- ; AVX512BW-NEXT: andb $1, %sil
3346
- ; AVX512BW-NEXT: movw $-3, %bx
3347
- ; AVX512BW-NEXT: kmovd %ebx, %k0
3348
- ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
3349
- ; AVX512BW-NEXT: mulb %sil
3350
- ; AVX512BW-NEXT: movl %eax, %esi
3351
- ; AVX512BW-NEXT: testb $2, %al
3352
- ; AVX512BW-NEXT: setne %al
3353
- ; AVX512BW-NEXT: kmovd %eax, %k1
3354
- ; AVX512BW-NEXT: kandw %k0, %k1, %k1
3355
- ; AVX512BW-NEXT: movl %ecx, %eax
3356
- ; AVX512BW-NEXT: mulb %dl
3357
- ; AVX512BW-NEXT: movl %eax, %ecx
3358
- ; AVX512BW-NEXT: testb $2, %al
3359
- ; AVX512BW-NEXT: setne %al
3360
- ; AVX512BW-NEXT: kmovd %eax, %k2
3361
- ; AVX512BW-NEXT: kshiftlw $15, %k2, %k2
3362
- ; AVX512BW-NEXT: kshiftrw $14, %k2, %k2
3363
- ; AVX512BW-NEXT: korw %k2, %k1, %k2
3364
- ; AVX512BW-NEXT: movw $-5, %ax
3365
- ; AVX512BW-NEXT: kmovd %eax, %k1
3366
- ; AVX512BW-NEXT: kandw %k1, %k2, %k2
3367
- ; AVX512BW-NEXT: movl %r10d, %eax
3368
- ; AVX512BW-NEXT: mulb %r11b
3369
- ; AVX512BW-NEXT: movl %eax, %edx
3370
- ; AVX512BW-NEXT: testb $2, %al
3371
- ; AVX512BW-NEXT: setne %al
3372
- ; AVX512BW-NEXT: kmovd %eax, %k3
3373
- ; AVX512BW-NEXT: kshiftlw $2, %k3, %k3
3374
- ; AVX512BW-NEXT: korw %k3, %k2, %k2
3375
- ; AVX512BW-NEXT: kshiftlw $13, %k2, %k2
3376
- ; AVX512BW-NEXT: kshiftrw $13, %k2, %k2
3377
- ; AVX512BW-NEXT: movl %r8d, %eax
3378
- ; AVX512BW-NEXT: mulb %r9b
3379
- ; AVX512BW-NEXT: # kill: def $al killed $al def $eax
3380
- ; AVX512BW-NEXT: testb $2, %al
3381
- ; AVX512BW-NEXT: setne %bl
3382
- ; AVX512BW-NEXT: kmovd %ebx, %k3
3383
- ; AVX512BW-NEXT: kshiftlw $3, %k3, %k3
3384
- ; AVX512BW-NEXT: korw %k3, %k2, %k2
3385
- ; AVX512BW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
3386
- ; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z}
3387
- ; AVX512BW-NEXT: andl $1, %esi
3388
- ; AVX512BW-NEXT: kmovw %esi, %k2
3389
- ; AVX512BW-NEXT: kandw %k0, %k2, %k0
3390
- ; AVX512BW-NEXT: kmovd %ecx, %k2
3391
- ; AVX512BW-NEXT: kshiftlw $15, %k2, %k2
3392
- ; AVX512BW-NEXT: kshiftrw $14, %k2, %k2
3393
- ; AVX512BW-NEXT: korw %k2, %k0, %k0
3394
- ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3395
- ; AVX512BW-NEXT: kmovd %edx, %k1
3396
- ; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
3397
- ; AVX512BW-NEXT: kshiftrw $13, %k1, %k1
3398
- ; AVX512BW-NEXT: korw %k1, %k0, %k0
3399
- ; AVX512BW-NEXT: movw $-9, %cx
3400
- ; AVX512BW-NEXT: kmovd %ecx, %k1
3401
- ; AVX512BW-NEXT: kandw %k1, %k0, %k0
3402
- ; AVX512BW-NEXT: kmovd %eax, %k1
3403
- ; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
3404
- ; AVX512BW-NEXT: kshiftrw $12, %k1, %k1
3405
- ; AVX512BW-NEXT: korw %k1, %k0, %k0
3406
3206
; AVX512BW-NEXT: kmovd %k0, %eax
3407
3207
; AVX512BW-NEXT: movb %al, (%rdi)
3408
- ; AVX512BW-NEXT: popq %rbx
3208
+ ; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0
3409
3209
; AVX512BW-NEXT: retq
3410
3210
%t = call {<4 x i1 >, <4 x i1 >} @llvm.umul.with.overflow.v4i1 (<4 x i1 > %a0 , <4 x i1 > %a1 )
3411
3211
%val = extractvalue {<4 x i1 >, <4 x i1 >} %t , 0
0 commit comments