@@ -315,11 +315,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
315
315
define i32 @func4 (i32 %x , i32 %y ) nounwind {
316
316
; X64-LABEL: func4:
317
317
; X64: # %bb.0:
318
- ; X64-NEXT: movl %edi, %ecx
319
- ; X64-NEXT: imull %esi, %ecx
320
318
; X64-NEXT: xorl %eax, %eax
321
- ; X64-NEXT: testl %ecx, %ecx
322
- ; X64-NEXT: setns %al
319
+ ; X64-NEXT: movl %edi, %ecx
320
+ ; X64-NEXT: xorl %esi, %ecx
321
+ ; X64-NEXT: sets %al
323
322
; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
324
323
; X64-NEXT: imull %esi, %edi
325
324
; X64-NEXT: cmovnol %edi, %eax
@@ -328,13 +327,12 @@ define i32 @func4(i32 %x, i32 %y) nounwind {
328
327
; X86-LABEL: func4:
329
328
; X86: # %bb.0:
330
329
; X86-NEXT: pushl %esi
331
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
332
330
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
333
- ; X86-NEXT: movl %eax, %esi
334
- ; X86-NEXT: imull %edx, %esi
331
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
335
332
; X86-NEXT: xorl %ecx, %ecx
336
- ; X86-NEXT: testl %esi, %esi
337
- ; X86-NEXT: setns %cl
333
+ ; X86-NEXT: movl %eax, %esi
334
+ ; X86-NEXT: xorl %edx, %esi
335
+ ; X86-NEXT: sets %cl
338
336
; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
339
337
; X86-NEXT: imull %edx, %eax
340
338
; X86-NEXT: cmovol %ecx, %eax
@@ -347,11 +345,10 @@ define i32 @func4(i32 %x, i32 %y) nounwind {
347
345
define i64 @func5 (i64 %x , i64 %y ) {
348
346
; X64-LABEL: func5:
349
347
; X64: # %bb.0:
350
- ; X64-NEXT: movq %rdi, %rax
351
- ; X64-NEXT: imulq %rsi, %rax
352
348
; X64-NEXT: xorl %ecx, %ecx
353
- ; X64-NEXT: testq %rax, %rax
354
- ; X64-NEXT: setns %cl
349
+ ; X64-NEXT: movq %rdi, %rax
350
+ ; X64-NEXT: xorq %rsi, %rax
351
+ ; X64-NEXT: sets %cl
355
352
; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
356
353
; X64-NEXT: addq %rcx, %rax
357
354
; X64-NEXT: imulq %rsi, %rdi
@@ -360,47 +357,58 @@ define i64 @func5(i64 %x, i64 %y) {
360
357
;
361
358
; X86-LABEL: func5:
362
359
; X86: # %bb.0:
363
- ; X86-NEXT: pushl %edi
360
+ ; X86-NEXT: pushl %ebp
364
361
; X86-NEXT: .cfi_def_cfa_offset 8
365
- ; X86-NEXT: pushl %esi
362
+ ; X86-NEXT: pushl %ebx
366
363
; X86-NEXT: .cfi_def_cfa_offset 12
367
- ; X86-NEXT: pushl %eax
364
+ ; X86-NEXT: pushl %edi
368
365
; X86-NEXT: .cfi_def_cfa_offset 16
369
- ; X86-NEXT: .cfi_offset %esi, -12
370
- ; X86-NEXT: .cfi_offset %edi, -8
371
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
372
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
366
+ ; X86-NEXT: pushl %esi
367
+ ; X86-NEXT: .cfi_def_cfa_offset 20
368
+ ; X86-NEXT: pushl %eax
369
+ ; X86-NEXT: .cfi_def_cfa_offset 24
370
+ ; X86-NEXT: .cfi_offset %esi, -20
371
+ ; X86-NEXT: .cfi_offset %edi, -16
372
+ ; X86-NEXT: .cfi_offset %ebx, -12
373
+ ; X86-NEXT: .cfi_offset %ebp, -8
373
374
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
374
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
375
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
376
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
375
377
; X86-NEXT: movl $0, (%esp)
376
378
; X86-NEXT: movl %esp, %edi
379
+ ; X86-NEXT: xorl %ebp, %ebp
380
+ ; X86-NEXT: xorl %ebx, %ebx
381
+ ; X86-NEXT: movl %eax, %esi
382
+ ; X86-NEXT: xorl %ecx, %esi
383
+ ; X86-NEXT: movl $-1, %esi
384
+ ; X86-NEXT: cmovsl %ebp, %esi
385
+ ; X86-NEXT: sets %bl
386
+ ; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
377
387
; X86-NEXT: pushl %edi
378
388
; X86-NEXT: .cfi_adjust_cfa_offset 4
379
- ; X86-NEXT: pushl %esi
389
+ ; X86-NEXT: pushl %ecx
380
390
; X86-NEXT: .cfi_adjust_cfa_offset 4
381
391
; X86-NEXT: pushl %edx
382
392
; X86-NEXT: .cfi_adjust_cfa_offset 4
383
- ; X86-NEXT: pushl %ecx
384
- ; X86-NEXT: .cfi_adjust_cfa_offset 4
385
393
; X86-NEXT: pushl %eax
386
394
; X86-NEXT: .cfi_adjust_cfa_offset 4
395
+ ; X86-NEXT: pushl {{[0-9]+}}(%esp)
396
+ ; X86-NEXT: .cfi_adjust_cfa_offset 4
387
397
; X86-NEXT: calll __mulodi4
388
398
; X86-NEXT: addl $20, %esp
389
399
; X86-NEXT: .cfi_adjust_cfa_offset -20
390
- ; X86-NEXT: xorl %ecx, %ecx
391
- ; X86-NEXT: testl %edx, %edx
392
- ; X86-NEXT: setns %cl
393
- ; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
394
- ; X86-NEXT: movl %edx, %esi
395
- ; X86-NEXT: sarl $31, %esi
396
400
; X86-NEXT: cmpl $0, (%esp)
397
401
; X86-NEXT: cmovnel %esi, %eax
398
- ; X86-NEXT: cmovnel %ecx , %edx
402
+ ; X86-NEXT: cmovnel %ebx , %edx
399
403
; X86-NEXT: addl $4, %esp
400
- ; X86-NEXT: .cfi_def_cfa_offset 12
404
+ ; X86-NEXT: .cfi_def_cfa_offset 20
401
405
; X86-NEXT: popl %esi
402
- ; X86-NEXT: .cfi_def_cfa_offset 8
406
+ ; X86-NEXT: .cfi_def_cfa_offset 16
403
407
; X86-NEXT: popl %edi
408
+ ; X86-NEXT: .cfi_def_cfa_offset 12
409
+ ; X86-NEXT: popl %ebx
410
+ ; X86-NEXT: .cfi_def_cfa_offset 8
411
+ ; X86-NEXT: popl %ebp
404
412
; X86-NEXT: .cfi_def_cfa_offset 4
405
413
; X86-NEXT: retl
406
414
%tmp = call i64 @llvm.smul.fix.sat.i64 (i64 %x , i64 %y , i32 0 )
@@ -414,36 +422,34 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
414
422
; X64-NEXT: shlb $4, %sil
415
423
; X64-NEXT: sarb $4, %sil
416
424
; X64-NEXT: shlb $4, %al
425
+ ; X64-NEXT: xorl %ecx, %ecx
426
+ ; X64-NEXT: movl %eax, %edx
427
+ ; X64-NEXT: xorb %sil, %dl
428
+ ; X64-NEXT: sets %cl
429
+ ; X64-NEXT: addl $127, %ecx
417
430
; X64-NEXT: # kill: def $al killed $al killed $eax
418
431
; X64-NEXT: imulb %sil
419
- ; X64-NEXT: seto %cl
420
- ; X64-NEXT: xorl %edx, %edx
421
- ; X64-NEXT: testb %al, %al
422
- ; X64-NEXT: setns %dl
423
- ; X64-NEXT: addl $127, %edx
424
432
; X64-NEXT: movzbl %al, %eax
425
- ; X64-NEXT: testb %cl, %cl
426
- ; X64-NEXT: cmovnel %edx, %eax
433
+ ; X64-NEXT: cmovol %ecx, %eax
427
434
; X64-NEXT: sarb $4, %al
428
435
; X64-NEXT: # kill: def $al killed $al killed $eax
429
436
; X64-NEXT: retq
430
437
;
431
438
; X86-LABEL: func6:
432
439
; X86: # %bb.0:
433
- ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
434
- ; X86-NEXT: shlb $4, %cl
435
- ; X86-NEXT: sarb $4, %cl
440
+ ; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
441
+ ; X86-NEXT: shlb $4, %dl
442
+ ; X86-NEXT: sarb $4, %dl
436
443
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
437
444
; X86-NEXT: shlb $4, %al
438
- ; X86-NEXT: imulb %cl
439
- ; X86-NEXT: seto %dl
440
445
; X86-NEXT: xorl %ecx, %ecx
441
- ; X86-NEXT: testb %al, %al
442
- ; X86-NEXT: setns %cl
446
+ ; X86-NEXT: movb %al, %ah
447
+ ; X86-NEXT: xorb %dl, %ah
448
+ ; X86-NEXT: sets %cl
443
449
; X86-NEXT: addl $127, %ecx
450
+ ; X86-NEXT: imulb %dl
444
451
; X86-NEXT: movzbl %al, %eax
445
- ; X86-NEXT: testb %dl, %dl
446
- ; X86-NEXT: cmovnel %ecx, %eax
452
+ ; X86-NEXT: cmovol %ecx, %eax
447
453
; X86-NEXT: sarb $4, %al
448
454
; X86-NEXT: # kill: def $al killed $al killed $eax
449
455
; X86-NEXT: retl
@@ -454,59 +460,56 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
454
460
define <4 x i32 > @vec2 (<4 x i32 > %x , <4 x i32 > %y ) nounwind {
455
461
; X64-LABEL: vec2:
456
462
; X64: # %bb.0:
457
- ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
463
+ ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
464
+ ; X64-NEXT: movd %xmm2, %eax
465
+ ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
458
466
; X64-NEXT: movd %xmm2, %ecx
459
- ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
460
- ; X64-NEXT: movd %xmm2, %r8d
461
- ; X64-NEXT: movl %r8d, %edx
462
- ; X64-NEXT: imull %ecx, %edx
463
- ; X64-NEXT: xorl %esi, %esi
464
- ; X64-NEXT: testl %edx, %edx
465
- ; X64-NEXT: setns %sil
466
- ; X64-NEXT: addl $2147483647, %esi # imm = 0x7FFFFFFF
467
- ; X64-NEXT: imull %ecx, %r8d
468
- ; X64-NEXT: cmovol %esi, %r8d
469
- ; X64-NEXT: movd %xmm1, %edx
467
+ ; X64-NEXT: xorl %edx, %edx
468
+ ; X64-NEXT: movl %ecx, %esi
469
+ ; X64-NEXT: xorl %eax, %esi
470
+ ; X64-NEXT: sets %dl
471
+ ; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
472
+ ; X64-NEXT: imull %eax, %ecx
473
+ ; X64-NEXT: cmovol %edx, %ecx
474
+ ; X64-NEXT: movd %ecx, %xmm2
475
+ ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
476
+ ; X64-NEXT: movd %xmm3, %eax
477
+ ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
478
+ ; X64-NEXT: movd %xmm3, %ecx
479
+ ; X64-NEXT: xorl %edx, %edx
480
+ ; X64-NEXT: movl %ecx, %esi
481
+ ; X64-NEXT: xorl %eax, %esi
482
+ ; X64-NEXT: sets %dl
483
+ ; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
484
+ ; X64-NEXT: imull %eax, %ecx
485
+ ; X64-NEXT: cmovol %edx, %ecx
486
+ ; X64-NEXT: movd %ecx, %xmm3
487
+ ; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
488
+ ; X64-NEXT: movd %xmm1, %eax
489
+ ; X64-NEXT: movd %xmm0, %ecx
490
+ ; X64-NEXT: xorl %edx, %edx
491
+ ; X64-NEXT: movl %ecx, %esi
492
+ ; X64-NEXT: xorl %eax, %esi
493
+ ; X64-NEXT: sets %dl
494
+ ; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
495
+ ; X64-NEXT: imull %eax, %ecx
496
+ ; X64-NEXT: cmovol %edx, %ecx
497
+ ; X64-NEXT: movd %ecx, %xmm2
498
+ ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
499
+ ; X64-NEXT: movd %xmm1, %eax
500
+ ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
470
501
; X64-NEXT: movd %xmm0, %ecx
502
+ ; X64-NEXT: xorl %edx, %edx
471
503
; X64-NEXT: movl %ecx, %esi
472
- ; X64-NEXT: imull %edx, %esi
473
- ; X64-NEXT: xorl %edi, %edi
474
- ; X64-NEXT: testl %esi, %esi
475
- ; X64-NEXT: setns %dil
476
- ; X64-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF
477
- ; X64-NEXT: imull %edx, %ecx
478
- ; X64-NEXT: cmovol %edi, %ecx
479
- ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
480
- ; X64-NEXT: movd %xmm2, %edx
481
- ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
482
- ; X64-NEXT: movd %xmm2, %esi
483
- ; X64-NEXT: movl %esi, %edi
484
- ; X64-NEXT: imull %edx, %edi
485
- ; X64-NEXT: xorl %eax, %eax
486
- ; X64-NEXT: testl %edi, %edi
487
- ; X64-NEXT: setns %al
488
- ; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
489
- ; X64-NEXT: imull %edx, %esi
490
- ; X64-NEXT: cmovol %eax, %esi
491
- ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
492
- ; X64-NEXT: movd %xmm1, %r9d
493
- ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
494
- ; X64-NEXT: movd %xmm0, %edx
495
- ; X64-NEXT: movl %edx, %edi
496
- ; X64-NEXT: imull %r9d, %edi
497
- ; X64-NEXT: xorl %eax, %eax
498
- ; X64-NEXT: testl %edi, %edi
499
- ; X64-NEXT: setns %al
500
- ; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
501
- ; X64-NEXT: imull %r9d, %edx
502
- ; X64-NEXT: cmovol %eax, %edx
503
- ; X64-NEXT: movd %edx, %xmm0
504
- ; X64-NEXT: movd %esi, %xmm1
505
- ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
504
+ ; X64-NEXT: xorl %eax, %esi
505
+ ; X64-NEXT: sets %dl
506
+ ; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
507
+ ; X64-NEXT: imull %eax, %ecx
508
+ ; X64-NEXT: cmovol %edx, %ecx
506
509
; X64-NEXT: movd %ecx, %xmm0
507
- ; X64-NEXT: movd %r8d, % xmm2
508
- ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0], xmm2[0],xmm0[1],xmm2[1 ]
509
- ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
510
+ ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
511
+ ; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0 ]
512
+ ; X64-NEXT: movdqa %xmm2, % xmm0
510
513
; X64-NEXT: retq
511
514
;
512
515
; X86-LABEL: vec2:
@@ -515,51 +518,47 @@ define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
515
518
; X86-NEXT: pushl %ebx
516
519
; X86-NEXT: pushl %edi
517
520
; X86-NEXT: pushl %esi
521
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
522
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
523
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
524
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
518
525
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
526
+ ; X86-NEXT: xorl %ebx, %ebx
527
+ ; X86-NEXT: movl %ecx, %edx
528
+ ; X86-NEXT: xorl %edi, %edx
519
529
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
520
- ; X86-NEXT: movl %ecx, %esi
521
- ; X86-NEXT: imull %edx, %esi
522
- ; X86-NEXT: xorl %eax, %eax
523
- ; X86-NEXT: testl %esi, %esi
524
- ; X86-NEXT: setns %al
525
- ; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
526
- ; X86-NEXT: imull %edx, %ecx
527
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
528
- ; X86-NEXT: cmovol %eax, %ecx
529
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
530
+ ; X86-NEXT: sets %bl
531
+ ; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
532
+ ; X86-NEXT: imull %edi, %ecx
533
+ ; X86-NEXT: cmovol %ebx, %ecx
534
+ ; X86-NEXT: xorl %ebx, %ebx
530
535
; X86-NEXT: movl %edx, %edi
531
- ; X86-NEXT: imull %esi, %edi
532
- ; X86-NEXT: xorl %eax, %eax
533
- ; X86-NEXT: testl %edi, %edi
534
- ; X86-NEXT: setns %al
535
- ; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
536
- ; X86-NEXT: imull %esi, %edx
537
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
538
- ; X86-NEXT: cmovol %eax, %edx
539
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
540
- ; X86-NEXT: movl %esi, %ebx
541
- ; X86-NEXT: imull %edi, %ebx
542
- ; X86-NEXT: xorl %eax, %eax
543
- ; X86-NEXT: testl %ebx, %ebx
544
- ; X86-NEXT: setns %al
545
- ; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
546
- ; X86-NEXT: imull %edi, %esi
536
+ ; X86-NEXT: xorl %ebp, %edi
547
537
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
548
- ; X86-NEXT: cmovol %eax, %esi
549
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
538
+ ; X86-NEXT: sets %bl
539
+ ; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
540
+ ; X86-NEXT: imull %ebp, %edx
541
+ ; X86-NEXT: cmovol %ebx, %edx
542
+ ; X86-NEXT: xorl %ebx, %ebx
550
543
; X86-NEXT: movl %edi, %ebp
551
- ; X86-NEXT: imull %eax, %ebp
544
+ ; X86-NEXT: xorl %esi, %ebp
545
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
546
+ ; X86-NEXT: sets %bl
547
+ ; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
548
+ ; X86-NEXT: imull %esi, %edi
549
+ ; X86-NEXT: cmovol %ebx, %edi
552
550
; X86-NEXT: xorl %ebx, %ebx
553
- ; X86-NEXT: testl %ebp, %ebp
554
- ; X86-NEXT: setns %bl
551
+ ; X86-NEXT: movl %ebp, %esi
552
+ ; X86-NEXT: xorl %eax, %esi
553
+ ; X86-NEXT: sets %bl
555
554
; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
556
- ; X86-NEXT: imull %eax, %edi
555
+ ; X86-NEXT: imull %eax, %ebp
556
+ ; X86-NEXT: cmovol %ebx, %ebp
557
557
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
558
- ; X86-NEXT: cmovol %ebx, %edi
559
- ; X86-NEXT: movl %ecx, 12(%eax)
560
- ; X86-NEXT: movl %edx, 8(%eax)
561
- ; X86-NEXT: movl %esi, 4(%eax)
562
- ; X86-NEXT: movl %edi, (%eax)
558
+ ; X86-NEXT: movl %ebp, 12(%eax)
559
+ ; X86-NEXT: movl %edi, 8(%eax)
560
+ ; X86-NEXT: movl %edx, 4(%eax)
561
+ ; X86-NEXT: movl %ecx, (%eax)
563
562
; X86-NEXT: popl %esi
564
563
; X86-NEXT: popl %edi
565
564
; X86-NEXT: popl %ebx
0 commit comments