Skip to content

Commit 789f012

Browse files
committed
[SelectionDAG] Fix miscompile bugs related to smul.fix.sat with scale zero
When expanding a SMULFIXSAT ISD node (usually originating from a smul.fix.sat intrinsic), we have applied some optimizations for the special case when the scale is zero. The idea has been that it would be cheaper to use an SMULO instruction (if legal) to perform the multiplication and at the same time detect any overflow. In case of overflow, we could then use some SELECTs to replace the result with the saturated min/max value. The only tricky part is to know whether we overflowed on the min or the max value, i.e. whether the product is positive or negative. Unfortunately, the implementation has been incorrect, as it has looked at the product returned by the SMULO to determine the sign of the product. In case of overflow, that product is truncated and won't give us the correct sign bit. This patch adds an extra XOR of the multiplication operands, whose sign bit is used to determine the sign of the non-truncated product. (The XOR-based sign is only consulted when overflow occurred; in that case neither operand can be zero, so the sign of the full product is exactly the XOR of the operand sign bits.) This patch fixes PR51677. Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D108938
1 parent 0bbb2d0 commit 789f012

File tree

4 files changed

+146
-146
lines changed

4 files changed

+146
-146
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3470,8 +3470,11 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
34703470
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
34713471
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
34723472
SDValue Zero = DAG.getConstant(0, dl, VT);
3473-
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
3474-
Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
3473+
// Xor the inputs, if resulting sign bit is 0 the product will be
3474+
// positive, else negative.
3475+
SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
3476+
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
3477+
Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
34753478
Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
34763479
} else {
34773480
// For unsigned multiplication, we only need to check the max since we

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8180,8 +8180,11 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
81808180
APInt MaxVal = APInt::getSignedMaxValue(VTSize);
81818181
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
81828182
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
8183-
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
8184-
Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
8183+
// Xor the inputs, if resulting sign bit is 0 the product will be
8184+
// positive, else negative.
8185+
SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
8186+
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
8187+
Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
81858188
return DAG.getSelect(dl, VT, Overflow, Result, Product);
81868189
} else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
81878190
SDValue Result =

llvm/test/CodeGen/X86/smul_fix_sat.ll

Lines changed: 134 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -315,11 +315,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
315315
define i32 @func4(i32 %x, i32 %y) nounwind {
316316
; X64-LABEL: func4:
317317
; X64: # %bb.0:
318-
; X64-NEXT: movl %edi, %ecx
319-
; X64-NEXT: imull %esi, %ecx
320318
; X64-NEXT: xorl %eax, %eax
321-
; X64-NEXT: testl %ecx, %ecx
322-
; X64-NEXT: setns %al
319+
; X64-NEXT: movl %edi, %ecx
320+
; X64-NEXT: xorl %esi, %ecx
321+
; X64-NEXT: sets %al
323322
; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
324323
; X64-NEXT: imull %esi, %edi
325324
; X64-NEXT: cmovnol %edi, %eax
@@ -328,13 +327,12 @@ define i32 @func4(i32 %x, i32 %y) nounwind {
328327
; X86-LABEL: func4:
329328
; X86: # %bb.0:
330329
; X86-NEXT: pushl %esi
331-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
332330
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
333-
; X86-NEXT: movl %eax, %esi
334-
; X86-NEXT: imull %edx, %esi
331+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
335332
; X86-NEXT: xorl %ecx, %ecx
336-
; X86-NEXT: testl %esi, %esi
337-
; X86-NEXT: setns %cl
333+
; X86-NEXT: movl %eax, %esi
334+
; X86-NEXT: xorl %edx, %esi
335+
; X86-NEXT: sets %cl
338336
; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
339337
; X86-NEXT: imull %edx, %eax
340338
; X86-NEXT: cmovol %ecx, %eax
@@ -347,11 +345,10 @@ define i32 @func4(i32 %x, i32 %y) nounwind {
347345
define i64 @func5(i64 %x, i64 %y) {
348346
; X64-LABEL: func5:
349347
; X64: # %bb.0:
350-
; X64-NEXT: movq %rdi, %rax
351-
; X64-NEXT: imulq %rsi, %rax
352348
; X64-NEXT: xorl %ecx, %ecx
353-
; X64-NEXT: testq %rax, %rax
354-
; X64-NEXT: setns %cl
349+
; X64-NEXT: movq %rdi, %rax
350+
; X64-NEXT: xorq %rsi, %rax
351+
; X64-NEXT: sets %cl
355352
; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
356353
; X64-NEXT: addq %rcx, %rax
357354
; X64-NEXT: imulq %rsi, %rdi
@@ -360,47 +357,58 @@ define i64 @func5(i64 %x, i64 %y) {
360357
;
361358
; X86-LABEL: func5:
362359
; X86: # %bb.0:
363-
; X86-NEXT: pushl %edi
360+
; X86-NEXT: pushl %ebp
364361
; X86-NEXT: .cfi_def_cfa_offset 8
365-
; X86-NEXT: pushl %esi
362+
; X86-NEXT: pushl %ebx
366363
; X86-NEXT: .cfi_def_cfa_offset 12
367-
; X86-NEXT: pushl %eax
364+
; X86-NEXT: pushl %edi
368365
; X86-NEXT: .cfi_def_cfa_offset 16
369-
; X86-NEXT: .cfi_offset %esi, -12
370-
; X86-NEXT: .cfi_offset %edi, -8
371-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
372-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
366+
; X86-NEXT: pushl %esi
367+
; X86-NEXT: .cfi_def_cfa_offset 20
368+
; X86-NEXT: pushl %eax
369+
; X86-NEXT: .cfi_def_cfa_offset 24
370+
; X86-NEXT: .cfi_offset %esi, -20
371+
; X86-NEXT: .cfi_offset %edi, -16
372+
; X86-NEXT: .cfi_offset %ebx, -12
373+
; X86-NEXT: .cfi_offset %ebp, -8
373374
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
374-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
375+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
376+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
375377
; X86-NEXT: movl $0, (%esp)
376378
; X86-NEXT: movl %esp, %edi
379+
; X86-NEXT: xorl %ebp, %ebp
380+
; X86-NEXT: xorl %ebx, %ebx
381+
; X86-NEXT: movl %eax, %esi
382+
; X86-NEXT: xorl %ecx, %esi
383+
; X86-NEXT: movl $-1, %esi
384+
; X86-NEXT: cmovsl %ebp, %esi
385+
; X86-NEXT: sets %bl
386+
; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
377387
; X86-NEXT: pushl %edi
378388
; X86-NEXT: .cfi_adjust_cfa_offset 4
379-
; X86-NEXT: pushl %esi
389+
; X86-NEXT: pushl %ecx
380390
; X86-NEXT: .cfi_adjust_cfa_offset 4
381391
; X86-NEXT: pushl %edx
382392
; X86-NEXT: .cfi_adjust_cfa_offset 4
383-
; X86-NEXT: pushl %ecx
384-
; X86-NEXT: .cfi_adjust_cfa_offset 4
385393
; X86-NEXT: pushl %eax
386394
; X86-NEXT: .cfi_adjust_cfa_offset 4
395+
; X86-NEXT: pushl {{[0-9]+}}(%esp)
396+
; X86-NEXT: .cfi_adjust_cfa_offset 4
387397
; X86-NEXT: calll __mulodi4
388398
; X86-NEXT: addl $20, %esp
389399
; X86-NEXT: .cfi_adjust_cfa_offset -20
390-
; X86-NEXT: xorl %ecx, %ecx
391-
; X86-NEXT: testl %edx, %edx
392-
; X86-NEXT: setns %cl
393-
; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
394-
; X86-NEXT: movl %edx, %esi
395-
; X86-NEXT: sarl $31, %esi
396400
; X86-NEXT: cmpl $0, (%esp)
397401
; X86-NEXT: cmovnel %esi, %eax
398-
; X86-NEXT: cmovnel %ecx, %edx
402+
; X86-NEXT: cmovnel %ebx, %edx
399403
; X86-NEXT: addl $4, %esp
400-
; X86-NEXT: .cfi_def_cfa_offset 12
404+
; X86-NEXT: .cfi_def_cfa_offset 20
401405
; X86-NEXT: popl %esi
402-
; X86-NEXT: .cfi_def_cfa_offset 8
406+
; X86-NEXT: .cfi_def_cfa_offset 16
403407
; X86-NEXT: popl %edi
408+
; X86-NEXT: .cfi_def_cfa_offset 12
409+
; X86-NEXT: popl %ebx
410+
; X86-NEXT: .cfi_def_cfa_offset 8
411+
; X86-NEXT: popl %ebp
404412
; X86-NEXT: .cfi_def_cfa_offset 4
405413
; X86-NEXT: retl
406414
%tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 0)
@@ -414,36 +422,34 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
414422
; X64-NEXT: shlb $4, %sil
415423
; X64-NEXT: sarb $4, %sil
416424
; X64-NEXT: shlb $4, %al
425+
; X64-NEXT: xorl %ecx, %ecx
426+
; X64-NEXT: movl %eax, %edx
427+
; X64-NEXT: xorb %sil, %dl
428+
; X64-NEXT: sets %cl
429+
; X64-NEXT: addl $127, %ecx
417430
; X64-NEXT: # kill: def $al killed $al killed $eax
418431
; X64-NEXT: imulb %sil
419-
; X64-NEXT: seto %cl
420-
; X64-NEXT: xorl %edx, %edx
421-
; X64-NEXT: testb %al, %al
422-
; X64-NEXT: setns %dl
423-
; X64-NEXT: addl $127, %edx
424432
; X64-NEXT: movzbl %al, %eax
425-
; X64-NEXT: testb %cl, %cl
426-
; X64-NEXT: cmovnel %edx, %eax
433+
; X64-NEXT: cmovol %ecx, %eax
427434
; X64-NEXT: sarb $4, %al
428435
; X64-NEXT: # kill: def $al killed $al killed $eax
429436
; X64-NEXT: retq
430437
;
431438
; X86-LABEL: func6:
432439
; X86: # %bb.0:
433-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
434-
; X86-NEXT: shlb $4, %cl
435-
; X86-NEXT: sarb $4, %cl
440+
; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
441+
; X86-NEXT: shlb $4, %dl
442+
; X86-NEXT: sarb $4, %dl
436443
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
437444
; X86-NEXT: shlb $4, %al
438-
; X86-NEXT: imulb %cl
439-
; X86-NEXT: seto %dl
440445
; X86-NEXT: xorl %ecx, %ecx
441-
; X86-NEXT: testb %al, %al
442-
; X86-NEXT: setns %cl
446+
; X86-NEXT: movb %al, %ah
447+
; X86-NEXT: xorb %dl, %ah
448+
; X86-NEXT: sets %cl
443449
; X86-NEXT: addl $127, %ecx
450+
; X86-NEXT: imulb %dl
444451
; X86-NEXT: movzbl %al, %eax
445-
; X86-NEXT: testb %dl, %dl
446-
; X86-NEXT: cmovnel %ecx, %eax
452+
; X86-NEXT: cmovol %ecx, %eax
447453
; X86-NEXT: sarb $4, %al
448454
; X86-NEXT: # kill: def $al killed $al killed $eax
449455
; X86-NEXT: retl
@@ -454,59 +460,56 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
454460
define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
455461
; X64-LABEL: vec2:
456462
; X64: # %bb.0:
457-
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
463+
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
464+
; X64-NEXT: movd %xmm2, %eax
465+
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
458466
; X64-NEXT: movd %xmm2, %ecx
459-
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
460-
; X64-NEXT: movd %xmm2, %r8d
461-
; X64-NEXT: movl %r8d, %edx
462-
; X64-NEXT: imull %ecx, %edx
463-
; X64-NEXT: xorl %esi, %esi
464-
; X64-NEXT: testl %edx, %edx
465-
; X64-NEXT: setns %sil
466-
; X64-NEXT: addl $2147483647, %esi # imm = 0x7FFFFFFF
467-
; X64-NEXT: imull %ecx, %r8d
468-
; X64-NEXT: cmovol %esi, %r8d
469-
; X64-NEXT: movd %xmm1, %edx
467+
; X64-NEXT: xorl %edx, %edx
468+
; X64-NEXT: movl %ecx, %esi
469+
; X64-NEXT: xorl %eax, %esi
470+
; X64-NEXT: sets %dl
471+
; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
472+
; X64-NEXT: imull %eax, %ecx
473+
; X64-NEXT: cmovol %edx, %ecx
474+
; X64-NEXT: movd %ecx, %xmm2
475+
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
476+
; X64-NEXT: movd %xmm3, %eax
477+
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
478+
; X64-NEXT: movd %xmm3, %ecx
479+
; X64-NEXT: xorl %edx, %edx
480+
; X64-NEXT: movl %ecx, %esi
481+
; X64-NEXT: xorl %eax, %esi
482+
; X64-NEXT: sets %dl
483+
; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
484+
; X64-NEXT: imull %eax, %ecx
485+
; X64-NEXT: cmovol %edx, %ecx
486+
; X64-NEXT: movd %ecx, %xmm3
487+
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
488+
; X64-NEXT: movd %xmm1, %eax
489+
; X64-NEXT: movd %xmm0, %ecx
490+
; X64-NEXT: xorl %edx, %edx
491+
; X64-NEXT: movl %ecx, %esi
492+
; X64-NEXT: xorl %eax, %esi
493+
; X64-NEXT: sets %dl
494+
; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
495+
; X64-NEXT: imull %eax, %ecx
496+
; X64-NEXT: cmovol %edx, %ecx
497+
; X64-NEXT: movd %ecx, %xmm2
498+
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
499+
; X64-NEXT: movd %xmm1, %eax
500+
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
470501
; X64-NEXT: movd %xmm0, %ecx
502+
; X64-NEXT: xorl %edx, %edx
471503
; X64-NEXT: movl %ecx, %esi
472-
; X64-NEXT: imull %edx, %esi
473-
; X64-NEXT: xorl %edi, %edi
474-
; X64-NEXT: testl %esi, %esi
475-
; X64-NEXT: setns %dil
476-
; X64-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF
477-
; X64-NEXT: imull %edx, %ecx
478-
; X64-NEXT: cmovol %edi, %ecx
479-
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
480-
; X64-NEXT: movd %xmm2, %edx
481-
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
482-
; X64-NEXT: movd %xmm2, %esi
483-
; X64-NEXT: movl %esi, %edi
484-
; X64-NEXT: imull %edx, %edi
485-
; X64-NEXT: xorl %eax, %eax
486-
; X64-NEXT: testl %edi, %edi
487-
; X64-NEXT: setns %al
488-
; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
489-
; X64-NEXT: imull %edx, %esi
490-
; X64-NEXT: cmovol %eax, %esi
491-
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
492-
; X64-NEXT: movd %xmm1, %r9d
493-
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
494-
; X64-NEXT: movd %xmm0, %edx
495-
; X64-NEXT: movl %edx, %edi
496-
; X64-NEXT: imull %r9d, %edi
497-
; X64-NEXT: xorl %eax, %eax
498-
; X64-NEXT: testl %edi, %edi
499-
; X64-NEXT: setns %al
500-
; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
501-
; X64-NEXT: imull %r9d, %edx
502-
; X64-NEXT: cmovol %eax, %edx
503-
; X64-NEXT: movd %edx, %xmm0
504-
; X64-NEXT: movd %esi, %xmm1
505-
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
504+
; X64-NEXT: xorl %eax, %esi
505+
; X64-NEXT: sets %dl
506+
; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
507+
; X64-NEXT: imull %eax, %ecx
508+
; X64-NEXT: cmovol %edx, %ecx
506509
; X64-NEXT: movd %ecx, %xmm0
507-
; X64-NEXT: movd %r8d, %xmm2
508-
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
509-
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
510+
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
511+
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
512+
; X64-NEXT: movdqa %xmm2, %xmm0
510513
; X64-NEXT: retq
511514
;
512515
; X86-LABEL: vec2:
@@ -515,51 +518,47 @@ define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
515518
; X86-NEXT: pushl %ebx
516519
; X86-NEXT: pushl %edi
517520
; X86-NEXT: pushl %esi
521+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
522+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
523+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
524+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
518525
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
526+
; X86-NEXT: xorl %ebx, %ebx
527+
; X86-NEXT: movl %ecx, %edx
528+
; X86-NEXT: xorl %edi, %edx
519529
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
520-
; X86-NEXT: movl %ecx, %esi
521-
; X86-NEXT: imull %edx, %esi
522-
; X86-NEXT: xorl %eax, %eax
523-
; X86-NEXT: testl %esi, %esi
524-
; X86-NEXT: setns %al
525-
; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
526-
; X86-NEXT: imull %edx, %ecx
527-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
528-
; X86-NEXT: cmovol %eax, %ecx
529-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
530+
; X86-NEXT: sets %bl
531+
; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
532+
; X86-NEXT: imull %edi, %ecx
533+
; X86-NEXT: cmovol %ebx, %ecx
534+
; X86-NEXT: xorl %ebx, %ebx
530535
; X86-NEXT: movl %edx, %edi
531-
; X86-NEXT: imull %esi, %edi
532-
; X86-NEXT: xorl %eax, %eax
533-
; X86-NEXT: testl %edi, %edi
534-
; X86-NEXT: setns %al
535-
; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
536-
; X86-NEXT: imull %esi, %edx
537-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
538-
; X86-NEXT: cmovol %eax, %edx
539-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
540-
; X86-NEXT: movl %esi, %ebx
541-
; X86-NEXT: imull %edi, %ebx
542-
; X86-NEXT: xorl %eax, %eax
543-
; X86-NEXT: testl %ebx, %ebx
544-
; X86-NEXT: setns %al
545-
; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
546-
; X86-NEXT: imull %edi, %esi
536+
; X86-NEXT: xorl %ebp, %edi
547537
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
548-
; X86-NEXT: cmovol %eax, %esi
549-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
538+
; X86-NEXT: sets %bl
539+
; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
540+
; X86-NEXT: imull %ebp, %edx
541+
; X86-NEXT: cmovol %ebx, %edx
542+
; X86-NEXT: xorl %ebx, %ebx
550543
; X86-NEXT: movl %edi, %ebp
551-
; X86-NEXT: imull %eax, %ebp
544+
; X86-NEXT: xorl %esi, %ebp
545+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
546+
; X86-NEXT: sets %bl
547+
; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
548+
; X86-NEXT: imull %esi, %edi
549+
; X86-NEXT: cmovol %ebx, %edi
552550
; X86-NEXT: xorl %ebx, %ebx
553-
; X86-NEXT: testl %ebp, %ebp
554-
; X86-NEXT: setns %bl
551+
; X86-NEXT: movl %ebp, %esi
552+
; X86-NEXT: xorl %eax, %esi
553+
; X86-NEXT: sets %bl
555554
; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
556-
; X86-NEXT: imull %eax, %edi
555+
; X86-NEXT: imull %eax, %ebp
556+
; X86-NEXT: cmovol %ebx, %ebp
557557
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
558-
; X86-NEXT: cmovol %ebx, %edi
559-
; X86-NEXT: movl %ecx, 12(%eax)
560-
; X86-NEXT: movl %edx, 8(%eax)
561-
; X86-NEXT: movl %esi, 4(%eax)
562-
; X86-NEXT: movl %edi, (%eax)
558+
; X86-NEXT: movl %ebp, 12(%eax)
559+
; X86-NEXT: movl %edi, 8(%eax)
560+
; X86-NEXT: movl %edx, 4(%eax)
561+
; X86-NEXT: movl %ecx, (%eax)
563562
; X86-NEXT: popl %esi
564563
; X86-NEXT: popl %edi
565564
; X86-NEXT: popl %ebx

0 commit comments

Comments
 (0)