Skip to content

Commit ca5247b

Browse files
committed
[DAGCombiner] Don't skip the no-overflow check on UMULO if the first computeKnownBits call doesn't return any known-zero bits.
Even if the first computeKnownBits call doesn't report any known-zero bits, it is possible that the other operand has bitwidth-1 leading zeros. In that case overflow is still impossible, so always call computeKnownBits for both operands.
1 parent 1d6a6f3 commit ca5247b

File tree

3 files changed

+26
-233
lines changed

3 files changed

+26
-233
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4672,14 +4672,12 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
46724672
DAG.getConstant(0, DL, CarryVT));
46734673
} else {
46744674
KnownBits N1Known = DAG.computeKnownBits(N1);
4675-
if (N1Known.Zero.getBoolValue()) {
4676-
KnownBits N0Known = DAG.computeKnownBits(N0);
4677-
bool Overflow;
4678-
(void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4679-
if (!Overflow)
4680-
return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4681-
DAG.getConstant(0, DL, CarryVT));
4682-
}
4675+
KnownBits N0Known = DAG.computeKnownBits(N0);
4676+
bool Overflow;
4677+
(void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4678+
if (!Overflow)
4679+
return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4680+
DAG.getConstant(0, DL, CarryVT));
46834681
}
46844682

46854683
return SDValue();

llvm/test/CodeGen/AArch64/vec_umulo.ll

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -291,23 +291,18 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
291291
define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind {
292292
; CHECK-LABEL: umulo_v4i1:
293293
; CHECK: // %bb.0:
294-
; CHECK-NEXT: movi v2.4h, #1
295-
; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
296-
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
297-
; CHECK-NEXT: mul v1.4h, v0.4h, v1.4h
298-
; CHECK-NEXT: umov w9, v1.h[1]
299-
; CHECK-NEXT: umov w8, v1.h[0]
294+
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
295+
; CHECK-NEXT: umov w9, v0.h[1]
296+
; CHECK-NEXT: umov w8, v0.h[0]
300297
; CHECK-NEXT: and w9, w9, #0x1
301298
; CHECK-NEXT: bfi w8, w9, #1, #1
302-
; CHECK-NEXT: umov w9, v1.h[2]
299+
; CHECK-NEXT: umov w9, v0.h[2]
303300
; CHECK-NEXT: and w9, w9, #0x1
304-
; CHECK-NEXT: ushr v0.4h, v1.4h, #1
305301
; CHECK-NEXT: bfi w8, w9, #2, #1
306-
; CHECK-NEXT: umov w9, v1.h[3]
307-
; CHECK-NEXT: cmtst v0.4h, v0.4h, v0.4h
302+
; CHECK-NEXT: umov w9, v0.h[3]
308303
; CHECK-NEXT: bfi w8, w9, #3, #29
309-
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
310304
; CHECK-NEXT: and w8, w8, #0xf
305+
; CHECK-NEXT: movi v0.2d, #0000000000000000
311306
; CHECK-NEXT: strb w8, [x0]
312307
; CHECK-NEXT: ret
313308
%t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)

llvm/test/CodeGen/X86/vec_umulo.ll

Lines changed: 14 additions & 214 deletions
Original file line numberDiff line numberDiff line change
@@ -3172,240 +3172,40 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
31723172
define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind {
31733173
; SSE-LABEL: umulo_v4i1:
31743174
; SSE: # %bb.0:
3175-
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
3176-
; SSE-NEXT: pand %xmm2, %xmm1
3177-
; SSE-NEXT: pand %xmm2, %xmm0
3178-
; SSE-NEXT: pmaddwd %xmm1, %xmm0
3179-
; SSE-NEXT: movdqa %xmm0, %xmm1
3180-
; SSE-NEXT: psrld $1, %xmm1
3181-
; SSE-NEXT: pxor %xmm2, %xmm2
3182-
; SSE-NEXT: pcmpeqd %xmm2, %xmm1
3183-
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
3184-
; SSE-NEXT: pxor %xmm2, %xmm1
3175+
; SSE-NEXT: pand %xmm1, %xmm0
31853176
; SSE-NEXT: pslld $31, %xmm0
31863177
; SSE-NEXT: movmskps %xmm0, %eax
31873178
; SSE-NEXT: movb %al, (%rdi)
3188-
; SSE-NEXT: movdqa %xmm1, %xmm0
3179+
; SSE-NEXT: xorps %xmm0, %xmm0
31893180
; SSE-NEXT: retq
31903181
;
3191-
; AVX1-LABEL: umulo_v4i1:
3192-
; AVX1: # %bb.0:
3193-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
3194-
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
3195-
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
3196-
; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm1
3197-
; AVX1-NEXT: vpsrld $1, %xmm1, %xmm0
3198-
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
3199-
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
3200-
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
3201-
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
3202-
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
3203-
; AVX1-NEXT: vmovmskps %xmm1, %eax
3204-
; AVX1-NEXT: movb %al, (%rdi)
3205-
; AVX1-NEXT: retq
3206-
;
3207-
; AVX2-LABEL: umulo_v4i1:
3208-
; AVX2: # %bb.0:
3209-
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
3210-
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
3211-
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
3212-
; AVX2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm1
3213-
; AVX2-NEXT: vpsrld $1, %xmm1, %xmm0
3214-
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
3215-
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
3216-
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
3217-
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
3218-
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
3219-
; AVX2-NEXT: vmovmskps %xmm1, %eax
3220-
; AVX2-NEXT: movb %al, (%rdi)
3221-
; AVX2-NEXT: retq
3182+
; AVX-LABEL: umulo_v4i1:
3183+
; AVX: # %bb.0:
3184+
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
3185+
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
3186+
; AVX-NEXT: vmovmskps %xmm0, %eax
3187+
; AVX-NEXT: movb %al, (%rdi)
3188+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
3189+
; AVX-NEXT: retq
32223190
;
32233191
; AVX512F-LABEL: umulo_v4i1:
32243192
; AVX512F: # %bb.0:
3225-
; AVX512F-NEXT: pushq %rbx
3193+
; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
32263194
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
32273195
; AVX512F-NEXT: vptestmd %xmm0, %xmm0, %k0
3228-
; AVX512F-NEXT: kshiftrw $3, %k0, %k1
3229-
; AVX512F-NEXT: kmovw %k1, %r8d
3230-
; AVX512F-NEXT: andb $1, %r8b
3231-
; AVX512F-NEXT: vpslld $31, %xmm1, %xmm0
3232-
; AVX512F-NEXT: vptestmd %xmm0, %xmm0, %k1
3233-
; AVX512F-NEXT: kshiftrw $3, %k1, %k2
3234-
; AVX512F-NEXT: kmovw %k2, %r9d
3235-
; AVX512F-NEXT: andb $1, %r9b
3236-
; AVX512F-NEXT: kshiftrw $2, %k0, %k2
3237-
; AVX512F-NEXT: kmovw %k2, %r10d
3238-
; AVX512F-NEXT: andb $1, %r10b
3239-
; AVX512F-NEXT: kshiftrw $2, %k1, %k2
3240-
; AVX512F-NEXT: kmovw %k2, %r11d
3241-
; AVX512F-NEXT: andb $1, %r11b
3242-
; AVX512F-NEXT: kshiftrw $1, %k0, %k2
3243-
; AVX512F-NEXT: kmovw %k2, %ecx
3244-
; AVX512F-NEXT: andb $1, %cl
3245-
; AVX512F-NEXT: kshiftrw $1, %k1, %k2
3246-
; AVX512F-NEXT: kmovw %k2, %edx
3247-
; AVX512F-NEXT: andb $1, %dl
3248-
; AVX512F-NEXT: kmovw %k0, %eax
3249-
; AVX512F-NEXT: andb $1, %al
3250-
; AVX512F-NEXT: kmovw %k1, %esi
3251-
; AVX512F-NEXT: andb $1, %sil
3252-
; AVX512F-NEXT: movw $-3, %bx
3253-
; AVX512F-NEXT: kmovw %ebx, %k0
3254-
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
3255-
; AVX512F-NEXT: mulb %sil
3256-
; AVX512F-NEXT: movl %eax, %esi
3257-
; AVX512F-NEXT: testb $2, %al
3258-
; AVX512F-NEXT: setne %al
3259-
; AVX512F-NEXT: kmovw %eax, %k1
3260-
; AVX512F-NEXT: kandw %k0, %k1, %k1
3261-
; AVX512F-NEXT: movl %ecx, %eax
3262-
; AVX512F-NEXT: mulb %dl
3263-
; AVX512F-NEXT: movl %eax, %ecx
3264-
; AVX512F-NEXT: testb $2, %al
3265-
; AVX512F-NEXT: setne %al
3266-
; AVX512F-NEXT: kmovw %eax, %k2
3267-
; AVX512F-NEXT: kshiftlw $15, %k2, %k2
3268-
; AVX512F-NEXT: kshiftrw $14, %k2, %k2
3269-
; AVX512F-NEXT: korw %k2, %k1, %k2
3270-
; AVX512F-NEXT: movw $-5, %ax
3271-
; AVX512F-NEXT: kmovw %eax, %k1
3272-
; AVX512F-NEXT: kandw %k1, %k2, %k2
3273-
; AVX512F-NEXT: movl %r10d, %eax
3274-
; AVX512F-NEXT: mulb %r11b
3275-
; AVX512F-NEXT: movl %eax, %edx
3276-
; AVX512F-NEXT: testb $2, %al
3277-
; AVX512F-NEXT: setne %al
3278-
; AVX512F-NEXT: kmovw %eax, %k3
3279-
; AVX512F-NEXT: kshiftlw $2, %k3, %k3
3280-
; AVX512F-NEXT: korw %k3, %k2, %k2
3281-
; AVX512F-NEXT: kshiftlw $13, %k2, %k2
3282-
; AVX512F-NEXT: kshiftrw $13, %k2, %k2
3283-
; AVX512F-NEXT: movl %r8d, %eax
3284-
; AVX512F-NEXT: mulb %r9b
3285-
; AVX512F-NEXT: # kill: def $al killed $al def $eax
3286-
; AVX512F-NEXT: testb $2, %al
3287-
; AVX512F-NEXT: setne %bl
3288-
; AVX512F-NEXT: kmovw %ebx, %k3
3289-
; AVX512F-NEXT: kshiftlw $3, %k3, %k3
3290-
; AVX512F-NEXT: korw %k3, %k2, %k2
3291-
; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
3292-
; AVX512F-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z}
3293-
; AVX512F-NEXT: andl $1, %esi
3294-
; AVX512F-NEXT: kmovw %esi, %k2
3295-
; AVX512F-NEXT: kandw %k0, %k2, %k0
3296-
; AVX512F-NEXT: kmovw %ecx, %k2
3297-
; AVX512F-NEXT: kshiftlw $15, %k2, %k2
3298-
; AVX512F-NEXT: kshiftrw $14, %k2, %k2
3299-
; AVX512F-NEXT: korw %k2, %k0, %k0
3300-
; AVX512F-NEXT: kandw %k1, %k0, %k0
3301-
; AVX512F-NEXT: kmovw %edx, %k1
3302-
; AVX512F-NEXT: kshiftlw $15, %k1, %k1
3303-
; AVX512F-NEXT: kshiftrw $13, %k1, %k1
3304-
; AVX512F-NEXT: korw %k1, %k0, %k0
3305-
; AVX512F-NEXT: movw $-9, %cx
3306-
; AVX512F-NEXT: kmovw %ecx, %k1
3307-
; AVX512F-NEXT: kandw %k1, %k0, %k0
3308-
; AVX512F-NEXT: kmovw %eax, %k1
3309-
; AVX512F-NEXT: kshiftlw $15, %k1, %k1
3310-
; AVX512F-NEXT: kshiftrw $12, %k1, %k1
3311-
; AVX512F-NEXT: korw %k1, %k0, %k0
33123196
; AVX512F-NEXT: kmovw %k0, %eax
33133197
; AVX512F-NEXT: movb %al, (%rdi)
3314-
; AVX512F-NEXT: popq %rbx
3198+
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
33153199
; AVX512F-NEXT: retq
33163200
;
33173201
; AVX512BW-LABEL: umulo_v4i1:
33183202
; AVX512BW: # %bb.0:
3319-
; AVX512BW-NEXT: pushq %rbx
3203+
; AVX512BW-NEXT: vpand %xmm1, %xmm0, %xmm0
33203204
; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
33213205
; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k0
3322-
; AVX512BW-NEXT: kshiftrw $3, %k0, %k1
3323-
; AVX512BW-NEXT: kmovd %k1, %r8d
3324-
; AVX512BW-NEXT: andb $1, %r8b
3325-
; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0
3326-
; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k1
3327-
; AVX512BW-NEXT: kshiftrw $3, %k1, %k2
3328-
; AVX512BW-NEXT: kmovd %k2, %r9d
3329-
; AVX512BW-NEXT: andb $1, %r9b
3330-
; AVX512BW-NEXT: kshiftrw $2, %k0, %k2
3331-
; AVX512BW-NEXT: kmovd %k2, %r10d
3332-
; AVX512BW-NEXT: andb $1, %r10b
3333-
; AVX512BW-NEXT: kshiftrw $2, %k1, %k2
3334-
; AVX512BW-NEXT: kmovd %k2, %r11d
3335-
; AVX512BW-NEXT: andb $1, %r11b
3336-
; AVX512BW-NEXT: kshiftrw $1, %k0, %k2
3337-
; AVX512BW-NEXT: kmovd %k2, %ecx
3338-
; AVX512BW-NEXT: andb $1, %cl
3339-
; AVX512BW-NEXT: kshiftrw $1, %k1, %k2
3340-
; AVX512BW-NEXT: kmovd %k2, %edx
3341-
; AVX512BW-NEXT: andb $1, %dl
3342-
; AVX512BW-NEXT: kmovd %k0, %eax
3343-
; AVX512BW-NEXT: andb $1, %al
3344-
; AVX512BW-NEXT: kmovd %k1, %esi
3345-
; AVX512BW-NEXT: andb $1, %sil
3346-
; AVX512BW-NEXT: movw $-3, %bx
3347-
; AVX512BW-NEXT: kmovd %ebx, %k0
3348-
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
3349-
; AVX512BW-NEXT: mulb %sil
3350-
; AVX512BW-NEXT: movl %eax, %esi
3351-
; AVX512BW-NEXT: testb $2, %al
3352-
; AVX512BW-NEXT: setne %al
3353-
; AVX512BW-NEXT: kmovd %eax, %k1
3354-
; AVX512BW-NEXT: kandw %k0, %k1, %k1
3355-
; AVX512BW-NEXT: movl %ecx, %eax
3356-
; AVX512BW-NEXT: mulb %dl
3357-
; AVX512BW-NEXT: movl %eax, %ecx
3358-
; AVX512BW-NEXT: testb $2, %al
3359-
; AVX512BW-NEXT: setne %al
3360-
; AVX512BW-NEXT: kmovd %eax, %k2
3361-
; AVX512BW-NEXT: kshiftlw $15, %k2, %k2
3362-
; AVX512BW-NEXT: kshiftrw $14, %k2, %k2
3363-
; AVX512BW-NEXT: korw %k2, %k1, %k2
3364-
; AVX512BW-NEXT: movw $-5, %ax
3365-
; AVX512BW-NEXT: kmovd %eax, %k1
3366-
; AVX512BW-NEXT: kandw %k1, %k2, %k2
3367-
; AVX512BW-NEXT: movl %r10d, %eax
3368-
; AVX512BW-NEXT: mulb %r11b
3369-
; AVX512BW-NEXT: movl %eax, %edx
3370-
; AVX512BW-NEXT: testb $2, %al
3371-
; AVX512BW-NEXT: setne %al
3372-
; AVX512BW-NEXT: kmovd %eax, %k3
3373-
; AVX512BW-NEXT: kshiftlw $2, %k3, %k3
3374-
; AVX512BW-NEXT: korw %k3, %k2, %k2
3375-
; AVX512BW-NEXT: kshiftlw $13, %k2, %k2
3376-
; AVX512BW-NEXT: kshiftrw $13, %k2, %k2
3377-
; AVX512BW-NEXT: movl %r8d, %eax
3378-
; AVX512BW-NEXT: mulb %r9b
3379-
; AVX512BW-NEXT: # kill: def $al killed $al def $eax
3380-
; AVX512BW-NEXT: testb $2, %al
3381-
; AVX512BW-NEXT: setne %bl
3382-
; AVX512BW-NEXT: kmovd %ebx, %k3
3383-
; AVX512BW-NEXT: kshiftlw $3, %k3, %k3
3384-
; AVX512BW-NEXT: korw %k3, %k2, %k2
3385-
; AVX512BW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
3386-
; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k2} {z}
3387-
; AVX512BW-NEXT: andl $1, %esi
3388-
; AVX512BW-NEXT: kmovw %esi, %k2
3389-
; AVX512BW-NEXT: kandw %k0, %k2, %k0
3390-
; AVX512BW-NEXT: kmovd %ecx, %k2
3391-
; AVX512BW-NEXT: kshiftlw $15, %k2, %k2
3392-
; AVX512BW-NEXT: kshiftrw $14, %k2, %k2
3393-
; AVX512BW-NEXT: korw %k2, %k0, %k0
3394-
; AVX512BW-NEXT: kandw %k1, %k0, %k0
3395-
; AVX512BW-NEXT: kmovd %edx, %k1
3396-
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
3397-
; AVX512BW-NEXT: kshiftrw $13, %k1, %k1
3398-
; AVX512BW-NEXT: korw %k1, %k0, %k0
3399-
; AVX512BW-NEXT: movw $-9, %cx
3400-
; AVX512BW-NEXT: kmovd %ecx, %k1
3401-
; AVX512BW-NEXT: kandw %k1, %k0, %k0
3402-
; AVX512BW-NEXT: kmovd %eax, %k1
3403-
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
3404-
; AVX512BW-NEXT: kshiftrw $12, %k1, %k1
3405-
; AVX512BW-NEXT: korw %k1, %k0, %k0
34063206
; AVX512BW-NEXT: kmovd %k0, %eax
34073207
; AVX512BW-NEXT: movb %al, (%rdi)
3408-
; AVX512BW-NEXT: popq %rbx
3208+
; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0
34093209
; AVX512BW-NEXT: retq
34103210
%t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
34113211
%val = extractvalue {<4 x i1>, <4 x i1>} %t, 0

0 commit comments

Comments
 (0)