Commit 912c7ec

[SelectionDAG] Add support for vector demandedelts in UREM/SREM opcodes
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286578 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent afe6870 commit 912c7ec
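
This change passes the DemandedElts mask through the SREM and UREM cases of SelectionDAG::computeKnownBits and switches the divisor check from dyn_cast<ConstantSDNode> to isConstOrConstSplat, so splat-constant vector divisors are recognised as well. As a minimal standalone sketch of the power-of-two rule those cases rely on (plain C++ with fixed-width integers, not LLVM's APInt/KnownBits machinery):

// knownbits_urem_sketch.cpp -- standalone illustration, not LLVM code.
#include <cassert>
#include <cstdint>

// Known-bits state for a single 32-bit lane: a bit set in Zero means that
// bit is known to be 0 in every possible value; likewise for One.
struct KnownBits32 {
  uint32_t Zero = 0;
  uint32_t One = 0;
};

// Rule used for UREM by a power-of-two constant C:
// x urem C == x & (C - 1), so the upper bits are all zero and the low
// log2(C) bits are unchanged from the dividend.
KnownBits32 knownBitsURemPow2(const KnownBits32 &LHS, uint32_t C) {
  assert(C != 0 && (C & (C - 1)) == 0 && "divisor must be a power of two");
  uint32_t LowBits = C - 1;
  KnownBits32 Res;
  Res.Zero = LHS.Zero | ~LowBits; // everything above the low bits is zero
  Res.One = LHS.One & LowBits;    // low bits come straight from the dividend
  return Res;
}

int main() {
  KnownBits32 Dividend;                                // nothing known yet
  KnownBits32 Rem = knownBitsURemPow2(Dividend, 16);
  // Only bits 0..3 can survive a urem by 16, so a later logical shift right
  // by 22 is provably zero. The same style of reasoning is what lets the
  // updated tests in this commit fold to a single vxorps.
  assert((~Rem.Zero & ~0xFu) == 0);
  return 0;
}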

File tree: 2 files changed, 14 additions and 76 deletions

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 10 additions & 6 deletions
@@ -2476,11 +2476,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     break;
   }
   case ISD::SREM:
-    if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
       const APInt &RA = Rem->getAPIntValue().abs();
       if (RA.isPowerOf2()) {
         APInt LowBits = RA - 1;
-        computeKnownBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
+        computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+                         Depth + 1);

         // The low bits of the first operand are unchanged by the srem.
         KnownZero = KnownZero2 & LowBits;
@@ -2500,11 +2501,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     }
     break;
   case ISD::UREM: {
-    if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
       const APInt &RA = Rem->getAPIntValue();
       if (RA.isPowerOf2()) {
         APInt LowBits = (RA - 1);
-        computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+        computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+                         Depth + 1);

         // The upper bits are all zero, the lower ones are unchanged.
         KnownZero = KnownZero2 | ~LowBits;
@@ -2515,8 +2517,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,

     // Since the result is less than or equal to either operand, any leading
     // zero bits in either operand must also exist in the result.
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
-    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+                     Depth + 1);
+    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+                     Depth + 1);

     uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
                                 KnownZero2.countLeadingOnes());
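
Passing DemandedElts through the recursion matters because the known bits reported for a vector node only have to hold for the demanded lanes; when a later shuffle discards lanes, those lanes no longer constrain the result. A small standalone sketch of that per-lane intersection (plain C++ with hypothetical helper names, not the LLVM API):

// demanded_elts_sketch.cpp -- standalone illustration, not LLVM code.
#include <array>
#include <cassert>
#include <cstdint>

// For a 4 x i32 vector node, the known-zero mask reported to callers is the
// intersection of the per-lane known-zero masks, taken only over the lanes
// the caller actually demands (bit i of DemandedElts == lane i demanded).
uint32_t commonKnownZero(const std::array<uint32_t, 4> &LaneKnownZero,
                         unsigned DemandedElts) {
  uint32_t Common = ~0u;
  for (unsigned I = 0; I < 4; ++I)
    if (DemandedElts & (1u << I))
      Common &= LaneKnownZero[I];
  return Common;
}

int main() {
  // Lanes 0 and 3 were masked to 15 bits, lanes 1 and 2 are unconstrained,
  // mirroring the "and <i32 32767, i32 -1, i32 -1, i32 32767>" in the test.
  std::array<uint32_t, 4> Lanes = {~0x7FFFu, 0u, 0u, ~0x7FFFu};
  // Demanding every lane, the unconstrained lanes wipe out all known bits.
  assert(commonKnownZero(Lanes, 0xF) == 0u);
  // A shuffle reading only lanes 0 and 3 demands just those lanes, so the
  // 17 high known-zero bits survive and downstream shifts can fold.
  assert(commonKnownZero(Lanes, 0x9) == ~0x7FFFu);
  return 0;
}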

test/CodeGen/X86/known-bits-vector.ll

Lines changed: 4 additions & 70 deletions
@@ -274,64 +274,12 @@ define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
 define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
 ; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
 ; X32: # BB#0:
-; X32-NEXT: pushl %esi
-; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,4294967295,4294967295,32767]
-; X32-NEXT: vpand %xmm2, %xmm0, %xmm0
-; X32-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X32-NEXT: vpextrd $1, %xmm0, %eax
-; X32-NEXT: vpextrd $1, %xmm1, %ecx
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: divl %ecx
-; X32-NEXT: movl %edx, %ecx
-; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vmovd %xmm1, %esi
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: divl %esi
-; X32-NEXT: vmovd %edx, %xmm2
-; X32-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
-; X32-NEXT: vpextrd $2, %xmm0, %eax
-; X32-NEXT: vpextrd $2, %xmm1, %ecx
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: divl %ecx
-; X32-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
-; X32-NEXT: vpextrd $3, %xmm0, %eax
-; X32-NEXT: vpextrd $3, %xmm1, %ecx
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: divl %ecx
-; X32-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
-; X32-NEXT: vpsrld $22, %xmm0, %xmm0
-; X32-NEXT: popl %esi
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
 ; X64: # BB#0:
-; X64-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,4294967295,4294967295,32767]
-; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
-; X64-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X64-NEXT: vpextrd $1, %xmm0, %eax
-; X64-NEXT: vpextrd $1, %xmm1, %ecx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divl %ecx
-; X64-NEXT: movl %edx, %ecx
-; X64-NEXT: vmovd %xmm0, %eax
-; X64-NEXT: vmovd %xmm1, %esi
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divl %esi
-; X64-NEXT: vmovd %edx, %xmm2
-; X64-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
-; X64-NEXT: vpextrd $2, %xmm0, %eax
-; X64-NEXT: vpextrd $2, %xmm1, %ecx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divl %ecx
-; X64-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
-; X64-NEXT: vpextrd $3, %xmm0, %eax
-; X64-NEXT: vpextrd $3, %xmm1, %ecx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divl %ecx
-; X64-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
-; X64-NEXT: vpsrld $22, %xmm0, %xmm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; X64-NEXT: retq
   %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
   %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
@@ -344,26 +292,12 @@ define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1)
 define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
 ; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
 ; X32: # BB#0:
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpsrad $31, %xmm0, %xmm1
-; X32-NEXT: vpsrld $28, %xmm1, %xmm1
-; X32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm1, %xmm1
-; X32-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
-; X32-NEXT: vpsrld $22, %xmm0, %xmm0
+; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
 ; X64: # BB#0:
-; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpsrad $31, %xmm0, %xmm1
-; X64-NEXT: vpsrld $28, %xmm1, %xmm1
-; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
-; X64-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
-; X64-NEXT: vpsrld $22, %xmm0, %xmm0
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; X64-NEXT: retq
   %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
   %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
