Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 116ba1a

Browse files
committed
[InstCombine][X86] Add MULDQ/MULUDQ undef handling
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292627 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 4a8de03 commit 116ba1a

File tree

3 files changed

+81
-12
lines changed

3 files changed

+81
-12
lines changed

lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,18 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
510510
return Builder.CreateAShr(Vec, ShiftVec);
511511
}
512512

513+
// Attempt to fold a call to one of the X86 PMULDQ/PMULUDQ intrinsics.
//
// \param II the intrinsic call; its first two argument operands are inspected.
// \returns an undef value of the call's result type when both operands are
//          undef, or nullptr when no simplification applies.
static Value *simplifyX86muldq(const IntrinsicInst &II) {
514+
Value *Arg0 = II.getArgOperand(0);
515+
Value *Arg1 = II.getArgOperand(1);
516+
Type *ResTy = II.getType();
517+
518+
// muldq/muludq(undef, undef) -> undef
// Both operands must be undef: this fold does not fire when only one is.
519+
if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
520+
return UndefValue::get(ResTy);
521+
522+
// No fold matched; nullptr tells the caller there is no replacement value.
return nullptr;
523+
}
524+
513525
static Value *simplifyX86movmsk(const IntrinsicInst &II,
514526
InstCombiner::BuilderTy &Builder) {
515527
Value *Arg = II.getArgOperand(0);
@@ -2142,6 +2154,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
21422154
case Intrinsic::x86_avx2_pmulu_dq:
21432155
case Intrinsic::x86_avx512_pmul_dq_512:
21442156
case Intrinsic::x86_avx512_pmulu_dq_512: {
2157+
if (Value *V = simplifyX86muldq(*II))
2158+
return replaceInstUsesWith(*II, V);
2159+
21452160
unsigned VWidth = II->getType()->getVectorNumElements();
21462161
APInt UndefElts(VWidth, 0);
21472162
APInt DemandedElts = APInt::getAllOnesValue(VWidth);

lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,6 +1469,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
14691469
Depth + 1);
14701470
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
14711471

1472+
// An output element is undef only if the corresponding even input elements
1473+
// of both operands are undef. Consider undef*0: the result is known zero, not undef.
1474+
for (unsigned i = 0; i != VWidth; ++i)
1475+
if (UndefElts2[i * 2] && UndefElts3[i * 2])
1476+
UndefElts.setBit(i);
1477+
14721478
break;
14731479
}
14741480

test/Transforms/InstCombine/x86-muldq.ll

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,58 +7,106 @@
77

88
define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
99
; CHECK-LABEL: @undef_pmuludq_128(
10-
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
11-
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
10+
; CHECK-NEXT: ret <2 x i64> undef
1211
;
1312
%1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
1413
ret <2 x i64> %1
1514
}
1615

1716
define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
1817
; CHECK-LABEL: @undef_pmuludq_256(
19-
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
20-
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
18+
; CHECK-NEXT: ret <4 x i64> undef
2119
;
2220
%1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
2321
ret <4 x i64> %1
2422
}
2523

2624
define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
2725
; CHECK-LABEL: @undef_pmuludq_512(
28-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
29-
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
26+
; CHECK-NEXT: ret <8 x i64> undef
3027
;
3128
%1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
3229
ret <8 x i64> %1
3330
}
3431

3532
define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
3633
; CHECK-LABEL: @undef_pmuldq_128(
37-
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
38-
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
34+
; CHECK-NEXT: ret <2 x i64> undef
3935
;
4036
%1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
4137
ret <2 x i64> %1
4238
}
4339

4440
define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
4541
; CHECK-LABEL: @undef_pmuldq_256(
46-
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
47-
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
42+
; CHECK-NEXT: ret <4 x i64> undef
4843
;
4944
%1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
5045
ret <4 x i64> %1
5146
}
5247

5348
define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
5449
; CHECK-LABEL: @undef_pmuldq_512(
55-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
56-
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
50+
; CHECK-NEXT: ret <8 x i64> undef
5751
;
5852
%1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
5953
ret <8 x i64> %1
6054
}
6155

56+
define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
57+
; CHECK-LABEL: @undef_zero_pmuludq_128(
58+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 undef>)
59+
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
60+
;
61+
%1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer)
62+
ret <2 x i64> %1
63+
}
64+
65+
define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
66+
; CHECK-LABEL: @undef_zero_pmuludq_256(
67+
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <8 x i32> undef)
68+
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
69+
;
70+
%1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef)
71+
ret <4 x i64> %1
72+
}
73+
74+
define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
75+
; CHECK-LABEL: @undef_zero_pmuludq_512(
76+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>)
77+
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
78+
;
79+
%1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer)
80+
ret <8 x i64> %1
81+
}
82+
83+
define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
84+
; CHECK-LABEL: @undef_zero_pmuldq_128(
85+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 0, i32 undef, i32 0, i32 undef>, <4 x i32> undef)
86+
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
87+
;
88+
%1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef)
89+
ret <2 x i64> %1
90+
}
91+
92+
define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
93+
; CHECK-LABEL: @undef_zero_pmuldq_256(
94+
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>)
95+
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
96+
;
97+
%1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer)
98+
ret <4 x i64> %1
99+
}
100+
101+
define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
102+
; CHECK-LABEL: @undef_zero_pmuldq_512(
103+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <16 x i32> undef)
104+
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
105+
;
106+
%1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef)
107+
ret <8 x i64> %1
108+
}
109+
62110
;
63111
; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
64112
;

0 commit comments

Comments
 (0)