Skip to content

Commit d893ed7

Browse files
committed
[InstCombine][X86] Add undef arg handling for PMADDWD/PMADDUBSW intrinsics
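A call with an undef argument folds to zero, not undef: the other argument could still be zero, in which case the result must be exactly zero, so folding to undef would be incorrect.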
1 parent a7cddd4 commit d893ed7

File tree

3 files changed

+16
-24
lines changed

3 files changed

+16
-24
lines changed

llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,10 @@ static Value *simplifyX86pmadd(IntrinsicInst &II,
515515
ResTy->getScalarSizeInBits() == (2 * ArgTy->getScalarSizeInBits()) &&
516516
"Unexpected PMADD types");
517517

518+
// Multiply by undef -> zero (NOT undef!) as other arg could still be zero.
519+
if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
520+
return ConstantAggregateZero::get(ResTy);
521+
518522
// Multiply by zero.
519523
if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
520524
return ConstantAggregateZero::get(ResTy);

llvm/test/Transforms/InstCombine/X86/x86-pmaddubsw.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,53 +7,47 @@
77

88
define <8 x i16> @undef_pmaddubsw_128(<16 x i8> %a0) {
99
; CHECK-LABEL: @undef_pmaddubsw_128(
10-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> [[A0:%.*]], <16 x i8> undef)
11-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
10+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
1211
;
1312
%1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> undef)
1413
ret <8 x i16> %1
1514
}
1615

1716
define <8 x i16> @undef_pmaddubsw_128_commute(<16 x i8> %a0) {
1817
; CHECK-LABEL: @undef_pmaddubsw_128_commute(
19-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> undef, <16 x i8> [[A0:%.*]])
20-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
18+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
2119
;
2220
%1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> undef, <16 x i8> %a0)
2321
ret <8 x i16> %1
2422
}
2523

2624
define <16 x i16> @undef_pmaddubsw_256(<32 x i8> %a0) {
2725
; CHECK-LABEL: @undef_pmaddubsw_256(
28-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> undef)
29-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
26+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
3027
;
3128
%1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> undef)
3229
ret <16 x i16> %1
3330
}
3431

3532
define <16 x i16> @undef_pmaddubsw_256_commute(<32 x i8> %a0) {
3633
; CHECK-LABEL: @undef_pmaddubsw_256_commute(
37-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> undef, <32 x i8> [[A0:%.*]])
38-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
34+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
3935
;
4036
%1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> undef, <32 x i8> %a0)
4137
ret <16 x i16> %1
4238
}
4339

4440
define <32 x i16> @undef_pmaddubsw_512(<64 x i8> %a0) {
4541
; CHECK-LABEL: @undef_pmaddubsw_512(
46-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[A0:%.*]], <64 x i8> undef)
47-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
42+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
4843
;
4944
%1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %a0, <64 x i8> undef)
5045
ret <32 x i16> %1
5146
}
5247

5348
define <32 x i16> @undef_pmaddubsw_512_commute(<64 x i8> %a0) {
5449
; CHECK-LABEL: @undef_pmaddubsw_512_commute(
55-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> undef, <64 x i8> [[A0:%.*]])
56-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
50+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
5751
;
5852
%1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> undef, <64 x i8> %a0)
5953
ret <32 x i16> %1

llvm/test/Transforms/InstCombine/X86/x86-pmaddwd.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,53 +7,47 @@
77

88
define <4 x i32> @undef_pmaddwd_128(<8 x i16> %a0) {
99
; CHECK-LABEL: @undef_pmaddwd_128(
10-
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> undef)
11-
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
10+
; CHECK-NEXT: ret <4 x i32> zeroinitializer
1211
;
1312
%1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> undef)
1413
ret <4 x i32> %1
1514
}
1615

1716
define <4 x i32> @undef_pmaddwd_128_commute(<8 x i16> %a0) {
1817
; CHECK-LABEL: @undef_pmaddwd_128_commute(
19-
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> undef, <8 x i16> [[A0:%.*]])
20-
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
18+
; CHECK-NEXT: ret <4 x i32> zeroinitializer
2119
;
2220
%1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> undef, <8 x i16> %a0)
2321
ret <4 x i32> %1
2422
}
2523

2624
define <8 x i32> @undef_pmaddwd_256(<16 x i16> %a0) {
2725
; CHECK-LABEL: @undef_pmaddwd_256(
28-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> undef)
29-
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
26+
; CHECK-NEXT: ret <8 x i32> zeroinitializer
3027
;
3128
%1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> undef)
3229
ret <8 x i32> %1
3330
}
3431

3532
define <8 x i32> @undef_pmaddwd_256_commute(<16 x i16> %a0) {
3633
; CHECK-LABEL: @undef_pmaddwd_256_commute(
37-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> undef, <16 x i16> [[A0:%.*]])
38-
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
34+
; CHECK-NEXT: ret <8 x i32> zeroinitializer
3935
;
4036
%1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> undef, <16 x i16> %a0)
4137
ret <8 x i32> %1
4238
}
4339

4440
define <16 x i32> @undef_pmaddwd_512(<32 x i16> %a0) {
4541
; CHECK-LABEL: @undef_pmaddwd_512(
46-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[A0:%.*]], <32 x i16> undef)
47-
; CHECK-NEXT: ret <16 x i32> [[TMP1]]
42+
; CHECK-NEXT: ret <16 x i32> zeroinitializer
4843
;
4944
%1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> undef)
5045
ret <16 x i32> %1
5146
}
5247

5348
define <16 x i32> @undef_pmaddwd_512_commute(<32 x i16> %a0) {
5449
; CHECK-LABEL: @undef_pmaddwd_512_commute(
55-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> undef, <32 x i16> [[A0:%.*]])
56-
; CHECK-NEXT: ret <16 x i32> [[TMP1]]
50+
; CHECK-NEXT: ret <16 x i32> zeroinitializer
5751
;
5852
%1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> undef, <32 x i16> %a0)
5953
ret <16 x i32> %1

0 commit comments

Comments
 (0)