Skip to content

Commit 66caf01

Browse files
committed
[InstCombine][X86] Add commuted undef arg tests for PMADDWD/PMADDUBSW intrinsics
1 parent d23959b commit 66caf01

File tree

2 files changed

+72
-18
lines changed

2 files changed

+72
-18
lines changed

llvm/test/Transforms/InstCombine/X86/x86-pmaddubsw.ll

Lines changed: 36 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,30 +5,57 @@
55
; UNDEF Elts
66
;
77

8-
define <8 x i16> @undef_pmaddubsw_128() {
8+
define <8 x i16> @undef_pmaddubsw_128(<16 x i8> %a0) {
99
; CHECK-LABEL: @undef_pmaddubsw_128(
10-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> undef, <16 x i8> undef)
10+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> [[A0:%.*]], <16 x i8> undef)
1111
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
1212
;
13-
%1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> undef, <16 x i8> undef)
13+
%1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> undef)
1414
ret <8 x i16> %1
1515
}
1616

17-
define <16 x i16> @undef_pmaddubsw_256() {
17+
define <8 x i16> @undef_pmaddubsw_128_commute(<16 x i8> %a0) {
18+
; CHECK-LABEL: @undef_pmaddubsw_128_commute(
19+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> undef, <16 x i8> [[A0:%.*]])
20+
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
21+
;
22+
%1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> undef, <16 x i8> %a0)
23+
ret <8 x i16> %1
24+
}
25+
26+
define <16 x i16> @undef_pmaddubsw_256(<32 x i8> %a0) {
1827
; CHECK-LABEL: @undef_pmaddubsw_256(
19-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> undef, <32 x i8> undef)
28+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> undef)
2029
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2130
;
22-
%1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> undef, <32 x i8> undef)
31+
%1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> undef)
2332
ret <16 x i16> %1
2433
}
2534

26-
define <32 x i16> @undef_pmaddubsw_512() {
35+
define <16 x i16> @undef_pmaddubsw_256_commute(<32 x i8> %a0) {
36+
; CHECK-LABEL: @undef_pmaddubsw_256_commute(
37+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> undef, <32 x i8> [[A0:%.*]])
38+
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
39+
;
40+
%1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> undef, <32 x i8> %a0)
41+
ret <16 x i16> %1
42+
}
43+
44+
define <32 x i16> @undef_pmaddubsw_512(<64 x i8> %a0) {
2745
; CHECK-LABEL: @undef_pmaddubsw_512(
28-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> undef, <64 x i8> undef)
46+
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> [[A0:%.*]], <64 x i8> undef)
47+
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
48+
;
49+
%1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %a0, <64 x i8> undef)
50+
ret <32 x i16> %1
51+
}
52+
53+
define <32 x i16> @undef_pmaddubsw_512_commute(<64 x i8> %a0) {
54+
; CHECK-LABEL: @undef_pmaddubsw_512_commute(
55+
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> undef, <64 x i8> [[A0:%.*]])
2956
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
3057
;
31-
%1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> undef, <64 x i8> undef)
58+
%1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> undef, <64 x i8> %a0)
3259
ret <32 x i16> %1
3360
}
3461

llvm/test/Transforms/InstCombine/X86/x86-pmaddwd.ll

Lines changed: 36 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,30 +5,57 @@
55
; UNDEF Elts
66
;
77

8-
define <4 x i32> @undef_pmaddwd_128() {
8+
define <4 x i32> @undef_pmaddwd_128(<8 x i16> %a0) {
99
; CHECK-LABEL: @undef_pmaddwd_128(
10-
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> undef, <8 x i16> undef)
10+
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> undef)
1111
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1212
;
13-
%1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> undef, <8 x i16> undef)
13+
%1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> undef)
1414
ret <4 x i32> %1
1515
}
1616

17-
define <8 x i32> @undef_pmaddwd_256() {
17+
define <4 x i32> @undef_pmaddwd_128_commute(<8 x i16> %a0) {
18+
; CHECK-LABEL: @undef_pmaddwd_128_commute(
19+
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> undef, <8 x i16> [[A0:%.*]])
20+
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
21+
;
22+
%1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> undef, <8 x i16> %a0)
23+
ret <4 x i32> %1
24+
}
25+
26+
define <8 x i32> @undef_pmaddwd_256(<16 x i16> %a0) {
1827
; CHECK-LABEL: @undef_pmaddwd_256(
19-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> undef, <16 x i16> undef)
28+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> undef)
2029
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
2130
;
22-
%1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> undef, <16 x i16> undef)
31+
%1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> undef)
2332
ret <8 x i32> %1
2433
}
2534

26-
define <16 x i32> @undef_pmaddwd_512() {
35+
define <8 x i32> @undef_pmaddwd_256_commute(<16 x i16> %a0) {
36+
; CHECK-LABEL: @undef_pmaddwd_256_commute(
37+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> undef, <16 x i16> [[A0:%.*]])
38+
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
39+
;
40+
%1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> undef, <16 x i16> %a0)
41+
ret <8 x i32> %1
42+
}
43+
44+
define <16 x i32> @undef_pmaddwd_512(<32 x i16> %a0) {
2745
; CHECK-LABEL: @undef_pmaddwd_512(
28-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> undef, <32 x i16> undef)
46+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[A0:%.*]], <32 x i16> undef)
47+
; CHECK-NEXT: ret <16 x i32> [[TMP1]]
48+
;
49+
%1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> undef)
50+
ret <16 x i32> %1
51+
}
52+
53+
define <16 x i32> @undef_pmaddwd_512_commute(<32 x i16> %a0) {
54+
; CHECK-LABEL: @undef_pmaddwd_512_commute(
55+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> undef, <32 x i16> [[A0:%.*]])
2956
; CHECK-NEXT: ret <16 x i32> [[TMP1]]
3057
;
31-
%1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> undef, <32 x i16> undef)
58+
%1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> undef, <32 x i16> %a0)
3259
ret <16 x i32> %1
3360
}
3461

0 commit comments

Comments
 (0)