@@ -7,58 +7,106 @@
 
 define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuludq_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
   %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
   ret <2 x i64> %1
 }
 
 define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuludq_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <4 x i64> undef
 ;
   %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
   ret <4 x i64> %1
 }
 
 define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuludq_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <8 x i64> undef
 ;
   %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
   ret <8 x i64> %1
 }
 
 define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuldq_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <2 x i64> undef
 ;
   %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
   ret <2 x i64> %1
 }
 
 define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuldq_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <4 x i64> undef
 ;
   %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
   ret <4 x i64> %1
 }
 
 define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-LABEL: @undef_pmuldq_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <8 x i64> undef
 ;
   %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
   ret <8 x i64> %1
 }
 
+define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 undef>)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <8 x i32> undef)
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 0, i32 undef, i32 0, i32 undef>, <4 x i32> undef)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>)
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <16 x i32> undef)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef)
+  ret <8 x i64> %1
+}
+
 ;
 ; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
 ;
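A note on what this hunk exercises (not part of the patch itself): the first group of tests now expects a PMULUDQ/PMULDQ call whose operands are both undef to fold straight to an undef return, and the new undef_zero_* tests expect that only the even i32 lanes of each operand are treated as demanded, so the odd lanes of a constant operand are rewritten to undef. A minimal sketch of the same demanded-lanes property, assuming it is fed through opt -instcombine; the function name @odd_lane_ignored and the extra i32 %x argument are illustrative and do not appear in the patch:

declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>)

define <2 x i64> @odd_lane_ignored(<4 x i32> %a0, <4 x i32> %a1, i32 %x) {
  ; lane 1 is an odd lane, so it never feeds either 64-bit product;
  ; instcombine is expected to drop this insertelement entirely
  %v = insertelement <4 x i32> %a1, i32 %x, i32 1
  %r = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %v)
  ret <2 x i64> %r
}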