Skip to content

Commit 545de56

Browse files
committed
[ARM] Enabled VMLAV and Add instructions to use VMLAVA
Uses InstCombine to fold a VMLAV intrinsic call whose accumulator operand is zero, followed by an Add of its result, into a single accumulating VMLAVA intrinsic call. Tests are included.
1 parent 6c5039a commit 545de56

File tree

2 files changed

+131
-0
lines changed

2 files changed

+131
-0
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ bool ARMTTIImpl::shouldFavorPostInc() const {
108108

109109
Optional<Instruction *>
110110
ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
111+
using namespace PatternMatch;
111112
Intrinsic::ID IID = II.getIntrinsicID();
112113
switch (IID) {
113114
default:
@@ -210,6 +211,29 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
210211
}
211212
break;
212213
}
214+
// Fold an add of a zero-accumulator vmldava result into the intrinsic itself:
//   %r = vmldava(a, b, c, 0, x, y);  %s = add %r, %z
// becomes
//   %s = vmldava(a, b, c, %z, x, y)
// Returns the result of erasing the add when the fold applies, None otherwise.
case Intrinsic::arm_mve_vmldava: {
215+
Instruction *I = cast<Instruction>(&II);
216+
// Only fold when the intrinsic result has exactly one user, so that the
// original call's value is not needed anywhere else.
if (I->hasOneUse()) {
217+
auto *User = cast<Instruction>(*I->user_begin());
218+
// Bound by the matcher below to the add's other operand.
Value *OpZ;
219+
// The single user must be an add taking I as one operand (m_c_Add accepts
// either operand order), and the intrinsic's operand 3 must be zero.
if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) &&
220+
match(I->getOperand(3), m_Zero())) {
221+
Value *OpX = I->getOperand(4);
222+
Value *OpY = I->getOperand(5);
223+
// The intrinsic is overloaded on the type of operand 4.
Type *OpTy = OpX->getType();
224+
225+
// Emit the replacement call at the position of the add it replaces.
IC.Builder.SetInsertPoint(User);
226+
// Same intrinsic, with operands 0-2 passed through unchanged and OpZ
// substituted for the zero in operand 3.
Value *V =
227+
IC.Builder.CreateIntrinsic(Intrinsic::arm_mve_vmldava, {OpTy},
228+
{I->getOperand(0), I->getOperand(1),
229+
I->getOperand(2), OpZ, OpX, OpY});
230+
231+
// Redirect every use of the add to the new call, then erase the add.
// The original intrinsic call I is not erased here.
IC.replaceInstUsesWith(*User, V);
232+
return IC.eraseInstFromFunction(*User);
233+
}
234+
}
235+
// Pattern did not match: report that no combine was performed.
return None;
236+
}
213237
}
214238
return None;
215239
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -instcombine -S -mtriple=arm -o - %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
5+
6+
define arm_aapcs_vfpcc i32 @test_vmladavaq_s32(i32 %z, <4 x i32> %x, <4 x i32> %y) {
7+
; CHECK-LABEL: @test_vmladavaq_s32(
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 %z, <4 x i32> %x, <4 x i32> %y)
10+
; CHECK-NEXT: ret i32 %0
11+
entry:
12+
%0 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 0, <4 x i32> %x, <4 x i32> %y)
13+
%1 = add nsw i32 %0, %z
14+
ret i32 %1
15+
}
16+
17+
define arm_aapcs_vfpcc i32 @test_vmladavaq_s16(i32 %z, <8 x i16> %x, <8 x i16> %y) {
18+
; CHECK-LABEL: @test_vmladavaq_s16(
19+
; CHECK-NEXT: entry:
20+
; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %z, <8 x i16> %x, <8 x i16> %y)
21+
; CHECK-NEXT: ret i32 %0
22+
entry:
23+
%0 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 0, <8 x i16> %x, <8 x i16> %y)
24+
%1 = add nsw i32 %0, %z
25+
ret i32 %1
26+
}
27+
28+
define arm_aapcs_vfpcc i32 @test_vmladavaq_s8(i32 %z, <16 x i8> %x, <16 x i8> %y) {
29+
; CHECK-LABEL: @test_vmladavaq_s8(
30+
; CHECK-NEXT: entry:
31+
; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 0, i32 0, i32 %z, <16 x i8> %x, <16 x i8> %y)
32+
; CHECK-NEXT: ret i32 %0
33+
entry:
34+
%0 = tail call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 0, i32 0, i32 0, <16 x i8> %x, <16 x i8> %y)
35+
%1 = add nsw i32 %0, %z
36+
ret i32 %1
37+
}
38+
39+
define arm_aapcs_vfpcc i32 @test_vmladavaq_u32(i32 %z, <4 x i32> %x, <4 x i32> %y) {
40+
; CHECK-LABEL: @test_vmladavaq_u32(
41+
; CHECK-NEXT: entry:
42+
; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 1, i32 0, i32 0, i32 %z, <4 x i32> %x, <4 x i32> %y)
43+
; CHECK-NEXT: ret i32 %0
44+
entry:
45+
%0 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 1, i32 0, i32 0, i32 0, <4 x i32> %x, <4 x i32> %y)
46+
%1 = add nsw i32 %0, %z
47+
ret i32 %1
48+
}
49+
50+
define arm_aapcs_vfpcc i32 @test_vmladavaq_u16(i32 %z, <8 x i16> %x, <8 x i16> %y) {
51+
; CHECK-LABEL: @test_vmladavaq_u16(
52+
; CHECK-NEXT: entry:
53+
; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 1, i32 0, i32 0, i32 %z, <8 x i16> %x, <8 x i16> %y)
54+
; CHECK-NEXT: ret i32 %0
55+
entry:
56+
%0 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 1, i32 0, i32 0, i32 0, <8 x i16> %x, <8 x i16> %y)
57+
%1 = add nsw i32 %0, %z
58+
ret i32 %1
59+
}
60+
61+
define arm_aapcs_vfpcc i32 @test_vmladavaq_u8(i32 %z, <16 x i8> %x, <16 x i8> %y) {
62+
; CHECK-LABEL: @test_vmladavaq_u8(
63+
; CHECK-NEXT: entry:
64+
; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 1, i32 0, i32 0, i32 %z, <16 x i8> %x, <16 x i8> %y)
65+
; CHECK-NEXT: ret i32 %0
66+
entry:
67+
%0 = tail call i32 @llvm.arm.mve.vmldava.v16i8(i32 1, i32 0, i32 0, i32 0, <16 x i8> %x, <16 x i8> %y)
68+
%1 = add nsw i32 %0, %z
69+
ret i32 %1
70+
}
71+
72+
define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s32(i32 %z, <4 x i32> %x, <4 x i32> %y) {
73+
; CHECK-LABEL: @test_vmlsdavaq_s32(
74+
; CHECK-NEXT: entry:
75+
; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 1, i32 0, i32 %z, <4 x i32> %x, <4 x i32> %y)
76+
; CHECK-NEXT: ret i32 %0
77+
entry:
78+
%0 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 1, i32 0, i32 0, <4 x i32> %x, <4 x i32> %y)
79+
%1 = add nsw i32 %0, %z
80+
ret i32 %1
81+
}
82+
83+
define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s16(i32 %z, <8 x i16> %x, <8 x i16> %y) {
84+
; CHECK-LABEL: @test_vmlsdavaq_s16(
85+
; CHECK-NEXT: entry:
86+
; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 1, i32 0, i32 %z, <8 x i16> %x, <8 x i16> %y)
87+
; CHECK-NEXT: ret i32 %0
88+
entry:
89+
%0 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 1, i32 0, i32 0, <8 x i16> %x, <8 x i16> %y)
90+
%1 = add nsw i32 %0, %z
91+
ret i32 %1
92+
}
93+
94+
define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s8(i32 %z, <16 x i8> %x, <16 x i8> %y) {
95+
; CHECK-LABEL: @test_vmlsdavaq_s8(
96+
; CHECK-NEXT: entry:
97+
; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 1, i32 0, i32 %z, <16 x i8> %x, <16 x i8> %y)
98+
; CHECK-NEXT: ret i32 %0
99+
entry:
100+
%0 = tail call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 1, i32 0, i32 0, <16 x i8> %x, <16 x i8> %y)
101+
%1 = add nsw i32 %0, %z
102+
ret i32 %1
103+
}
104+
105+
declare i32 @llvm.arm.mve.vmldava.v4i32(i32, i32, i32, i32, <4 x i32>, <4 x i32>)
106+
declare i32 @llvm.arm.mve.vmldava.v8i16(i32, i32, i32, i32, <8 x i16>, <8 x i16>)
107+
declare i32 @llvm.arm.mve.vmldava.v16i8(i32, i32, i32, i32, <16 x i8>, <16 x i8>)

0 commit comments

Comments
 (0)