Skip to content

Commit fcff458

Browse files
authored
[LoongArch] Permit auto-vectorization using LSX/LASX with auto-vec feature (llvm#78943)
With enough codegen complete, we can now correctly report the size of vector registers for LSX/LASX, allowing auto-vectorization (the `auto-vec` feature needs to be enabled at the same time). As described, the `auto-vec` feature is an experimental one. It exists to ensure that automatic vectorization is not enabled by default, because the information provided by the current `TTI` cannot yield additional benefits for automatic vectorization.
1 parent 907f2a0 commit fcff458

File tree

6 files changed

+96
-0
lines changed

6 files changed

+96
-0
lines changed

llvm/lib/Target/LoongArch/LoongArch.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,11 @@ def FeatureRelax
106106
: SubtargetFeature<"relax", "HasLinkerRelax", "true",
107107
"Enable Linker relaxation">;
108108

109+
// Experimental auto vectorization: the "auto-vec" feature string sets the
// HasExpAutoVec subtarget field to "true".
110+
def FeatureAutoVec
111+
: SubtargetFeature<"auto-vec", "HasExpAutoVec", "true",
112+
"Experimental auto vectorization">;
113+
109114
//===----------------------------------------------------------------------===//
110115
// Registers, instruction descriptions ...
111116
//===----------------------------------------------------------------------===//

llvm/lib/Target/LoongArch/LoongArchSubtarget.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
4444
bool HasLaLocalWithAbs = false;
4545
bool HasUAL = false;
4646
bool HasLinkerRelax = false;
47+
bool HasExpAutoVec = false;
4748
unsigned GRLen = 32;
4849
MVT GRLenVT = MVT::i32;
4950
LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
@@ -102,6 +103,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
102103
bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; }
103104
bool hasUAL() const { return HasUAL; }
104105
bool hasLinkerRelax() const { return HasLinkerRelax; }
106+
bool hasExpAutoVec() const { return HasExpAutoVec; }
105107
MVT getGRLenVT() const { return GRLenVT; }
106108
unsigned getGRLen() const { return GRLen; }
107109
LoongArchABI::ABI getTargetABI() const { return TargetABI; }

llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,22 @@ using namespace llvm;
1919

2020
#define DEBUG_TYPE "loongarchtti"
2121

22+
// Reports the register width, in bits, for the requested register kind K.
//  - RGK_Scalar:           64 when the subtarget is 64-bit, otherwise 32.
//  - RGK_FixedWidthVector: 256 with LASX, 128 with LSX, but only when the
//                          experimental auto-vec feature is also enabled;
//                          0 otherwise.
//  - RGK_ScalableVector:   always a scalable size of 0.
TypeSize LoongArchTTIImpl::getRegisterBitWidth(
23+
TargetTransformInfo::RegisterKind K) const {
24+
switch (K) {
25+
case TargetTransformInfo::RGK_Scalar:
26+
return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
27+
case TargetTransformInfo::RGK_FixedWidthVector:
28+
// LASX is tested before LSX so that the wider 256-bit width is reported
// when both checks would pass.
if (ST->hasExtLASX() && ST->hasExpAutoVec())
29+
return TypeSize::getFixed(256);
30+
if (ST->hasExtLSX() && ST->hasExpAutoVec())
31+
return TypeSize::getFixed(128);
32+
// No vector extension, or auto-vec not enabled: report a zero width.
return TypeSize::getFixed(0);
33+
case TargetTransformInfo::RGK_ScalableVector:
34+
return TypeSize::getScalable(0);
35+
}
36+
37+
// Not reached for the three register kinds handled above.
llvm_unreachable("Unsupported register kind");
38+
}
39+
2240
// TODO: Implement more hooks to provide TTI machinery for LoongArch.

llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
3939
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
4040
TLI(ST->getTargetLowering()) {}
4141

42+
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
43+
4244
// TODO: Implement more hooks to provide TTI machinery for LoongArch.
4345
};
4446

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S | FileCheck %s
3+
4+
;; This is a collection of tests whose only purpose is to show changes in the
5+
;; default configuration. Please keep these tests minimal - if you're testing
6+
;; functionality of some specific configuration, please place that in a
7+
separate test file with a hard coded configuration (even if that
8+
;; configuration is the current default).
9+
10+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
11+
target triple = "loongarch64"
12+
13+
;; Input: a scalar loop that adds %v in place to each of the 1024 i64 elements
;; of %a. The autogenerated CHECK lines expect the loop to be rewritten with a
;; <4 x i64> vector body that steps the induction variable by 4.
define void @vector_add(ptr noalias nocapture %a, i64 %v) {
14+
; CHECK-LABEL: define void @vector_add(
15+
; CHECK-SAME: ptr noalias nocapture [[A:%.*]], i64 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
16+
; CHECK-NEXT: entry:
17+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
18+
; CHECK: vector.ph:
19+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0
20+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
21+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
22+
; CHECK: vector.body:
23+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
24+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
25+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
26+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
27+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
28+
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
29+
; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8
30+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
31+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
32+
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
33+
; CHECK: middle.block:
34+
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
35+
; CHECK: scalar.ph:
36+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
37+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
38+
; CHECK: for.body:
39+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
40+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
41+
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
42+
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
43+
; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
44+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
45+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
46+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
47+
; CHECK: for.end:
48+
; CHECK-NEXT: ret void
49+
;
50+
entry:
51+
br label %for.body
52+
53+
for.body:
54+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
55+
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
56+
%elem = load i64, ptr %arrayidx
57+
%add = add i64 %elem, %v
58+
store i64 %add, ptr %arrayidx
59+
%iv.next = add nuw nsw i64 %iv, 1
60+
%exitcond.not = icmp eq i64 %iv.next, 1024
61+
br i1 %exitcond.not, label %for.end, label %for.body
62+
63+
for.end:
64+
ret void
65+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Restrict the test-file suffixes for this directory to ".ll".
config.suffixes = [".ll"]
2+
3+
# Flag this directory as unsupported when "LoongArch" is absent from
# config.root.targets.
if not "LoongArch" in config.root.targets:
4+
config.unsupported = True

0 commit comments

Comments
 (0)