Skip to content

Commit d25c79d

Browse files
leecheechen authored and SixWeining committed
[LoongArch] Support InlineAsm for LSX and LASX
The author of the following files is licongtian <[email protected]>:
- clang/lib/Basic/Targets/LoongArch.cpp
- llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

The files mentioned above implement InlineAsm for LSX and LASX as follows:
- Enable clang to parse LSX/LASX register names, such as $vr0.
- Support operands whose type is 128-bit or 256-bit when the constraint is 'f'.
- Support specifying an LSX/LASX register via a constraint, such as "={$xr0}".
- Support the operand modifiers 'u' and 'w'.
- Support and legalize the data types and register classes involved in LSX/LASX in the lowering process.

Reviewed By: xen0n, SixWeining
Differential Revision: https://reviews.llvm.org/D154931
1 parent c299efb commit d25c79d

13 files changed

+307
-2
lines changed

clang/lib/Basic/Targets/LoongArch.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,17 @@ ArrayRef<const char *> LoongArchTargetInfo::getGCCRegNames() const {
3333
"$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27",
3434
"$f28", "$f29", "$f30", "$f31",
3535
// Condition flag registers.
36-
"$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7"};
36+
"$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7",
37+
// 128-bit vector registers.
38+
"$vr0", "$vr1", "$vr2", "$vr3", "$vr4", "$vr5", "$vr6", "$vr7", "$vr8",
39+
"$vr9", "$vr10", "$vr11", "$vr12", "$vr13", "$vr14", "$vr15", "$vr16",
40+
"$vr17", "$vr18", "$vr19", "$vr20", "$vr21", "$vr22", "$vr23", "$vr24",
41+
"$vr25", "$vr26", "$vr27", "$vr28", "$vr29", "$vr30", "$vr31",
42+
// 256-bit vector registers.
43+
"$xr0", "$xr1", "$xr2", "$xr3", "$xr4", "$xr5", "$xr6", "$xr7", "$xr8",
44+
"$xr9", "$xr10", "$xr11", "$xr12", "$xr13", "$xr14", "$xr15", "$xr16",
45+
"$xr17", "$xr18", "$xr19", "$xr20", "$xr21", "$xr22", "$xr23", "$xr24",
46+
"$xr25", "$xr26", "$xr27", "$xr28", "$xr29", "$xr30", "$xr31"};
3747
return llvm::ArrayRef(GCCRegNames);
3848
}
3949

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: not %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s 2>&1 -o - | FileCheck %s
2+
3+
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
4+
5+
void test() {
6+
// CHECK: :[[#@LINE+1]]:28: error: unknown register name 'xr0' in asm
7+
register v32i8 p0 asm ("xr0");
8+
// CHECK: :[[#@LINE+1]]:29: error: unknown register name '$xr32' in asm
9+
register v32i8 p32 asm ("$xr32");
10+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "^define |tail call"
2+
// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s
3+
4+
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
5+
6+
// CHECK-LABEL: @test_xr0(
7+
// CHECK: tail call void asm sideeffect "", "{$xr0}"(<32 x i8> undef) #[[ATTR1:[0-9]+]], !srcloc !2
8+
//
9+
void test_xr0() {
10+
register v32i8 a asm ("$xr0");
11+
asm ("" :: "f"(a));
12+
}
13+
14+
// CHECK-LABEL: @test_xr7(
15+
// CHECK: tail call void asm sideeffect "", "{$xr7}"(<32 x i8> undef) #[[ATTR1]], !srcloc !3
16+
//
17+
void test_xr7() {
18+
register v32i8 a asm ("$xr7");
19+
asm ("" :: "f"(a));
20+
}
21+
22+
// CHECK-LABEL: @test_xr15(
23+
// CHECK: tail call void asm sideeffect "", "{$xr15}"(<32 x i8> undef) #[[ATTR1]], !srcloc !4
24+
//
25+
void test_xr15() {
26+
register v32i8 a asm ("$xr15");
27+
asm ("" :: "f"(a));
28+
}
29+
30+
// CHECK-LABEL: @test_xr31(
31+
// CHECK: tail call void asm sideeffect "", "{$xr31}"(<32 x i8> undef) #[[ATTR1]], !srcloc !5
32+
//
33+
void test_xr31() {
34+
register v32i8 a asm ("$xr31");
35+
asm ("" :: "f"(a));
36+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
2+
// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s
3+
4+
typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32)));
5+
6+
// CHECK-LABEL: define dso_local void @test_u
7+
// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
8+
// CHECK-NEXT: entry:
9+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "=f"() #[[ATTR1:[0-9]+]], !srcloc !2
10+
// CHECK-NEXT: ret void
11+
//
12+
void test_u() {
13+
v4i64 v4i64_r;
14+
asm volatile ("xvldi %u0, 1" : "=f" (v4i64_r));
15+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: not %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s 2>&1 -o - | FileCheck %s
2+
3+
typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
4+
5+
void test() {
6+
// CHECK: :[[#@LINE+1]]:28: error: unknown register name 'vr0' in asm
7+
register v16i8 p0 asm ("vr0");
8+
// CHECK: :[[#@LINE+1]]:29: error: unknown register name '$vr32' in asm
9+
register v16i8 p32 asm ("$vr32");
10+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "^define |tail call"
2+
// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s
3+
4+
typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
5+
6+
// CHECK-LABEL: @test_vr0(
7+
// CHECK: tail call void asm sideeffect "", "{$vr0}"(<16 x i8> undef) #[[ATTR1:[0-9]+]], !srcloc !2
8+
//
9+
void test_vr0() {
10+
register v16i8 a asm ("$vr0");
11+
asm ("" :: "f"(a));
12+
}
13+
14+
// CHECK-LABEL: @test_vr7(
15+
// CHECK: tail call void asm sideeffect "", "{$vr7}"(<16 x i8> undef) #[[ATTR1]], !srcloc !3
16+
//
17+
void test_vr7() {
18+
register v16i8 a asm ("$vr7");
19+
asm ("" :: "f"(a));
20+
}
21+
22+
// CHECK-LABEL: @test_vr15(
23+
// CHECK: tail call void asm sideeffect "", "{$vr15}"(<16 x i8> undef) #[[ATTR1]], !srcloc !4
24+
//
25+
void test_vr15() {
26+
register v16i8 a asm ("$vr15");
27+
asm ("" :: "f"(a));
28+
}
29+
30+
// CHECK-LABEL: @test_vr31(
31+
// CHECK: tail call void asm sideeffect "", "{$vr31}"(<16 x i8> undef) #[[ATTR1]], !srcloc !5
32+
//
33+
void test_vr31() {
34+
register v16i8 a asm ("$vr31");
35+
asm ("" :: "f"(a));
36+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
2+
// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s
3+
4+
typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16)));
5+
6+
// CHECK-LABEL: define dso_local void @test_w
7+
// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
8+
// CHECK-NEXT: entry:
9+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "=f"() #[[ATTR1:[0-9]+]], !srcloc !2
10+
// CHECK-NEXT: ret void
11+
//
12+
void test_w() {
13+
v2i64 v2i64_r;
14+
asm volatile ("vldi %w0, 1" : "=f" (v2i64_r));
15+
}

llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,20 @@ bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
7575
return false;
7676
}
7777
break;
78+
case 'w': // Print LSX registers.
79+
if (MO.getReg().id() >= LoongArch::VR0 &&
80+
MO.getReg().id() <= LoongArch::VR31)
81+
break;
82+
// The modifier is 'w' but the operand is not an LSX register; report an
83+
// unknown operand error.
84+
return true;
85+
case 'u': // Print LASX registers.
86+
if (MO.getReg().id() >= LoongArch::XR0 &&
87+
MO.getReg().id() <= LoongArch::XR31)
88+
break;
89+
// The modifier is 'u' but the operand is not an LASX register; report an
90+
// unknown operand error.
91+
return true;
7892
// TODO: handle other extra codes if any.
7993
}
8094
}

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
5353
addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
5454
if (Subtarget.hasBasicD())
5555
addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
56+
if (Subtarget.hasExtLSX())
57+
for (auto VT : {MVT::v4f32, MVT::v2f64, MVT::v16i8, MVT::v8i16, MVT::v4i32,
58+
MVT::v2i64})
59+
addRegisterClass(VT, &LoongArch::LSX128RegClass);
60+
if (Subtarget.hasExtLASX())
61+
for (auto VT : {MVT::v8f32, MVT::v4f64, MVT::v32i8, MVT::v16i16, MVT::v8i32,
62+
MVT::v4i64})
63+
addRegisterClass(VT, &LoongArch::LASX256RegClass);
5664

5765
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
5866
MVT::i1, Promote);
@@ -3048,6 +3056,12 @@ LoongArchTargetLowering::getRegForInlineAsmConstraint(
30483056
return std::make_pair(0U, &LoongArch::FPR32RegClass);
30493057
if (Subtarget.hasBasicD() && VT == MVT::f64)
30503058
return std::make_pair(0U, &LoongArch::FPR64RegClass);
3059+
if (Subtarget.hasExtLSX() &&
3060+
TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
3061+
return std::make_pair(0U, &LoongArch::LSX128RegClass);
3062+
if (Subtarget.hasExtLASX() &&
3063+
TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
3064+
return std::make_pair(0U, &LoongArch::LASX256RegClass);
30513065
break;
30523066
default:
30533067
break;
@@ -3065,7 +3079,8 @@ LoongArchTargetLowering::getRegForInlineAsmConstraint(
30653079
// decode the usage of register name aliases into their official names. And
30663080
// AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
30673081
// official register names.
3068-
if (Constraint.startswith("{$r") || Constraint.startswith("{$f")) {
3082+
if (Constraint.startswith("{$r") || Constraint.startswith("{$f") ||
3083+
Constraint.startswith("{$vr") || Constraint.startswith("{$xr")) {
30693084
bool IsFP = Constraint[2] == 'f';
30703085
std::pair<StringRef, StringRef> Temp = Constraint.split('$');
30713086
std::pair<unsigned, const TargetRegisterClass *> R;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
3+
4+
define void @test_u() nounwind {
5+
; CHECK-LABEL: test_u:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: #APP
8+
; CHECK-NEXT: xvldi $xr0, 1
9+
; CHECK-NEXT: #NO_APP
10+
; CHECK-NEXT: ret
11+
entry:
12+
%0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "=f"()
13+
ret void
14+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
3+
4+
define void @register_xr1() nounwind {
5+
; CHECK-LABEL: register_xr1:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: #APP
8+
; CHECK-NEXT: xvldi $xr1, 1
9+
; CHECK-NEXT: #NO_APP
10+
; CHECK-NEXT: ret
11+
entry:
12+
%0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "={$xr1}"()
13+
ret void
14+
}
15+
16+
define void @register_xr7() nounwind {
17+
; CHECK-LABEL: register_xr7:
18+
; CHECK: # %bb.0: # %entry
19+
; CHECK-NEXT: #APP
20+
; CHECK-NEXT: xvldi $xr7, 1
21+
; CHECK-NEXT: #NO_APP
22+
; CHECK-NEXT: ret
23+
entry:
24+
%0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "={$xr7}"()
25+
ret void
26+
}
27+
28+
define void @register_xr23() nounwind {
29+
; CHECK-LABEL: register_xr23:
30+
; CHECK: # %bb.0: # %entry
31+
; CHECK-NEXT: #APP
32+
; CHECK-NEXT: xvldi $xr23, 1
33+
; CHECK-NEXT: #NO_APP
34+
; CHECK-NEXT: ret
35+
entry:
36+
%0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "={$xr23}"()
37+
ret void
38+
}
39+
40+
;; The lower 64 bits of the vector register '$xr31' overlap with
41+
;; the floating-point register '$f31' ('$fs7'). And '$f31' ('$fs7')
42+
;; is a callee-saved register which is preserved across calls.
43+
;; That's why the fst.d and fld.d instructions are emitted.
44+
define void @register_xr31() nounwind {
45+
; CHECK-LABEL: register_xr31:
46+
; CHECK: # %bb.0: # %entry
47+
; CHECK-NEXT: addi.d $sp, $sp, -16
48+
; CHECK-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill
49+
; CHECK-NEXT: #APP
50+
; CHECK-NEXT: xvldi $xr31, 1
51+
; CHECK-NEXT: #NO_APP
52+
; CHECK-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload
53+
; CHECK-NEXT: addi.d $sp, $sp, 16
54+
; CHECK-NEXT: ret
55+
entry:
56+
%0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "={$xr31}"()
57+
ret void
58+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
3+
4+
define void @test_w() nounwind {
5+
; CHECK-LABEL: test_w:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: #APP
8+
; CHECK-NEXT: vldi $vr0, 1
9+
; CHECK-NEXT: #NO_APP
10+
; CHECK-NEXT: ret
11+
entry:
12+
%0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "=f"()
13+
ret void
14+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
3+
4+
define void @register_vr1() nounwind {
5+
; CHECK-LABEL: register_vr1:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: #APP
8+
; CHECK-NEXT: vldi $vr1, 1
9+
; CHECK-NEXT: #NO_APP
10+
; CHECK-NEXT: ret
11+
entry:
12+
%0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "={$vr1}"()
13+
ret void
14+
}
15+
16+
define void @register_vr7() nounwind {
17+
; CHECK-LABEL: register_vr7:
18+
; CHECK: # %bb.0: # %entry
19+
; CHECK-NEXT: #APP
20+
; CHECK-NEXT: vldi $vr7, 1
21+
; CHECK-NEXT: #NO_APP
22+
; CHECK-NEXT: ret
23+
entry:
24+
%0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "={$vr7}"()
25+
ret void
26+
}
27+
28+
define void @register_vr23() nounwind {
29+
; CHECK-LABEL: register_vr23:
30+
; CHECK: # %bb.0: # %entry
31+
; CHECK-NEXT: #APP
32+
; CHECK-NEXT: vldi $vr23, 1
33+
; CHECK-NEXT: #NO_APP
34+
; CHECK-NEXT: ret
35+
entry:
36+
%0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "={$vr23}"()
37+
ret void
38+
}
39+
40+
;; The lower half of the vector register '$vr31' overlaps with
41+
;; the floating-point register '$f31'. And '$f31' is a callee-saved
42+
;; register which is preserved across calls. That's why the
43+
;; fst.d and fld.d instructions are emitted.
44+
define void @register_vr31() nounwind {
45+
; CHECK-LABEL: register_vr31:
46+
; CHECK: # %bb.0: # %entry
47+
; CHECK-NEXT: addi.d $sp, $sp, -16
48+
; CHECK-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill
49+
; CHECK-NEXT: #APP
50+
; CHECK-NEXT: vldi $vr31, 1
51+
; CHECK-NEXT: #NO_APP
52+
; CHECK-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload
53+
; CHECK-NEXT: addi.d $sp, $sp, 16
54+
; CHECK-NEXT: ret
55+
entry:
56+
%0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "={$vr31}"()
57+
ret void
58+
}

0 commit comments

Comments
 (0)