-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][GlobalISel] Legalize G_LOAD for v4s8 Vector #82989
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: None (chuongg3) Changes: Lowers `v4s8 = G_LOAD %ptr ptr` into `s32 = G_LOAD %ptr ptr` followed by `v4s8 = G_BITCAST s32`.
Full diff: https://github.com/llvm/llvm-project/pull/82989.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 60e046bc6cf407..3a8962515590cd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -52,6 +52,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT v16s8 = LLT::fixed_vector(16, 8);
const LLT v8s8 = LLT::fixed_vector(8, 8);
const LLT v4s8 = LLT::fixed_vector(4, 8);
+ const LLT v2s8 = LLT::fixed_vector(2, 8);
const LLT v8s16 = LLT::fixed_vector(8, 16);
const LLT v4s16 = LLT::fixed_vector(4, 16);
const LLT v2s16 = LLT::fixed_vector(2, 16);
@@ -387,8 +388,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(0, s32, 4)
.clampMaxNumElements(0, s64, 2)
.clampMaxNumElements(0, p0, 2)
+ // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
+ .bitcastIf(typeInSet(0, {v4s8}),
+ [=](const LegalityQuery &Query) {
+ const LLT VecTy = Query.Types[0];
+ return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
+ })
.customIf(IsPtrVecPred)
- .scalarizeIf(typeIs(0, v2s16), 0);
+ .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
getActionDefinitionsBuilder(G_STORE)
.customIf([=](const LegalityQuery &Query) {
diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll
new file mode 100644
index 00000000000000..3645e8d8f37db7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/load.ll
@@ -0,0 +1,318 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; ===== Legal Scalars =====
+
+define i8 @load_i8(ptr %ptr){
+; CHECK-LABEL: load_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrb w0, [x0]
+; CHECK-NEXT: ret
+ %a = load i8 , ptr %ptr
+ ret i8 %a
+}
+
+define i16 @load_i16(ptr %ptr){
+; CHECK-LABEL: load_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrh w0, [x0]
+; CHECK-NEXT: ret
+ %a = load i16 , ptr %ptr
+ ret i16 %a
+}
+
+define i32 @load_i32(ptr %ptr){
+; CHECK-LABEL: load_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w0, [x0]
+; CHECK-NEXT: ret
+ %a = load i32 , ptr %ptr
+ ret i32 %a
+}
+
+define i64 @load_i64(ptr %ptr){
+; CHECK-LABEL: load_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x0, [x0]
+; CHECK-NEXT: ret
+ %a = load i64 , ptr %ptr
+ ret i64 %a
+}
+
+; ===== Legal Vector Types =====
+
+define <8 x i8> @load_v8i8(ptr %ptr){
+; CHECK-LABEL: load_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %ptr
+ ret <8 x i8> %a
+}
+
+define <16 x i8> @load_v16i8(ptr %ptr){
+; CHECK-LABEL: load_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, ptr %ptr
+ ret <16 x i8> %a
+}
+
+define <4 x i16> @load_v4i16(ptr %ptr){
+; CHECK-LABEL: load_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %ptr
+ ret <4 x i16> %a
+}
+
+define <8 x i16> @load_v8i16(ptr %ptr){
+; CHECK-LABEL: load_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, ptr %ptr
+ ret <8 x i16> %a
+}
+
+define <2 x i32> @load_v2i32(ptr %ptr){
+; CHECK-LABEL: load_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %ptr
+ ret <2 x i32> %a
+}
+
+define <4 x i32> @load_v4i32(ptr %ptr){
+; CHECK-LABEL: load_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, ptr %ptr
+ ret <4 x i32> %a
+}
+
+define <2 x i64> @load_v2i64(ptr %ptr){
+; CHECK-LABEL: load_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %a = load <2 x i64>, ptr %ptr
+ ret <2 x i64> %a
+}
+
+; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
+
+define <2 x i8> @load_v2i8(ptr %ptr, <2 x i8> %b){
+; CHECK-SD-LABEL: load_v2i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0]
+; CHECK-SD-NEXT: add x8, x0, #1
+; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_v2i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr b0, [x0]
+; CHECK-GI-NEXT: ldr b1, [x0, #1]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+ %a = load <2 x i8>, ptr %ptr
+ ret <2 x i8> %a
+}
+
+define i32 @load_v4i8(ptr %ptr, <4 x i8> %b){
+; CHECK-LABEL: load_v4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr w0, [x0]
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %ptr
+ %c = bitcast <4 x i8> %a to i32
+ ret i32 %c
+}
+
+define <32 x i8> @load_v32i8(ptr %ptr){
+; CHECK-LABEL: load_v32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %a = load <32 x i8>, ptr %ptr
+ ret <32 x i8> %a
+}
+
+define <2 x i16> @load_v2i16(ptr %ptr){
+; CHECK-SD-LABEL: load_v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0]
+; CHECK-SD-NEXT: add x8, x0, #2
+; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: ldr h1, [x0, #2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+ %a = load <2 x i16>, ptr %ptr
+ ret <2 x i16> %a
+}
+
+define <16 x i16> @load_v16i16(ptr %ptr){
+; CHECK-LABEL: load_v16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %a = load <16 x i16>, ptr %ptr
+ ret <16 x i16> %a
+}
+
+define <1 x i32> @load_v1i32(ptr %ptr){
+; CHECK-LABEL: load_v1i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ret
+ %a = load <1 x i32>, ptr %ptr
+ ret <1 x i32> %a
+}
+
+define <8 x i32> @load_v8i32(ptr %ptr){
+; CHECK-LABEL: load_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %a = load <8 x i32>, ptr %ptr
+ ret <8 x i32> %a
+}
+
+define <4 x i64> @load_v4i64(ptr %ptr){
+; CHECK-LABEL: load_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %a = load <4 x i64>, ptr %ptr
+ ret <4 x i64> %a
+}
+
+; ===== Vectors with Non-Pow 2 Widths =====
+
+define <3 x i8> @load_v3i8(ptr %ptr){
+; CHECK-SD-LABEL: load_v3i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: umov w0, v0.b[0]
+; CHECK-SD-NEXT: umov w1, v0.b[1]
+; CHECK-SD-NEXT: umov w2, v0.b[2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_v3i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldrb w8, [x0]
+; CHECK-GI-NEXT: ldrb w1, [x0, #1]
+; CHECK-GI-NEXT: ldrb w2, [x0, #2]
+; CHECK-GI-NEXT: mov w0, w8
+; CHECK-GI-NEXT: ret
+ %a = load <3 x i8>, ptr %ptr
+ ret <3 x i8> %a
+}
+
+define <7 x i8> @load_v7i8(ptr %ptr){
+; CHECK-SD-LABEL: load_v7i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr d0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_v7i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr b0, [x0]
+; CHECK-GI-NEXT: ldr b1, [x0, #1]
+; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT: ldr b1, [x0, #2]
+; CHECK-GI-NEXT: mov v0.b[2], v1.b[0]
+; CHECK-GI-NEXT: ldr b1, [x0, #3]
+; CHECK-GI-NEXT: mov v0.b[3], v1.b[0]
+; CHECK-GI-NEXT: ldr b1, [x0, #4]
+; CHECK-GI-NEXT: mov v0.b[4], v1.b[0]
+; CHECK-GI-NEXT: ldr b1, [x0, #5]
+; CHECK-GI-NEXT: mov v0.b[5], v1.b[0]
+; CHECK-GI-NEXT: ldr b1, [x0, #6]
+; CHECK-GI-NEXT: mov v0.b[6], v1.b[0]
+; CHECK-GI-NEXT: mov v0.b[7], v0.b[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+ %a = load <7 x i8>, ptr %ptr
+ ret <7 x i8> %a
+}
+
+define <3 x i16> @load_v3i16(ptr %ptr){
+; CHECK-SD-LABEL: load_v3i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr d0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_v3i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: ldr h1, [x0, #2]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: ldr h1, [x0, #4]
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+ %a = load <3 x i16>, ptr %ptr
+ ret <3 x i16> %a
+}
+
+define <7 x i16> @load_v7i16(ptr %ptr){
+; CHECK-SD-LABEL: load_v7i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr q0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_v7i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: ldr h1, [x0, #2]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: ldr h1, [x0, #4]
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: ldr h1, [x0, #6]
+; CHECK-GI-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-GI-NEXT: ldr h1, [x0, #8]
+; CHECK-GI-NEXT: mov v0.h[4], v1.h[0]
+; CHECK-GI-NEXT: ldr h1, [x0, #10]
+; CHECK-GI-NEXT: mov v0.h[5], v1.h[0]
+; CHECK-GI-NEXT: ldr h1, [x0, #12]
+; CHECK-GI-NEXT: mov v0.h[6], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[7], v0.h[0]
+; CHECK-GI-NEXT: ret
+ %a = load <7 x i16>, ptr %ptr
+ ret <7 x i16> %a
+}
+
+define <3 x i32> @load_v3i32(ptr %ptr){
+; CHECK-SD-LABEL: load_v3i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr q0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: load_v3i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldp s0, s1, [x0]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: ldr s1, [x0, #8]
+; CHECK-GI-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: ret
+ %a = load <3 x i32>, ptr %ptr
+ ret <3 x i32> %a
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, along with the G_STORE version.
The tests show there might be some opportunity to use larger loads for some of the odd-sized vectors if we know they are aligned, but that is a separate issue from this.
llvm/test/CodeGen/AArch64/load.ll
Outdated
@@ -2,9 +2,6 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can remove -global-isel-abort=2 and 2>&1 again if there are no fallbacks.
f432d91
to
15db1e6
Compare
Lowers `v4s8 = G_LOAD %ptr ptr` into `s32 = G_LOAD %ptr ptr` followed by `v4s8 = G_BITCAST s32`
Lowers
v4s8 = G_LOAD %ptr ptr
into
s32 = G_LOAD %ptr ptr
v4s8 = G_BITCAST s32