[AArch64][GlobalISel] Legalize G_STORE for v4s8 vector #82498
Conversation
@llvm/pr-subscribers-backend-aarch64
Author: None (chuongg3)
Changes: Lowers G_STORE v4s8, ptr into s32 = G_BITCAST v4s8 followed by G_STORE s32, ptr.
Full diff: https://github.com/llvm/llvm-project/pull/82498.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 60e046bc6cf407..db7515c2a064db 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -52,6 +52,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT v16s8 = LLT::fixed_vector(16, 8);
const LLT v8s8 = LLT::fixed_vector(8, 8);
const LLT v4s8 = LLT::fixed_vector(4, 8);
+ const LLT v2s8 = LLT::fixed_vector(2, 8);
const LLT v8s16 = LLT::fixed_vector(8, 16);
const LLT v4s16 = LLT::fixed_vector(4, 16);
const LLT v2s16 = LLT::fixed_vector(2, 16);
@@ -422,8 +423,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(0, s64, 2)
.clampMaxNumElements(0, p0, 2)
.lowerIfMemSizeNotPow2()
+ // TODO: Use BITCAST for v2i8, v2i16
+ .customIf(typeInSet(0, {v4s8}))
.customIf(IsPtrVecPred)
- .scalarizeIf(typeIs(0, v2s16), 0);
+ .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
getActionDefinitionsBuilder(G_INDEXED_STORE)
// Idx 0 == Ptr, Idx 1 == Val
@@ -1599,6 +1602,18 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
Register ValReg = MI.getOperand(0).getReg();
const LLT ValTy = MRI.getType(ValReg);
+ // G_STORE v4s8, ptr => s32 = G_BITCAST v4s8
+ // G_STORE s32, ptr
+ if (ValTy.isVector() && ValTy.getNumElements() == 4 &&
+ ValTy.getScalarSizeInBits() == 8) {
+ Register MidReg =
+ MIRBuilder.buildBitcast(LLT::scalar(ValTy.getSizeInBits()), ValReg)
+ .getReg(0);
+ MI.getOperand(0).setReg(MidReg);
+ return true;
+ }
+
if (ValTy == LLT::scalar(128)) {
AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
diff --git a/llvm/test/CodeGen/AArch64/store.ll b/llvm/test/CodeGen/AArch64/store.ll
new file mode 100644
index 00000000000000..788ac7913ddd50
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/store.ll
@@ -0,0 +1,321 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; ===== Legal Scalars =====
+define void @store_i8(i8 %a, ptr %ptr){
+; CHECK-LABEL: store_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: strb w0, [x1]
+; CHECK-NEXT: ret
+ store i8 %a, ptr %ptr
+ ret void
+}
+
+define void @store_i16(i16 %a, ptr %ptr){
+; CHECK-LABEL: store_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: strh w0, [x1]
+; CHECK-NEXT: ret
+ store i16 %a, ptr %ptr
+ ret void
+}
+
+define void @store_i32(i32 %a, ptr %ptr){
+; CHECK-LABEL: store_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str w0, [x1]
+; CHECK-NEXT: ret
+ store i32 %a, ptr %ptr
+ ret void
+}
+
+define void @store_i64(i64 %a, ptr %ptr){
+; CHECK-LABEL: store_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x0, [x1]
+; CHECK-NEXT: ret
+ store i64 %a, ptr %ptr
+ ret void
+}
+
+; ===== Legal Vector Types =====
+
+define void @store_v8i8(<8 x i8> %a, ptr %ptr){
+; CHECK-LABEL: store_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ store <8 x i8> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v16i8(<16 x i8> %a, ptr %ptr){
+; CHECK-LABEL: store_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+ store <16 x i8> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v4i16(<4 x i16> %a, ptr %ptr){
+; CHECK-LABEL: store_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ store <4 x i16> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v8i16(<8 x i16> %a, ptr %ptr){
+; CHECK-LABEL: store_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+ store <8 x i16> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v2i32(<2 x i32> %a, ptr %ptr){
+; CHECK-LABEL: store_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ store <2 x i32> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v4i32(<4 x i32> %a, ptr %ptr){
+; CHECK-LABEL: store_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+ store <4 x i32> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v2i64(<2 x i64> %a, ptr %ptr){
+; CHECK-LABEL: store_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+ store <2 x i64> %a, ptr %ptr
+ ret void
+}
+
+; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
+
+define void @store_v4i8(i32 %a, ptr %ptr) {
+; CHECK-LABEL: store_v4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str w0, [x1]
+; CHECK-NEXT: ret
+ %c = bitcast i32 %a to <4 x i8>
+ store <4 x i8> %c, ptr %ptr
+ ret void
+}
+
+define void @store_v32i8(<32 x i8> %a, ptr %ptr){
+; CHECK-LABEL: store_v32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+ store <32 x i8> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v2i16(<2 x i16> %a, ptr %ptr){
+; CHECK-SD-LABEL: store_v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: mov w8, v0.s[1]
+; CHECK-SD-NEXT: fmov w9, s0
+; CHECK-SD-NEXT: strh w9, [x0]
+; CHECK-SD-NEXT: strh w8, [x0, #2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: store_v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: str h0, [x0]
+; CHECK-GI-NEXT: str h1, [x0, #2]
+; CHECK-GI-NEXT: ret
+ store <2 x i16> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v16i16(<16 x i16> %a, ptr %ptr){
+; CHECK-LABEL: store_v16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+ store <16 x i16> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v1i32(<1 x i32> %a, ptr %ptr){
+; CHECK-SD-LABEL: store_v1i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: str s0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: store_v1i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str s0, [x0]
+; CHECK-GI-NEXT: ret
+ store <1 x i32> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v8i32(<8 x i32> %a, ptr %ptr){
+; CHECK-LABEL: store_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+ store <8 x i32> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v4i64(<4 x i64> %a, ptr %ptr){
+; CHECK-LABEL: store_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+ store <4 x i64> %a, ptr %ptr
+ ret void
+}
+
+; ===== Vectors with Non-Pow 2 Widths =====
+
+define void @store_v3i8(<3 x i8> %a, ptr %ptr){
+; CHECK-SD-LABEL: store_v3i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #16
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-SD-NEXT: str s0, [sp, #12]
+; CHECK-SD-NEXT: ldrh w8, [sp, #12]
+; CHECK-SD-NEXT: strb w2, [x3, #2]
+; CHECK-SD-NEXT: strh w8, [x3]
+; CHECK-SD-NEXT: add sp, sp, #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: store_v3i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: strb w0, [x3]
+; CHECK-GI-NEXT: strb w1, [x3, #1]
+; CHECK-GI-NEXT: strb w2, [x3, #2]
+; CHECK-GI-NEXT: ret
+ store <3 x i8> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v7i8(<7 x i8> %a, ptr %ptr){
+; CHECK-SD-LABEL: store_v7i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add x8, x0, #6
+; CHECK-SD-NEXT: add x9, x0, #4
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: str s0, [x0]
+; CHECK-SD-NEXT: st1 { v0.b }[6], [x8]
+; CHECK-SD-NEXT: st1 { v0.h }[2], [x9]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: store_v7i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add x8, x0, #1
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: add x9, x0, #2
+; CHECK-GI-NEXT: st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT: add x8, x0, #3
+; CHECK-GI-NEXT: st1 { v0.b }[3], [x8]
+; CHECK-GI-NEXT: add x8, x0, #4
+; CHECK-GI-NEXT: st1 { v0.b }[4], [x8]
+; CHECK-GI-NEXT: add x8, x0, #5
+; CHECK-GI-NEXT: st1 { v0.b }[5], [x8]
+; CHECK-GI-NEXT: add x8, x0, #6
+; CHECK-GI-NEXT: st1 { v0.b }[2], [x9]
+; CHECK-GI-NEXT: st1 { v0.b }[6], [x8]
+; CHECK-GI-NEXT: ret
+ store <7 x i8> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v3i16(<3 x i16> %a, ptr %ptr){
+; CHECK-SD-LABEL: store_v3i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add x8, x0, #4
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: str s0, [x0]
+; CHECK-SD-NEXT: st1 { v0.h }[2], [x8]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: store_v3i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add x8, x0, #2
+; CHECK-GI-NEXT: add x9, x0, #4
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: str h0, [x0]
+; CHECK-GI-NEXT: st1 { v0.h }[1], [x8]
+; CHECK-GI-NEXT: st1 { v0.h }[2], [x9]
+; CHECK-GI-NEXT: ret
+ store <3 x i16> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v7i16(<7 x i16> %a, ptr %ptr){
+; CHECK-SD-LABEL: store_v7i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add x8, x0, #12
+; CHECK-SD-NEXT: add x9, x0, #8
+; CHECK-SD-NEXT: str d0, [x0]
+; CHECK-SD-NEXT: st1 { v0.h }[6], [x8]
+; CHECK-SD-NEXT: st1 { v0.s }[2], [x9]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: store_v7i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add x8, x0, #2
+; CHECK-GI-NEXT: add x9, x0, #4
+; CHECK-GI-NEXT: str h0, [x0]
+; CHECK-GI-NEXT: st1 { v0.h }[1], [x8]
+; CHECK-GI-NEXT: add x8, x0, #6
+; CHECK-GI-NEXT: st1 { v0.h }[3], [x8]
+; CHECK-GI-NEXT: add x8, x0, #8
+; CHECK-GI-NEXT: st1 { v0.h }[4], [x8]
+; CHECK-GI-NEXT: add x8, x0, #10
+; CHECK-GI-NEXT: st1 { v0.h }[5], [x8]
+; CHECK-GI-NEXT: add x8, x0, #12
+; CHECK-GI-NEXT: st1 { v0.h }[2], [x9]
+; CHECK-GI-NEXT: st1 { v0.h }[6], [x8]
+; CHECK-GI-NEXT: ret
+ store <7 x i16> %a, ptr %ptr
+ ret void
+}
+
+define void @store_v3i32(<3 x i32> %a, ptr %ptr){
+; CHECK-SD-LABEL: store_v3i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add x8, x0, #8
+; CHECK-SD-NEXT: str d0, [x0]
+; CHECK-SD-NEXT: st1 { v0.s }[2], [x8]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: store_v3i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add x8, x0, #4
+; CHECK-GI-NEXT: add x9, x0, #8
+; CHECK-GI-NEXT: str s0, [x0]
+; CHECK-GI-NEXT: st1 { v0.s }[1], [x8]
+; CHECK-GI-NEXT: st1 { v0.s }[2], [x9]
+; CHECK-GI-NEXT: ret
+ store <3 x i32> %a, ptr %ptr
+ ret void
+}
Can you check if this is endian-safe?
@@ -1599,6 +1602,18 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
Register ValReg = MI.getOperand(0).getReg();
const LLT ValTy = MRI.getType(ValReg);

// G_STORE v4s8, ptr => s32 = G_BITCAST v4s8
// G_STORE s32, ptr
if (ValTy.isVector() && ValTy.getNumElements() == 4 &&
If you're just checking for a specific type you can do if (ValTy == LLT::fixed_vector(4, 8))
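For reference, here is a minimal sketch of the v4s8 case from the diff above with the suggested type-equality check swapped in (same surrounding legalizeLoadStore context; not necessarily the form that was finally committed):

```cpp
// Sketch only: replaces the isVector()/getNumElements()/getScalarSizeInBits()
// predicates with the single type comparison suggested in the review.
if (ValTy == LLT::fixed_vector(4, 8)) {
  Register MidReg =
      MIRBuilder.buildBitcast(LLT::scalar(ValTy.getSizeInBits()), ValReg)
          .getReg(0);
  MI.getOperand(0).setReg(MidReg);
  return true;
}
```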
I believe that as a bitcast is defined as a store+load, we end up with […]

As far as I understand, Chuong was having a go at changing this to use bitcastIf instead of doing it in a custom method.
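To make the store+load intuition concrete, here is a small standalone C++ analogy (not LLVM code; the buffer names are made up for illustration): because the scalar is produced by copying the vector's in-memory bytes, storing that scalar writes exactly the bytes a direct vector store would, independent of endianness.

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t vec[4] = {1, 2, 3, 4};

  // "Bitcast" the 4 x i8 value to a 32-bit scalar: defined as store + load,
  // i.e. a plain byte copy of the vector's memory representation.
  uint32_t scalar;
  std::memcpy(&scalar, vec, sizeof(scalar));

  // Store both forms and compare the resulting bytes.
  uint8_t from_vec[4], from_scalar[4];
  std::memcpy(from_vec, vec, sizeof(from_vec));
  std::memcpy(from_scalar, &scalar, sizeof(from_scalar));
  assert(std::memcmp(from_vec, from_scalar, sizeof(from_vec)) == 0);
  return 0;
}
```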
Force-pushed from 38a6d39 to c501011.
Thanks. This LGTM
llvm/test/CodeGen/AArch64/store.ll (Outdated)
@@ -2,9 +2,6 @@
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If there are no longer any fallbacks, this could remove the -global-isel-abort=2 (and the 2>&1). That might be easier to do if you land the initial tests separately.
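Assuming the fallback path really is gone, the RUN lines quoted above would presumably simplify to something like this (a sketch of the suggested cleanup, keeping the existing check prefixes):

```llvm
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
```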
Force-pushed from c501011 to 420b339.
ba69230 [AArch64][GlobalISel] Pre-Commit Test for G_STORE v4s8 (llvm#82498)
Lowers
  G_STORE v4s8, ptr
into
  s32 = G_BITCAST v4s8
  G_STORE s32, ptr