-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[GlobalISel][AArch64] Generate ptrtoint/inttoptr as opposed to bitcast in unmerge combine. #115225
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) Changes: When combining unmerge we could end up with ptr to i64 bitcasts. Make sure they are created as ptrtoint/inttoptr instead. Some of the tests are still disabled as they hit other issues. Full diff: https://github.com/llvm/llvm-project/pull/115225.diff 2 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 471a7f70dd546c..9dea4c1b412dbb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -1218,8 +1218,14 @@ class LegalizationArtifactCombiner {
} else {
LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
- if (!ConvertOp && DestTy != MergeSrcTy)
- ConvertOp = TargetOpcode::G_BITCAST;
+ if (!ConvertOp && DestTy != MergeSrcTy) {
+ if (DestTy.isPointer())
+ ConvertOp = TargetOpcode::G_INTTOPTR;
+ else if (MergeSrcTy.isPointer())
+ ConvertOp = TargetOpcode::G_PTRTOINT;
+ else
+ ConvertOp = TargetOpcode::G_BITCAST;
+ }
if (ConvertOp) {
Builder.setInstr(MI);
diff --git a/llvm/test/CodeGen/AArch64/getelementptr.ll b/llvm/test/CodeGen/AArch64/getelementptr.ll
new file mode 100644
index 00000000000000..756bc6b3fb8b8a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/getelementptr.ll
@@ -0,0 +1,454 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define ptr @s(ptr %p, i32 %q) {
+; CHECK-LABEL: s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x0, x0, w1, sxtw #2
+; CHECK-NEXT: ret
+ %d = getelementptr i32, ptr %p, i32 %q
+ ret ptr %d
+}
+
+define <2 x ptr> @v2(<2 x ptr> %p, i32 %q) {
+; CHECK-SD-LABEL: v2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.2s, w0
+; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #2
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: dup v1.2s, w0
+; CHECK-GI-NEXT: adrp x8, .LCPI1_0
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
+; CHECK-GI-NEXT: fmov x9, d2
+; CHECK-GI-NEXT: mov x11, v2.d[1]
+; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov x10, v1.d[1]
+; CHECK-GI-NEXT: mul x8, x8, x9
+; CHECK-GI-NEXT: mul x9, x10, x11
+; CHECK-GI-NEXT: mov v1.d[0], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, <2 x ptr> %p, i32 %q
+ ret <2 x ptr> %d
+}
+
+define <3 x ptr> @v3(<3 x ptr> %p, i32 %q) {
+; CHECK-SD-LABEL: v3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v3.2s, w0
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov w8, #2 // =0x2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: sshll v1.2d, v3.2s, #2
+; CHECK-SD-NEXT: fmov s3, w0
+; CHECK-SD-NEXT: sshll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: fmov d1, x8
+; CHECK-SD-NEXT: ushl d3, d3, d1
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: add d2, d2, d3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT: sxtw x9, w0
+; CHECK-GI-NEXT: adrp x8, .LCPI2_0
+; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI2_0]
+; CHECK-GI-NEXT: dup v3.2d, x9
+; CHECK-GI-NEXT: fmov x9, d4
+; CHECK-GI-NEXT: mov x11, v4.d[1]
+; CHECK-GI-NEXT: fmov x8, d3
+; CHECK-GI-NEXT: mov x10, v3.d[1]
+; CHECK-GI-NEXT: mul x8, x8, x9
+; CHECK-GI-NEXT: mul x9, x10, x11
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: mov v3.d[0], x10
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: mov v3.d[1], x8
+; CHECK-GI-NEXT: mov w8, #4 // =0x4
+; CHECK-GI-NEXT: fmov x9, d2
+; CHECK-GI-NEXT: smaddl x8, w0, w8, x9
+; CHECK-GI-NEXT: add v0.2d, v3.2d, v0.2d
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, <3 x ptr> %p, i32 %q
+ ret <3 x ptr> %d
+}
+
+;define <4 x ptr> @v4(<4 x ptr> %p, i32 %q) {
+; %d = getelementptr i32, <4 x ptr> %p, i32 %q
+; ret <4 x ptr> %d
+;}
+
+define <2 x ptr> @v2b(ptr %p, <2 x i32> %q) {
+; CHECK-SD-LABEL: v2b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #2
+; CHECK-SD-NEXT: dup v1.2d, x0
+; CHECK-SD-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI3_0
+; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: mov x10, v0.d[1]
+; CHECK-GI-NEXT: mov x11, v1.d[1]
+; CHECK-GI-NEXT: dup v1.2d, x0
+; CHECK-GI-NEXT: mul x8, x8, x9
+; CHECK-GI-NEXT: mul x9, x10, x11
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, ptr %p, <2 x i32> %q
+ ret <2 x ptr> %d
+}
+
+define <3 x ptr> @v3b(ptr %p, <3 x i32> %q) {
+; CHECK-SD-LABEL: v3b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.2d, x0
+; CHECK-SD-NEXT: sshll v2.2d, v0.2s, #2
+; CHECK-SD-NEXT: mov w8, #2 // =0x2
+; CHECK-SD-NEXT: sshll2 v3.2d, v0.4s, #0
+; CHECK-SD-NEXT: add v0.2d, v1.2d, v2.2d
+; CHECK-SD-NEXT: fmov d1, x8
+; CHECK-SD-NEXT: ushl d2, d3, d1
+; CHECK-SD-NEXT: fmov d3, x0
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: add d2, d3, d2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v3b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: smov x9, v0.s[0]
+; CHECK-GI-NEXT: smov x10, v0.s[1]
+; CHECK-GI-NEXT: adrp x8, .LCPI4_0
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
+; CHECK-GI-NEXT: mov x11, v2.d[1]
+; CHECK-GI-NEXT: mov v1.d[0], x9
+; CHECK-GI-NEXT: fmov x9, d2
+; CHECK-GI-NEXT: dup v2.2d, x0
+; CHECK-GI-NEXT: mov v1.d[1], x10
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov x10, v1.d[1]
+; CHECK-GI-NEXT: mul x8, x8, x9
+; CHECK-GI-NEXT: mul x9, x10, x11
+; CHECK-GI-NEXT: mov v1.d[0], x8
+; CHECK-GI-NEXT: mov w8, v0.s[2]
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: mov w9, #4 // =0x4
+; CHECK-GI-NEXT: smaddl x8, w8, w9, x0
+; CHECK-GI-NEXT: add v1.2d, v2.2d, v1.2d
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov d0, v1.d[1]
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: fmov d0, d1
+; CHECK-GI-NEXT: fmov d1, x10
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, ptr %p, <3 x i32> %q
+ ret <3 x ptr> %d
+}
+
+;define <4 x ptr> @v4b(ptr %p, <4 x i32> %q) {
+; %d = getelementptr i32, ptr %p, <4 x i32> %q
+; ret <4 x ptr> %d
+;}
+
+
+define ptr @s_10(ptr %p, i32 %q) {
+; CHECK-LABEL: s_10:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x0, x0, #40
+; CHECK-NEXT: ret
+ %d = getelementptr i32, ptr %p, i32 10
+ ret ptr %d
+}
+
+define <2 x ptr> @v2_10(<2 x ptr> %p, i32 %q) {
+; CHECK-SD-LABEL: v2_10:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #40 // =0x28
+; CHECK-SD-NEXT: dup v1.2d, x8
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2_10:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI6_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI6_0]
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, <2 x ptr> %p, i32 10
+ ret <2 x ptr> %d
+}
+
+define <3 x ptr> @v3_10(<3 x ptr> %p, i32 %q) {
+; CHECK-SD-LABEL: v3_10:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov w8, #40 // =0x28
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: dup v3.2d, x8
+; CHECK-SD-NEXT: add d2, d2, d3
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v3.2d
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v3_10:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: adrp x8, .LCPI7_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI7_0]
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: add x8, x8, #40
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, <3 x ptr> %p, i32 10
+ ret <3 x ptr> %d
+}
+
+;define <4 x ptr> @v4_10(<4 x ptr> %p, i32 %q) {
+; %d = getelementptr i32, <4 x ptr> %p, i32 10
+; ret <4 x ptr> %d
+;}
+
+define <2 x ptr> @v2b_10(ptr %p, <2 x i32> %q) {
+; CHECK-SD-LABEL: v2b_10:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #40 // =0x28
+; CHECK-SD-NEXT: dup v0.2d, x0
+; CHECK-SD-NEXT: dup v1.2d, x8
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2b_10:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2s, #10
+; CHECK-GI-NEXT: adrp x8, .LCPI8_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: mov x11, v1.d[1]
+; CHECK-GI-NEXT: dup v1.2d, x0
+; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: mov x10, v0.d[1]
+; CHECK-GI-NEXT: mul x8, x8, x9
+; CHECK-GI-NEXT: mul x9, x10, x11
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, ptr %p, <2 x i32> <i32 10, i32 10>
+ ret <2 x ptr> %d
+}
+
+define <3 x ptr> @v3b_10(ptr %p, <3 x i32> %q) {
+; CHECK-SD-LABEL: v3b_10:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #40 // =0x28
+; CHECK-SD-NEXT: dup v0.2d, x0
+; CHECK-SD-NEXT: fmov d3, x0
+; CHECK-SD-NEXT: dup v2.2d, x8
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: add d2, d3, d2
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v3b_10:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI9_1
+; CHECK-GI-NEXT: adrp x9, .LCPI9_0
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI9_1]
+; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI9_0]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: mov x10, v0.d[1]
+; CHECK-GI-NEXT: mov x11, v1.d[1]
+; CHECK-GI-NEXT: dup v1.2d, x0
+; CHECK-GI-NEXT: mul x8, x8, x9
+; CHECK-GI-NEXT: mul x9, x10, x11
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: add x8, x0, #40
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, ptr %p, <3 x i32> <i32 10, i32 10, i32 10>
+ ret <3 x ptr> %d
+}
+
+;define <4 x ptr> @v4b_10(ptr %p, <4 x i32> %q) {
+; %d = getelementptr i32, ptr %p, <4 x i32> <i32 10, i32 10, i32 10, i32 10>
+; ret <4 x ptr> %d
+;}
+
+
+define ptr @s_m10(ptr %p, i32 %q) {
+; CHECK-LABEL: s_m10:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub x0, x0, #40
+; CHECK-NEXT: ret
+ %d = getelementptr i32, ptr %p, i32 -10
+ ret ptr %d
+}
+
+define <2 x ptr> @v2_m10(<2 x ptr> %p, i32 %q) {
+; CHECK-SD-LABEL: v2_m10:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov x8, #-40 // =0xffffffffffffffd8
+; CHECK-SD-NEXT: dup v1.2d, x8
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2_m10:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI11_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, <2 x ptr> %p, i32 -10
+ ret <2 x ptr> %d
+}
+
+define <3 x ptr> @v3_m10(<3 x ptr> %p, i32 %q) {
+; CHECK-SD-LABEL: v3_m10:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov x8, #-40 // =0xffffffffffffffd8
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: dup v3.2d, x8
+; CHECK-SD-NEXT: add d2, d2, d3
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v3.2d
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v3_m10:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: adrp x8, .LCPI12_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: sub x8, x8, #40
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, <3 x ptr> %p, i32 -10
+ ret <3 x ptr> %d
+}
+
+;define <4 x ptr> @v4_m10(<4 x ptr> %p, i32 %q) {
+; %d = getelementptr i32, <4 x ptr> %p, i32 -10
+; ret <4 x ptr> %d
+;}
+
+define <2 x ptr> @v2b_m10(ptr %p, <2 x i32> %q) {
+; CHECK-SD-LABEL: v2b_m10:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov x8, #-40 // =0xffffffffffffffd8
+; CHECK-SD-NEXT: dup v1.2d, x0
+; CHECK-SD-NEXT: dup v0.2d, x8
+; CHECK-SD-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2b_m10:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvni v0.2s, #9
+; CHECK-GI-NEXT: adrp x8, .LCPI13_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI13_0]
+; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: mov x11, v1.d[1]
+; CHECK-GI-NEXT: dup v1.2d, x0
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: mov x10, v0.d[1]
+; CHECK-GI-NEXT: mul x8, x8, x9
+; CHECK-GI-NEXT: mul x9, x10, x11
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, ptr %p, <2 x i32> <i32 -10, i32 -10>
+ ret <2 x ptr> %d
+}
+
+define <3 x ptr> @v3b_m10(ptr %p, <3 x i32> %q) {
+; CHECK-SD-LABEL: v3b_m10:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov x8, #-40 // =0xffffffffffffffd8
+; CHECK-SD-NEXT: dup v0.2d, x0
+; CHECK-SD-NEXT: fmov d3, x0
+; CHECK-SD-NEXT: dup v2.2d, x8
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: add d2, d3, d2
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v3b_m10:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI14_1
+; CHECK-GI-NEXT: adrp x9, .LCPI14_0
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI14_1]
+; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: mov x10, v0.d[1]
+; CHECK-GI-NEXT: mov x11, v1.d[1]
+; CHECK-GI-NEXT: dup v1.2d, x0
+; CHECK-GI-NEXT: mul x8, x8, x9
+; CHECK-GI-NEXT: mul x9, x10, x11
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: sub x8, x0, #40
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: add v0.2d, v1.2d, v0.2d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ret
+ %d = getelementptr i32, ptr %p, <3 x i32> <i32 -10, i32 -10, i32 -10>
+ ret <3 x ptr> %d
+}
+
+;define <4 x ptr> @v4b_m10(ptr %p, <4 x i32> %q) {
+; %d = getelementptr i32, ptr %p, <4 x i32> <i32 -10, i32 -10, i32 -10, i32 -10>
+; ret <4 x ptr> %d
+;}
|
You mean existing disabled tests, hence they're unchanged by this commit? |
Oh I see the commented out test now. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we just not have coverage for this elsewhere in the AArch64 tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, it turns out I've been here before and already added a ptradd.ll test. I'll switch to that one.
@@ -0,0 +1,454 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | |||
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use explicit -global-isel=0 for DAG tests
if (DestTy.isPointer()) | ||
ConvertOp = TargetOpcode::G_INTTOPTR; | ||
else if (MergeSrcTy.isPointer()) | ||
ConvertOp = TargetOpcode::G_PTRTOINT; | ||
else | ||
ConvertOp = TargetOpcode::G_BITCAST; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I swear we have this exact code somewhere already
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah - there is some code in MachineIRBuilder::buildCast that does something quite similar
if (!ConvertOp && DestTy != MergeSrcTy) | ||
ConvertOp = TargetOpcode::G_BITCAST; | ||
if (!ConvertOp && DestTy != MergeSrcTy) { | ||
if (DestTy.isPointer()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MIR test for this?
; CHECK-GI-NEXT: mov v1.d[1], x9 | ||
; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d | ||
; CHECK-GI-NEXT: ret | ||
%d = getelementptr i32, <2 x ptr> %p, i32 %q |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The generated code is pretty suboptimal compared to SDAG. Just curious, do you know why?
;define <4 x ptr> @v4b_m10(ptr %p, <4 x i32> %q) { | ||
; %d = getelementptr i32, ptr %p, <4 x i32> <i32 -10, i32 -10, i32 -10, i32 -10> | ||
; ret <4 x ptr> %d | ||
;} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove this code?
…t in unmerge combine. When combining unmerge we could end up with ptr to i64 bitcasts. Make sure they are created as ptrtoint/inttoptr instead.
62da62d
to
08d7411
Compare
…t in unmerge combine. (llvm#115225) When combining unmerge we could end up with ptr to i64 bitcasts. Make sure they are created as ptrtoint/inttoptr instead.
When combining unmerge we could end up with ptr to i64 bitcasts. Make sure they are created as ptrtoint/inttoptr instead.