Skip to content

Commit fb8f591

Browse files
committed
[AArch64][GlobalISel] Adopt dup(load) -> LD1R patterns from SelectionDAG
Follow-up to #65630.
1 parent de018f5 commit fb8f591

File tree

4 files changed

+31
-10
lines changed

4 files changed

+31
-10
lines changed

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,3 +496,20 @@ let AddedComplexity = 19 in {
496496
defm : VecROStoreLane64_0Pat<ro16, store, v4i16, i16, hsub, STRHroW, STRHroX>;
497497
defm : VecROStoreLane64_0Pat<ro32, store, v2i32, i32, ssub, STRSroW, STRSroX>;
498498
}
// dup(load) -> LD1R selection patterns, adopted from SelectionDAG.
// Each pattern matches a scalar load (i8/i16/i32/i64) whose value is broadcast
// with AArch64dup into a 64-bit or 128-bit vector type, and selects the LD1R
// instruction of the matching arrangement (8b/16b/4h/8h/2s/4s/1d/2d), using
// the load's base register $Rn as the address operand.
// NOTE(review): only the v8i8 pattern wraps the address in am_indexed8; the
// other seven match a bare GPR64sp:$Rn. Confirm this difference is intended.
499+
500+
def : Pat<(v8i8 (AArch64dup (i8 (load (am_indexed8 GPR64sp:$Rn))))),
501+
(LD1Rv8b GPR64sp:$Rn)>;
502+
def : Pat<(v16i8 (AArch64dup (i8 (load GPR64sp:$Rn)))),
503+
(LD1Rv16b GPR64sp:$Rn)>;
504+
def : Pat<(v4i16 (AArch64dup (i16 (load GPR64sp:$Rn)))),
505+
(LD1Rv4h GPR64sp:$Rn)>;
506+
def : Pat<(v8i16 (AArch64dup (i16 (load GPR64sp:$Rn)))),
507+
(LD1Rv8h GPR64sp:$Rn)>;
508+
def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
509+
(LD1Rv2s GPR64sp:$Rn)>;
510+
def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
511+
(LD1Rv4s GPR64sp:$Rn)>;
512+
def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
513+
(LD1Rv2d GPR64sp:$Rn)>;
514+
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
515+
(LD1Rv1d GPR64sp:$Rn)>;

llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13794,8 +13794,9 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
1379413794
;
1379513795
; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1r:
1379613796
; CHECK-GISEL: ; %bb.0:
13797-
; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], #1
13798-
; CHECK-GISEL-NEXT: str x0, [x1]
13797+
; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0]
13798+
; CHECK-GISEL-NEXT: add x8, x0, #1
13799+
; CHECK-GISEL-NEXT: str x8, [x1]
1379913800
; CHECK-GISEL-NEXT: ret
1380013801
%tmp1 = load i8, ptr %bar
1380113802
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13828,8 +13829,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
1382813829
;
1382913830
; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1r:
1383013831
; CHECK-GISEL: ; %bb.0:
13831-
; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], x2
13832-
; CHECK-GISEL-NEXT: str x0, [x1]
13832+
; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0]
13833+
; CHECK-GISEL-NEXT: add x8, x0, x2
13834+
; CHECK-GISEL-NEXT: str x8, [x1]
1383313835
; CHECK-GISEL-NEXT: ret
1383413836
%tmp1 = load i8, ptr %bar
1383513837
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13862,8 +13864,9 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
1386213864
;
1386313865
; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1r:
1386413866
; CHECK-GISEL: ; %bb.0:
13865-
; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0], #1
13866-
; CHECK-GISEL-NEXT: str x0, [x1]
13867+
; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0]
13868+
; CHECK-GISEL-NEXT: add x8, x0, #1
13869+
; CHECK-GISEL-NEXT: str x8, [x1]
1386713870
; CHECK-GISEL-NEXT: ret
1386813871
%tmp1 = load i8, ptr %bar
1386913872
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13888,8 +13891,9 @@ define <8 x i8> @test_v8i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
1388813891
;
1388913892
; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1r:
1389013893
; CHECK-GISEL: ; %bb.0:
13891-
; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0], x2
13892-
; CHECK-GISEL-NEXT: str x0, [x1]
13894+
; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0]
13895+
; CHECK-GISEL-NEXT: add x8, x0, x2
13896+
; CHECK-GISEL-NEXT: str x8, [x1]
1389313897
; CHECK-GISEL-NEXT: ret
1389413898
%tmp1 = load i8, ptr %bar
1389513899
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0

llvm/test/CodeGen/AArch64/arm64-ld1.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
22
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -global-isel=1 -global-isel-abort=2 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI
3+
; RUN: llc < %s -global-isel=1 -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

55
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
66
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }

llvm/test/CodeGen/AArch64/arm64-st1.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
2-
; RUN: llc < %s -global-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
2+
; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
33
; The instruction latencies of Exynos-M3 trigger the transform we see under the Exynos check.
44
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m3 | FileCheck --check-prefix=EXYNOS %s
55

0 commit comments

Comments
 (0)