Skip to content

Commit af56c4a

Browse files
committed
[AArch64] Add an aarch64-enable-ext-to-tbl option. NFC
This transform has caused a few issues with operations that can naturally be extended. This patch just adds a debug option for disabling the transform, useful for testing cases where it might not be profitable.
1 parent 6bad175 commit af56c4a

File tree

2 files changed

+236
-17
lines changed

2 files changed

+236
-17
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,10 @@ EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
131131
"gather intrinsics"),
132132
cl::init(true));
133133

134+
static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
135+
cl::desc("Combine ext and trunc to TBL"),
136+
cl::init(true));
137+
134138
// All of the XOR, OR and CMP use ALU ports, and data dependency will become the
135139
// bottleneck after this transform on high-end CPUs. So this max leaf node
136140
// limitation is a guard to ensure cmp+ccmp will be profitable.
@@ -14791,7 +14795,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
1479114795
Instruction *I, Loop *L, const TargetTransformInfo &TTI) const {
1479214796
// shuffle_vector instructions are serialized when targeting SVE,
1479314797
// see LowerSPLAT_VECTOR. This peephole is not beneficial.
14794-
if (Subtarget->useSVEForFixedLengthVectors())
14798+
if (!EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
1479514799
return false;
1479614800

1479714801
// Try to optimize conversions using tbl. This requires materializing constant

llvm/test/CodeGen/AArch64/trunc-to-tbl.ll

Lines changed: 231 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
33
; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
4+
; RUN: llc -mtriple=aarch64_be-unknown-linux -aarch64-enable-ext-to-tbl=false -o - %s | FileCheck --check-prefix=CHECK-DISABLE %s
45

56
; CHECK-LABEL: lCPI0_0:
67
; CHECK-NEXT: .byte 0 ; 0x0
@@ -85,7 +86,30 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
8586
; CHECK-BE-NEXT: b.eq .LBB0_1
8687
; CHECK-BE-NEXT: // %bb.2: // %exit
8788
; CHECK-BE-NEXT: ret
88-
89+
;
90+
; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_in_loop:
91+
; CHECK-DISABLE: // %bb.0: // %entry
92+
; CHECK-DISABLE-NEXT: mov x8, xzr
93+
; CHECK-DISABLE-NEXT: .LBB0_1: // %loop
94+
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
95+
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
96+
; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
97+
; CHECK-DISABLE-NEXT: add x10, x9, #16
98+
; CHECK-DISABLE-NEXT: add x11, x9, #48
99+
; CHECK-DISABLE-NEXT: add x9, x9, #32
100+
; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
101+
; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x11]
102+
; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x9]
103+
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
104+
; CHECK-DISABLE-NEXT: add x8, x8, #1
105+
; CHECK-DISABLE-NEXT: cmp x8, #1000
106+
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
107+
; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
108+
; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
109+
; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
110+
; CHECK-DISABLE-NEXT: b.eq .LBB0_1
111+
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
112+
; CHECK-DISABLE-NEXT: ret
89113
entry:
90114
br label %loop
91115

@@ -131,6 +155,21 @@ define void @trunc_v16i32_to_v16i8_no_loop(ptr %A, ptr %dst) {
131155
; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
132156
; CHECK-BE-NEXT: st1 { v0.16b }, [x1]
133157
; CHECK-BE-NEXT: ret
158+
;
159+
; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_no_loop:
160+
; CHECK-DISABLE: // %bb.0: // %entry
161+
; CHECK-DISABLE-NEXT: add x8, x0, #16
162+
; CHECK-DISABLE-NEXT: add x9, x0, #48
163+
; CHECK-DISABLE-NEXT: add x10, x0, #32
164+
; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x0]
165+
; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x8]
166+
; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x9]
167+
; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x10]
168+
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
169+
; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
170+
; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
171+
; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x1]
172+
; CHECK-DISABLE-NEXT: ret
134173
entry:
135174
%l.A = load <16 x i32>, ptr %A
136175
%trunc = trunc <16 x i32> %l.A to <16 x i8>
@@ -216,7 +255,25 @@ define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
216255
; CHECK-BE-NEXT: b.eq .LBB2_1
217256
; CHECK-BE-NEXT: // %bb.2: // %exit
218257
; CHECK-BE-NEXT: ret
219-
258+
;
259+
; CHECK-DISABLE-LABEL: trunc_v8i32_to_v8i8_in_loop:
260+
; CHECK-DISABLE: // %bb.0: // %entry
261+
; CHECK-DISABLE-NEXT: mov x8, xzr
262+
; CHECK-DISABLE-NEXT: .LBB2_1: // %loop
263+
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
264+
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
265+
; CHECK-DISABLE-NEXT: add x10, x9, #16
266+
; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
267+
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
268+
; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
269+
; CHECK-DISABLE-NEXT: add x8, x8, #1
270+
; CHECK-DISABLE-NEXT: cmp x8, #1000
271+
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
272+
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
273+
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
274+
; CHECK-DISABLE-NEXT: b.eq .LBB2_1
275+
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
276+
; CHECK-DISABLE-NEXT: ret
220277
entry:
221278
br label %loop
222279

@@ -330,8 +387,42 @@ define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
330387
; CHECK-BE-NEXT: b.eq .LBB3_1
331388
; CHECK-BE-NEXT: // %bb.2: // %exit
332389
; CHECK-BE-NEXT: ret
333-
334-
390+
;
391+
; CHECK-DISABLE-LABEL: trunc_v16i64_to_v16i8_in_loop:
392+
; CHECK-DISABLE: // %bb.0: // %entry
393+
; CHECK-DISABLE-NEXT: mov x8, xzr
394+
; CHECK-DISABLE-NEXT: .LBB3_1: // %loop
395+
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
396+
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #7
397+
; CHECK-DISABLE-NEXT: add x10, x9, #16
398+
; CHECK-DISABLE-NEXT: add x11, x9, #48
399+
; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
400+
; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
401+
; CHECK-DISABLE-NEXT: add x10, x9, #112
402+
; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
403+
; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x10]
404+
; CHECK-DISABLE-NEXT: add x10, x9, #96
405+
; CHECK-DISABLE-NEXT: add x11, x9, #32
406+
; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
407+
; CHECK-DISABLE-NEXT: add x10, x9, #80
408+
; CHECK-DISABLE-NEXT: add x9, x9, #64
409+
; CHECK-DISABLE-NEXT: ld1 { v5.2d }, [x11]
410+
; CHECK-DISABLE-NEXT: ld1 { v6.2d }, [x10]
411+
; CHECK-DISABLE-NEXT: ld1 { v7.2d }, [x9]
412+
; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
413+
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
414+
; CHECK-DISABLE-NEXT: add x8, x8, #1
415+
; CHECK-DISABLE-NEXT: uzp1 v3.4s, v4.4s, v3.4s
416+
; CHECK-DISABLE-NEXT: cmp x8, #1000
417+
; CHECK-DISABLE-NEXT: uzp1 v4.4s, v7.4s, v6.4s
418+
; CHECK-DISABLE-NEXT: uzp1 v2.4s, v5.4s, v2.4s
419+
; CHECK-DISABLE-NEXT: uzp1 v1.8h, v4.8h, v3.8h
420+
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
421+
; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
422+
; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
423+
; CHECK-DISABLE-NEXT: b.eq .LBB3_1
424+
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
425+
; CHECK-DISABLE-NEXT: ret
335426
entry:
336427
br label %loop
337428

@@ -431,8 +522,31 @@ define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
431522
; CHECK-BE-NEXT: b.eq .LBB4_1
432523
; CHECK-BE-NEXT: // %bb.2: // %exit
433524
; CHECK-BE-NEXT: ret
434-
435-
525+
;
526+
; CHECK-DISABLE-LABEL: trunc_v8i64_to_v8i8_in_loop:
527+
; CHECK-DISABLE: // %bb.0: // %entry
528+
; CHECK-DISABLE-NEXT: mov x8, xzr
529+
; CHECK-DISABLE-NEXT: .LBB4_1: // %loop
530+
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
531+
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
532+
; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
533+
; CHECK-DISABLE-NEXT: add x10, x9, #16
534+
; CHECK-DISABLE-NEXT: add x11, x9, #48
535+
; CHECK-DISABLE-NEXT: add x9, x9, #32
536+
; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
537+
; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
538+
; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x9]
539+
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
540+
; CHECK-DISABLE-NEXT: add x8, x8, #1
541+
; CHECK-DISABLE-NEXT: cmp x8, #1000
542+
; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
543+
; CHECK-DISABLE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
544+
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
545+
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
546+
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
547+
; CHECK-DISABLE-NEXT: b.eq .LBB4_1
548+
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
549+
; CHECK-DISABLE-NEXT: ret
436550
entry:
437551
br label %loop
438552

@@ -529,7 +643,48 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
529643
; CHECK-BE-NEXT: b.eq .LBB5_1
530644
; CHECK-BE-NEXT: // %bb.2: // %exit
531645
; CHECK-BE-NEXT: ret
532-
646+
;
647+
; CHECK-DISABLE-LABEL: trunc_v8i19_to_v8i8_in_loop:
648+
; CHECK-DISABLE: // %bb.0: // %entry
649+
; CHECK-DISABLE-NEXT: mov x8, xzr
650+
; CHECK-DISABLE-NEXT: .LBB5_1: // %loop
651+
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
652+
; CHECK-DISABLE-NEXT: ldp x10, x9, [x0]
653+
; CHECK-DISABLE-NEXT: ldrb w16, [x0, #18]
654+
; CHECK-DISABLE-NEXT: lsr x11, x9, #40
655+
; CHECK-DISABLE-NEXT: ubfx x12, x9, #33, #7
656+
; CHECK-DISABLE-NEXT: lsr x15, x10, #45
657+
; CHECK-DISABLE-NEXT: lsr x13, x10, #40
658+
; CHECK-DISABLE-NEXT: ubfx x14, x10, #26, #14
659+
; CHECK-DISABLE-NEXT: orr w11, w12, w11, lsl #7
660+
; CHECK-DISABLE-NEXT: ldrh w12, [x0, #16]
661+
; CHECK-DISABLE-NEXT: fmov s0, w15
662+
; CHECK-DISABLE-NEXT: orr w13, w14, w13, lsl #14
663+
; CHECK-DISABLE-NEXT: ubfx x14, x9, #14, #18
664+
; CHECK-DISABLE-NEXT: add x0, x0, #32
665+
; CHECK-DISABLE-NEXT: fmov s1, w11
666+
; CHECK-DISABLE-NEXT: orr w11, w16, w12, lsl #8
667+
; CHECK-DISABLE-NEXT: lsl x12, x9, #24
668+
; CHECK-DISABLE-NEXT: mov v0.s[1], w13
669+
; CHECK-DISABLE-NEXT: ubfx x13, x10, #7, #25
670+
; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40
671+
; CHECK-DISABLE-NEXT: orr w12, w11, w12
672+
; CHECK-DISABLE-NEXT: mov v1.s[1], w14
673+
; CHECK-DISABLE-NEXT: lsr w12, w12, #19
674+
; CHECK-DISABLE-NEXT: ubfx x9, x9, #12, #20
675+
; CHECK-DISABLE-NEXT: mov v0.s[2], w13
676+
; CHECK-DISABLE-NEXT: mov v1.s[2], w12
677+
; CHECK-DISABLE-NEXT: mov v0.s[3], w9
678+
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
679+
; CHECK-DISABLE-NEXT: add x8, x8, #1
680+
; CHECK-DISABLE-NEXT: cmp x8, #1000
681+
; CHECK-DISABLE-NEXT: mov v1.s[3], w11
682+
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
683+
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
684+
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
685+
; CHECK-DISABLE-NEXT: b.eq .LBB5_1
686+
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
687+
; CHECK-DISABLE-NEXT: ret
533688
entry:
534689
br label %loop
535690

@@ -610,7 +765,41 @@ define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) {
610765
; CHECK-BE-NEXT: b.eq .LBB6_1
611766
; CHECK-BE-NEXT: // %bb.2: // %exit
612767
; CHECK-BE-NEXT: ret
613-
768+
;
769+
; CHECK-DISABLE-LABEL: trunc_v11i64_to_v11i8_in_loop:
770+
; CHECK-DISABLE: // %bb.0: // %entry
771+
; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
772+
; CHECK-DISABLE-NEXT: .LBB6_1: // %loop
773+
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
774+
; CHECK-DISABLE-NEXT: add x9, x0, #64
775+
; CHECK-DISABLE-NEXT: add x10, x0, #16
776+
; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x0]
777+
; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
778+
; CHECK-DISABLE-NEXT: add x9, x0, #48
779+
; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
780+
; CHECK-DISABLE-NEXT: add x10, x0, #32
781+
; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x9]
782+
; CHECK-DISABLE-NEXT: ldr d5, [x0, #80]
783+
; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
784+
; CHECK-DISABLE-NEXT: add x9, x1, #10
785+
; CHECK-DISABLE-NEXT: subs x8, x8, #1
786+
; CHECK-DISABLE-NEXT: uzp1 v1.4s, v3.4s, v1.4s
787+
; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v5.4s
788+
; CHECK-DISABLE-NEXT: add x0, x0, #128
789+
; CHECK-DISABLE-NEXT: uzp1 v2.4s, v4.4s, v2.4s
790+
; CHECK-DISABLE-NEXT: xtn v0.4h, v0.4s
791+
; CHECK-DISABLE-NEXT: uzp1 v1.8h, v1.8h, v2.8h
792+
; CHECK-DISABLE-NEXT: uzp1 v1.16b, v1.16b, v0.16b
793+
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
794+
; CHECK-DISABLE-NEXT: rev16 v2.16b, v1.16b
795+
; CHECK-DISABLE-NEXT: rev64 v1.16b, v1.16b
796+
; CHECK-DISABLE-NEXT: st1 { v0.b }[2], [x9]
797+
; CHECK-DISABLE-NEXT: add x9, x1, #8
798+
; CHECK-DISABLE-NEXT: st1 { v2.h }[4], [x9]
799+
; CHECK-DISABLE-NEXT: str d1, [x1], #16
800+
; CHECK-DISABLE-NEXT: b.eq .LBB6_1
801+
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
802+
; CHECK-DISABLE-NEXT: ret
614803
entry:
615804
br label %loop
616805

@@ -662,10 +851,24 @@ define void @trunc_v16i16_to_v16i8_in_loop(ptr %A, ptr %dst) {
662851
; CHECK-BE-NEXT: b.eq .LBB7_1
663852
; CHECK-BE-NEXT: // %bb.2: // %exit
664853
; CHECK-BE-NEXT: ret
665-
666-
667-
668-
854+
;
855+
; CHECK-DISABLE-LABEL: trunc_v16i16_to_v16i8_in_loop:
856+
; CHECK-DISABLE: // %bb.0: // %entry
857+
; CHECK-DISABLE-NEXT: mov x8, xzr
858+
; CHECK-DISABLE-NEXT: .LBB7_1: // %loop
859+
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
860+
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
861+
; CHECK-DISABLE-NEXT: add x10, x9, #16
862+
; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
863+
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
864+
; CHECK-DISABLE-NEXT: ld1 { v1.8h }, [x10]
865+
; CHECK-DISABLE-NEXT: add x8, x8, #1
866+
; CHECK-DISABLE-NEXT: cmp x8, #1000
867+
; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
868+
; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
869+
; CHECK-DISABLE-NEXT: b.eq .LBB7_1
870+
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
871+
; CHECK-DISABLE-NEXT: ret
669872
entry:
670873
br label %loop
671874

@@ -714,10 +917,22 @@ define void @trunc_v8i16_to_v8i8_in_loop(ptr %A, ptr %dst) {
714917
; CHECK-BE-NEXT: b.eq .LBB8_1
715918
; CHECK-BE-NEXT: // %bb.2: // %exit
716919
; CHECK-BE-NEXT: ret
717-
718-
719-
720-
920+
;
921+
; CHECK-DISABLE-LABEL: trunc_v8i16_to_v8i8_in_loop:
922+
; CHECK-DISABLE: // %bb.0: // %entry
923+
; CHECK-DISABLE-NEXT: mov x8, xzr
924+
; CHECK-DISABLE-NEXT: .LBB8_1: // %loop
925+
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
926+
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #4
927+
; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
928+
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
929+
; CHECK-DISABLE-NEXT: add x8, x8, #1
930+
; CHECK-DISABLE-NEXT: cmp x8, #1000
931+
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
932+
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
933+
; CHECK-DISABLE-NEXT: b.eq .LBB8_1
934+
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
935+
; CHECK-DISABLE-NEXT: ret
721936
entry:
722937
br label %loop
723938

0 commit comments

Comments
 (0)