Skip to content

Commit 9a78430

Browse files
authored
[AArch64][GlobalISel] Legalize small G_TRUNC (#85625)
This is an alternative to #85610 that legalizes small G_TRUNC vectors by widening them with moreElements. It needs to disable one of the existing Unmerge(Trunc(..)) combines, and some of the resulting code is not as optimal as it could be. I believe it could look better with some extra optimizations (I was thinking of combining trunc(buildvector) -> buildvector, and possibly improving buildvector lowering by generating insert_vector_element earlier).
1 parent 705788c commit 9a78430

File tree

6 files changed

+80
-42
lines changed

6 files changed

+80
-42
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,9 +432,13 @@ class LegalizationArtifactCombiner {
432432
DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
433433
LLT UnmergeTy = CastSrcTy.changeElementCount(
434434
ElementCount::getFixed(UnmergeNumElts));
435+
LLT SrcWideTy =
436+
SrcTy.changeElementCount(ElementCount::getFixed(UnmergeNumElts));
435437

436438
if (isInstUnsupported(
437-
{TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
439+
{TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}) ||
440+
LI.getAction({TargetOpcode::G_TRUNC, {SrcWideTy, UnmergeTy}})
441+
.Action == LegalizeActions::MoreElements)
438442
return false;
439443

440444
Builder.setInstr(MI);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
628628
return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
629629
DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
630630
})
631-
631+
.clampMinNumElements(0, s8, 8)
632+
.clampMinNumElements(0, s16, 4)
633+
.clampMinNumElements(0, s32, 2)
632634
.alwaysLegal();
633635

634636
getActionDefinitionsBuilder(G_SEXT_INREG)

llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -607,9 +607,11 @@ body: |
607607
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
608608
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16)
609609
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s16)
610-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
611-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
612-
; CHECK-NEXT: $s0 = COPY [[TRUNC]](<2 x s16>)
610+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
611+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[DEF]](s32), [[DEF]](s32)
612+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
613+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[TRUNC]](<4 x s16>)
614+
; CHECK-NEXT: $s0 = COPY [[UV]](<2 x s16>)
613615
; CHECK-NEXT: RET_ReallyLR
614616
%0:_(p0) = COPY $x0
615617
%1(<2 x s16>) = G_LOAD %0(p0) :: (load (<2 x s16>))

llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -540,9 +540,17 @@ body: |
540540
; CHECK: liveins: $d0
541541
; CHECK-NEXT: {{ $}}
542542
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
543-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[COPY]](<2 x s32>)
544-
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s8>), [[TRUNC]](<2 x s8>)
545-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[CONCAT_VECTORS]](<4 x s8>)
543+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
544+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
545+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32)
546+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
547+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
548+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
549+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
550+
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
551+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s8>), [[UV3:%[0-9]+]]:_(<2 x s8>), [[UV4:%[0-9]+]]:_(<2 x s8>), [[UV5:%[0-9]+]]:_(<2 x s8>) = G_UNMERGE_VALUES [[TRUNC2]](<8 x s8>)
552+
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s8>) = G_CONCAT_VECTORS [[UV2]](<2 x s8>), [[UV2]](<2 x s8>)
553+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[CONCAT_VECTORS1]](<4 x s8>)
546554
; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<4 x s16>)
547555
; CHECK-NEXT: RET_ReallyLR implicit $d0
548556
%0:_(<2 x s32>) = COPY $d0

llvm/test/CodeGen/AArch64/bitcast.ll

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@
44

55
; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined.
66

7-
; CHECK-GI: warning: Instruction selection used fallback path for bitcast_v4i8_i32
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v4i8
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_i32
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16
11-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8
12-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16
7+
; CHECK-GI: warning: Instruction selection used fallback path for bitcast_i32_v4i8
8+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16
9+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8
10+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16
1311

1412
define <4 x i16> @foo1(<2 x i32> %a) {
1513
; CHECK-SD-LABEL: foo1:
@@ -54,15 +52,28 @@ define <4 x i16> @foo2(<2 x i32> %a) {
5452
; ===== To and From Scalar Types =====
5553

5654
define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){
57-
; CHECK-LABEL: bitcast_v4i8_i32:
58-
; CHECK: // %bb.0:
59-
; CHECK-NEXT: sub sp, sp, #16
60-
; CHECK-NEXT: .cfi_def_cfa_offset 16
61-
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
62-
; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
63-
; CHECK-NEXT: fmov w0, s0
64-
; CHECK-NEXT: add sp, sp, #16
65-
; CHECK-NEXT: ret
55+
; CHECK-SD-LABEL: bitcast_v4i8_i32:
56+
; CHECK-SD: // %bb.0:
57+
; CHECK-SD-NEXT: sub sp, sp, #16
58+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
59+
; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
60+
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
61+
; CHECK-SD-NEXT: fmov w0, s0
62+
; CHECK-SD-NEXT: add sp, sp, #16
63+
; CHECK-SD-NEXT: ret
64+
;
65+
; CHECK-GI-LABEL: bitcast_v4i8_i32:
66+
; CHECK-GI: // %bb.0:
67+
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
68+
; CHECK-GI-NEXT: mov h1, v0.h[1]
69+
; CHECK-GI-NEXT: mov h2, v0.h[2]
70+
; CHECK-GI-NEXT: mov h3, v0.h[3]
71+
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
72+
; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
73+
; CHECK-GI-NEXT: mov v0.h[3], v3.h[0]
74+
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
75+
; CHECK-GI-NEXT: fmov w0, s0
76+
; CHECK-GI-NEXT: ret
6677
%c = add <4 x i8> %a, %b
6778
%d = bitcast <4 x i8> %c to i32
6879
ret i32 %d
@@ -81,18 +92,27 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
8192
}
8293

8394
define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
84-
; CHECK-LABEL: bitcast_v2i16_i32:
85-
; CHECK: // %bb.0:
86-
; CHECK-NEXT: sub sp, sp, #16
87-
; CHECK-NEXT: .cfi_def_cfa_offset 16
88-
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
89-
; CHECK-NEXT: mov w8, v0.s[1]
90-
; CHECK-NEXT: fmov w9, s0
91-
; CHECK-NEXT: strh w9, [sp, #12]
92-
; CHECK-NEXT: strh w8, [sp, #14]
93-
; CHECK-NEXT: ldr w0, [sp, #12]
94-
; CHECK-NEXT: add sp, sp, #16
95-
; CHECK-NEXT: ret
95+
; CHECK-SD-LABEL: bitcast_v2i16_i32:
96+
; CHECK-SD: // %bb.0:
97+
; CHECK-SD-NEXT: sub sp, sp, #16
98+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
99+
; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
100+
; CHECK-SD-NEXT: mov w8, v0.s[1]
101+
; CHECK-SD-NEXT: fmov w9, s0
102+
; CHECK-SD-NEXT: strh w9, [sp, #12]
103+
; CHECK-SD-NEXT: strh w8, [sp, #14]
104+
; CHECK-SD-NEXT: ldr w0, [sp, #12]
105+
; CHECK-SD-NEXT: add sp, sp, #16
106+
; CHECK-SD-NEXT: ret
107+
;
108+
; CHECK-GI-LABEL: bitcast_v2i16_i32:
109+
; CHECK-GI: // %bb.0:
110+
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
111+
; CHECK-GI-NEXT: mov s1, v0.s[1]
112+
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
113+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
114+
; CHECK-GI-NEXT: fmov w0, s0
115+
; CHECK-GI-NEXT: ret
96116
%c = add <2 x i16> %a, %b
97117
%d = bitcast <2 x i16> %c to i32
98118
ret i32 %d

llvm/test/CodeGen/AArch64/itofp.ll

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5521,7 +5521,8 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
55215521
; CHECK-GI-FP16: // %bb.0: // %entry
55225522
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
55235523
; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
5524-
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
5524+
; CHECK-GI-FP16-NEXT: mov v0.s[1], v1.s[0]
5525+
; CHECK-GI-FP16-NEXT: xtn v0.4h, v0.4s
55255526
; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8
55265527
; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8
55275528
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
@@ -5580,12 +5581,13 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
55805581
;
55815582
; CHECK-GI-FP16-LABEL: utofp_v2i8_v2f16:
55825583
; CHECK-GI-FP16: // %bb.0: // %entry
5583-
; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff
5584-
; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b
5585-
; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
5586-
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
5587-
; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h
5588-
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
5584+
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
5585+
; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
5586+
; CHECK-GI-FP16-NEXT: fmov w9, s0
5587+
; CHECK-GI-FP16-NEXT: and w9, w9, #0xff
5588+
; CHECK-GI-FP16-NEXT: and w8, w8, #0xff
5589+
; CHECK-GI-FP16-NEXT: ucvtf h0, w9
5590+
; CHECK-GI-FP16-NEXT: ucvtf h1, w8
55895591
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
55905592
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
55915593
; CHECK-GI-FP16-NEXT: ret

0 commit comments

Comments
 (0)