Skip to content

Commit 843a978

Browse files
authored
[GlobalISel] Add support to moreElementsVector for G_SEXT, G_ZEXT and G_ANYEXT (#85038)
1 parent 6cc8d54 commit 843a978

File tree

6 files changed

+763
-164
lines changed

6 files changed

+763
-164
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5411,6 +5411,9 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5411 5411
MI.eraseFromParent();
5412 5412
return Legalized;
5413 5413
}
5414+
case TargetOpcode::G_SEXT:
5415+
case TargetOpcode::G_ZEXT:
5416+
case TargetOpcode::G_ANYEXT:
5414 5417
case TargetOpcode::G_TRUNC:
5415 5418
case TargetOpcode::G_FPTRUNC:
5416 5419
case TargetOpcode::G_FPEXT:

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -600,7 +600,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
600 600
.legalIf(ExtLegalFunc)
601 601
.legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
602 602
.clampScalar(0, s64, s64) // Just for s128, others are handled above.
603-
.moreElementsToNextPow2(1)
603+
.moreElementsToNextPow2(0)
604 604
.clampMaxNumElements(1, s8, 8)
605 605
.clampMaxNumElements(1, s16, 4)
606 606
.clampMaxNumElements(1, s32, 2)

llvm/test/CodeGen/AArch64/fcmp.ll

Lines changed: 37 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1108,61 +1108,54 @@ define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32
1108 1108
;
1109 1109
; CHECK-GI-FP16-LABEL: v7f16_i32:
1110 1110
; CHECK-GI-FP16: // %bb.0: // %entry
1111-
; CHECK-GI-FP16-NEXT: fcmgt v1.8h, v1.8h, v0.8h
1112-
; CHECK-GI-FP16-NEXT: mov w12, #31 // =0x1f
1113-
; CHECK-GI-FP16-NEXT: ldr s4, [sp]
1114-
; CHECK-GI-FP16-NEXT: fmov s2, w12
1111+
; CHECK-GI-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
1112+
; CHECK-GI-FP16-NEXT: mov w10, #31 // =0x1f
1113+
; CHECK-GI-FP16-NEXT: ldr s3, [sp]
1114+
; CHECK-GI-FP16-NEXT: fmov s1, w10
1115 1115
; CHECK-GI-FP16-NEXT: fmov s6, w0
1116-
; CHECK-GI-FP16-NEXT: ldr s5, [sp, #8]
1116+
; CHECK-GI-FP16-NEXT: ldr s4, [sp, #8]
1117 1117
; CHECK-GI-FP16-NEXT: ldr s7, [sp, #24]
1118 1118
; CHECK-GI-FP16-NEXT: ldr s16, [sp, #32]
1119-
; CHECK-GI-FP16-NEXT: umov w9, v1.h[4]
1120-
; CHECK-GI-FP16-NEXT: umov w8, v1.h[0]
1121-
; CHECK-GI-FP16-NEXT: umov w11, v1.h[5]
1122-
; CHECK-GI-FP16-NEXT: umov w10, v1.h[1]
1123-
; CHECK-GI-FP16-NEXT: mov v2.s[1], w12
1124-
; CHECK-GI-FP16-NEXT: umov w13, v1.h[2]
1119+
; CHECK-GI-FP16-NEXT: umov w8, v0.h[4]
1120+
; CHECK-GI-FP16-NEXT: umov w9, v0.h[5]
1121+
; CHECK-GI-FP16-NEXT: mov v1.s[1], w10
1125 1122
; CHECK-GI-FP16-NEXT: mov v6.s[1], w1
1126 1123
; CHECK-GI-FP16-NEXT: mov v7.s[1], v16.s[0]
1127 1124
; CHECK-GI-FP16-NEXT: ldr s16, [sp, #40]
1128-
; CHECK-GI-FP16-NEXT: fmov s3, w9
1129-
; CHECK-GI-FP16-NEXT: fmov s0, w8
1130-
; CHECK-GI-FP16-NEXT: umov w8, v1.h[6]
1131-
; CHECK-GI-FP16-NEXT: mov v2.s[2], w12
1132-
; CHECK-GI-FP16-NEXT: umov w9, v1.h[3]
1125+
; CHECK-GI-FP16-NEXT: fmov s2, w8
1126+
; CHECK-GI-FP16-NEXT: umov w8, v0.h[6]
1127+
; CHECK-GI-FP16-NEXT: mov v1.s[2], w10
1128+
; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
1133 1129
; CHECK-GI-FP16-NEXT: mov v6.s[2], w2
1134 1130
; CHECK-GI-FP16-NEXT: mov v7.s[2], v16.s[0]
1135-
; CHECK-GI-FP16-NEXT: mov v3.s[1], w11
1136-
; CHECK-GI-FP16-NEXT: mov v0.s[1], w10
1137-
; CHECK-GI-FP16-NEXT: mov w10, #-1 // =0xffffffff
1138-
; CHECK-GI-FP16-NEXT: fmov s1, w10
1139-
; CHECK-GI-FP16-NEXT: neg v17.4s, v2.4s
1131+
; CHECK-GI-FP16-NEXT: mov v2.s[1], w9
1132+
; CHECK-GI-FP16-NEXT: mov w9, #-1 // =0xffffffff
1133+
; CHECK-GI-FP16-NEXT: fmov s5, w9
1134+
; CHECK-GI-FP16-NEXT: neg v17.4s, v1.4s
1135+
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
1140 1136
; CHECK-GI-FP16-NEXT: mov v6.s[3], w3
1137+
; CHECK-GI-FP16-NEXT: mov v2.s[2], w8
1138+
; CHECK-GI-FP16-NEXT: fmov w8, s3
1139+
; CHECK-GI-FP16-NEXT: fmov s3, w7
1140+
; CHECK-GI-FP16-NEXT: mov v5.s[1], w9
1141+
; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31
1142+
; CHECK-GI-FP16-NEXT: mov v3.s[1], w8
1143+
; CHECK-GI-FP16-NEXT: fmov w8, s4
1144+
; CHECK-GI-FP16-NEXT: ldr s4, [sp, #16]
1145+
; CHECK-GI-FP16-NEXT: ushl v1.4s, v2.4s, v1.4s
1146+
; CHECK-GI-FP16-NEXT: fmov s2, w4
1147+
; CHECK-GI-FP16-NEXT: mov v5.s[2], w9
1148+
; CHECK-GI-FP16-NEXT: mov v2.s[1], w5
1141 1149
; CHECK-GI-FP16-NEXT: mov v3.s[2], w8
1150+
; CHECK-GI-FP16-NEXT: sshl v1.4s, v1.4s, v17.4s
1142 1151
; CHECK-GI-FP16-NEXT: fmov w8, s4
1143-
; CHECK-GI-FP16-NEXT: fmov s4, w7
1144-
; CHECK-GI-FP16-NEXT: mov v0.s[2], w13
1145-
; CHECK-GI-FP16-NEXT: mov v1.s[1], w10
1146-
; CHECK-GI-FP16-NEXT: mov v4.s[1], w8
1147-
; CHECK-GI-FP16-NEXT: fmov w8, s5
1148-
; CHECK-GI-FP16-NEXT: ldr s5, [sp, #16]
1149-
; CHECK-GI-FP16-NEXT: ushl v2.4s, v3.4s, v2.4s
1150-
; CHECK-GI-FP16-NEXT: fmov s3, w4
1151-
; CHECK-GI-FP16-NEXT: mov v0.s[3], w9
1152-
; CHECK-GI-FP16-NEXT: mov v1.s[2], w10
1153-
; CHECK-GI-FP16-NEXT: mov v3.s[1], w5
1154-
; CHECK-GI-FP16-NEXT: mov v4.s[2], w8
1155-
; CHECK-GI-FP16-NEXT: sshl v2.4s, v2.4s, v17.4s
1156-
; CHECK-GI-FP16-NEXT: fmov w8, s5
1157-
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
1158-
; CHECK-GI-FP16-NEXT: eor v1.16b, v2.16b, v1.16b
1159-
; CHECK-GI-FP16-NEXT: mov v3.s[2], w6
1160-
; CHECK-GI-FP16-NEXT: mov v4.s[3], w8
1161-
; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31
1162-
; CHECK-GI-FP16-NEXT: and v1.16b, v7.16b, v1.16b
1163-
; CHECK-GI-FP16-NEXT: and v2.16b, v3.16b, v2.16b
1164-
; CHECK-GI-FP16-NEXT: bsl v0.16b, v6.16b, v4.16b
1165-
; CHECK-GI-FP16-NEXT: orr v1.16b, v2.16b, v1.16b
1152+
; CHECK-GI-FP16-NEXT: eor v4.16b, v1.16b, v5.16b
1153+
; CHECK-GI-FP16-NEXT: mov v2.s[2], w6
1154+
; CHECK-GI-FP16-NEXT: mov v3.s[3], w8
1155+
; CHECK-GI-FP16-NEXT: and v1.16b, v2.16b, v1.16b
1156+
; CHECK-GI-FP16-NEXT: and v2.16b, v7.16b, v4.16b
1157+
; CHECK-GI-FP16-NEXT: bsl v0.16b, v6.16b, v3.16b
1158+
; CHECK-GI-FP16-NEXT: orr v1.16b, v1.16b, v2.16b
1166 1159
; CHECK-GI-FP16-NEXT: mov s2, v0.s[1]
1167 1160
; CHECK-GI-FP16-NEXT: mov s3, v0.s[2]
1168 1161
; CHECK-GI-FP16-NEXT: mov s4, v0.s[3]

llvm/test/CodeGen/AArch64/sext.ll

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -280,13 +280,12 @@ define <3 x i64> @sext_v3i8_v3i64(<3 x i8> %a) {
280 280
;
281 281
; CHECK-GI-LABEL: sext_v3i8_v3i64:
282 282
; CHECK-GI: // %bb.0: // %entry
283-
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
284-
; CHECK-GI-NEXT: fmov d0, x0
285-
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
283+
; CHECK-GI-NEXT: fmov s0, w0
286 284
; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2
287 285
; CHECK-GI-NEXT: sxtb x8, w2
288 286
; CHECK-GI-NEXT: fmov d2, x8
289-
; CHECK-GI-NEXT: mov v0.d[1], x1
287+
; CHECK-GI-NEXT: mov v0.s[1], w1
288+
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
290 289
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56
291 290
; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #56
292 291
; CHECK-GI-NEXT: mov d1, v0.d[1]
@@ -444,13 +443,12 @@ define <3 x i64> @sext_v3i10_v3i64(<3 x i10> %a) {
444 443
;
445 444
; CHECK-GI-LABEL: sext_v3i10_v3i64:
446 445
; CHECK-GI: // %bb.0: // %entry
447-
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
448-
; CHECK-GI-NEXT: fmov d0, x0
449-
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
446+
; CHECK-GI-NEXT: fmov s0, w0
450 447
; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2
451 448
; CHECK-GI-NEXT: sbfx x8, x2, #0, #10
452 449
; CHECK-GI-NEXT: fmov d2, x8
453-
; CHECK-GI-NEXT: mov v0.d[1], x1
450+
; CHECK-GI-NEXT: mov v0.s[1], w1
451+
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
454 452
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54
455 453
; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54
456 454
; CHECK-GI-NEXT: mov d1, v0.d[1]

0 commit comments

Comments
 (0)