Skip to content

Commit 5b5614c

Browse files
authored
[AArch64][GlobalISel] Add legalization for vecreduce.fmul (#73309)
There are no native operations that we can use for a floating-point multiply reduction, so lower it by repeatedly splitting the vector into smaller chunks. A fold for fmul_indexed is still missing; it could help the GlobalISel test cases a bit.
1 parent 7e186d3 commit 5b5614c

File tree

5 files changed

+364
-84
lines changed

5 files changed

+364
-84
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2831,6 +2831,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
28312831
return Legalized;
28322832
}
28332833
case TargetOpcode::G_VECREDUCE_FADD:
2834+
case TargetOpcode::G_VECREDUCE_FMUL:
28342835
case TargetOpcode::G_VECREDUCE_FMIN:
28352836
case TargetOpcode::G_VECREDUCE_FMAX:
28362837
case TargetOpcode::G_VECREDUCE_FMINIMUM:

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
989989
.clampMaxNumElements(1, s16, 8)
990990
.lower();
991991

992+
// For fmul reductions we need to split up into individual operations. We
993+
// clamp to 128-bit vectors then to 64-bit vectors to produce a cascade of
994+
// smaller types, followed by scalarizing what remains.
995+
getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
996+
.minScalarOrElt(0, MinFPScalar)
997+
.clampMaxNumElements(1, s64, 2)
998+
.clampMaxNumElements(1, s32, 4)
999+
.clampMaxNumElements(1, s16, 8)
1000+
.clampMaxNumElements(1, s32, 2)
1001+
.clampMaxNumElements(1, s16, 4)
1002+
.scalarize(1)
1003+
.lower();
1004+
9921005
getActionDefinitionsBuilder(G_VECREDUCE_ADD)
9931006
.legalFor({{s8, v16s8},
9941007
{s8, v8s8},
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel %s -o - | FileCheck %s
3+
4+
---
5+
name: mul_2H
6+
tracksRegLiveness: true
7+
body: |
8+
bb.1:
9+
liveins: $q0, $q1
10+
11+
; CHECK-LABEL: name: mul_2H
12+
; CHECK: liveins: $q0, $q1
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
15+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
16+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[COPY]], [[COPY1]]
17+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FMUL]](<4 x s32>)
18+
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s32>) = G_FMUL [[UV]], [[UV1]]
19+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMUL1]](<2 x s32>)
20+
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV3]]
21+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FMUL2]](s32)
22+
; CHECK-NEXT: $s0 = COPY [[COPY2]](s32)
23+
; CHECK-NEXT: RET_ReallyLR implicit $s0
24+
%1:_(<4 x s32>) = COPY $q0
25+
%2:_(<4 x s32>) = COPY $q1
26+
%0:_(<8 x s32>) = G_CONCAT_VECTORS %1(<4 x s32>), %2(<4 x s32>)
27+
%5:_(s32) = nnan ninf nsz arcp contract afn reassoc G_VECREDUCE_FMUL %0(<8 x s32>)
28+
$s0 = COPY %5(s32)
29+
RET_ReallyLR implicit $s0
30+
31+
...

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -736,8 +736,8 @@
736736
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
737737
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
738738
# DEBUG-NEXT: G_VECREDUCE_FMUL (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
739-
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
740-
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
739+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
740+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
741741
# DEBUG-NEXT: G_VECREDUCE_FMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
742742
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
743743
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected

0 commit comments

Comments
 (0)