Skip to content

Commit 7e91c59

Browse files
author
Jessica Paquette
committed
[AArch64][GlobalISel] Legalize 32-bit + narrow G_SMULO + G_UMULO
SDAG lowers 32-bit and 64-bit G_SMULO + G_UMULO. GlobalISel was missing the 32-bit case.

For other sizes, widen the 0th type to the next power of 2 and clamp it to either 32 bits or 64 bits.

Right now, this allows us to handle narrow types (e.g. s4, s24, etc.). The LegalizerHelper doesn't support narrowing G_SMULO or G_UMULO yet, so types wider than 64 bits (e.g. s128) still fail to legalize and use the fallback path. I think we want the clamping behaviour either way, so we might as well include it now to be explicit.

Differential Revision: https://reviews.llvm.org/D108240
1 parent 16caf63 commit 7e91c59

File tree

4 files changed

+108
-5
lines changed

4 files changed

+108
-5
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
169169
getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
170170
.lowerFor({s1, s8, s16, s32, s64});
171171

172-
getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}});
172+
getActionDefinitionsBuilder({G_SMULO, G_UMULO})
173+
.widenScalarToNextPow2(0, /*Min = */ 32)
174+
.clampScalar(0, s32, s64)
175+
.lowerIf(typeIs(1, s1));
173176

174177
getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
175178

llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,19 @@ entry:
136136
ret void
137137
}
138138

139+
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %4:_(s128), %5:_(s1) = G_UMULO %0:_, %6:_ (in function: umul_s128)
140+
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for umul_s128
141+
; FALLBACK-WITH-REPORT-OUT-LABEL: umul_s128
142+
declare {i128, i1} @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone
143+
define zeroext i1 @umul_s128(i128 %v1, i128* %res) {
144+
entry:
145+
%t = call {i128, i1} @llvm.umul.with.overflow.i128(i128 %v1, i128 2)
146+
%val = extractvalue {i128, i1} %t, 0
147+
%obit = extractvalue {i128, i1} %t, 1
148+
store i128 %val, i128* %res
149+
ret i1 %obit
150+
}
151+
139152
attributes #1 = { "target-features"="+sve" }
140153
attributes #2 = { "target-features"="+ls64" }
141154

llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,93 @@ body: |
6565
%4:_(s32) = G_ANYEXT %3(s1)
6666
$w0 = COPY %4(s32)
6767
68+
...
69+
---
70+
name: test_smul_overflow_s32
71+
body: |
72+
bb.0:
73+
; CHECK-LABEL: name: test_smul_overflow_s32
74+
; CHECK: %lhs:_(s32) = COPY $w0
75+
; CHECK: %rhs:_(s32) = COPY $w1
76+
; CHECK: [[SMULH:%[0-9]+]]:_(s32) = G_SMULH %lhs, %rhs
77+
; CHECK: %mul:_(s32) = G_MUL %lhs, %rhs
78+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
79+
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR %mul, [[C]](s64)
80+
; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s32), [[ASHR]]
81+
; CHECK: $w0 = COPY %mul(s32)
82+
; CHECK: %ext_overflow:_(s32) = COPY [[ICMP]](s32)
83+
; CHECK: $w0 = COPY %ext_overflow(s32)
84+
; CHECK: RET_ReallyLR implicit $w0
85+
%lhs:_(s32) = COPY $w0
86+
%rhs:_(s32) = COPY $w1
87+
%mul:_(s32), %overflow:_(s1) = G_SMULO %lhs, %rhs
88+
$w0 = COPY %mul(s32)
89+
%ext_overflow:_(s32) = G_ANYEXT %overflow(s1)
90+
$w0 = COPY %ext_overflow(s32)
91+
RET_ReallyLR implicit $w0
92+
93+
...
94+
---
95+
name: test_umul_overflow_s32
96+
body: |
97+
bb.0:
98+
; CHECK-LABEL: name: test_umul_overflow_s32
99+
; CHECK: %lhs:_(s32) = COPY $w0
100+
; CHECK: %rhs:_(s32) = COPY $w1
101+
; CHECK: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH %lhs, %rhs
102+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
103+
; CHECK: %mul:_(s32) = G_MUL %lhs, %rhs
104+
; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C]]
105+
; CHECK: $w0 = COPY %mul(s32)
106+
; CHECK: %ext_overflow:_(s32) = COPY [[ICMP]](s32)
107+
; CHECK: $w0 = COPY %ext_overflow(s32)
108+
; CHECK: RET_ReallyLR implicit $w0
109+
%lhs:_(s32) = COPY $w0
110+
%rhs:_(s32) = COPY $w1
111+
%mul:_(s32), %overflow:_(s1) = G_UMULO %lhs, %rhs
112+
$w0 = COPY %mul(s32)
113+
%ext_overflow:_(s32) = G_ANYEXT %overflow(s1)
114+
$w0 = COPY %ext_overflow(s32)
115+
RET_ReallyLR implicit $w0
116+
117+
...
118+
---
119+
name: test_umul_overflow_s24
120+
body: |
121+
bb.0:
122+
; CHECK-LABEL: name: test_umul_overflow_s24
123+
; CHECK: %lhs_wide:_(s32) = COPY $w0
124+
; CHECK: %rhs_wide:_(s32) = COPY $w1
125+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
126+
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %lhs_wide(s32)
127+
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
128+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %rhs_wide(s32)
129+
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
130+
; CHECK: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[AND1]]
131+
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
132+
; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]]
133+
; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s32), [[C1]]
134+
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C]]
135+
; CHECK: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[MUL]](s32), [[AND2]]
136+
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
137+
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
138+
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY3]]
139+
; CHECK: %ext_mul:_(s32) = COPY [[MUL]](s32)
140+
; CHECK: $w0 = COPY %ext_mul(s32)
141+
; CHECK: %ext_overflow:_(s32) = COPY [[OR]](s32)
142+
; CHECK: $w0 = COPY %ext_overflow(s32)
143+
; CHECK: RET_ReallyLR implicit $w0
144+
%lhs_wide:_(s32) = COPY $w0
145+
%rhs_wide:_(s32) = COPY $w1
146+
%lhs:_(s24) = G_TRUNC %lhs_wide
147+
%rhs:_(s24) = G_TRUNC %rhs_wide
148+
%mul:_(s24), %overflow:_(s1) = G_UMULO %lhs, %rhs
149+
%ext_mul:_(s32) = G_ANYEXT %mul
150+
$w0 = COPY %ext_mul(s32)
151+
%ext_overflow:_(s32) = G_ANYEXT %overflow(s1)
152+
$w0 = COPY %ext_overflow(s32)
153+
RET_ReallyLR implicit $w0
154+
68155
...
69156
---
70157
name: vector_mul_scalarize

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -341,11 +341,11 @@
341341
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
342342
# DEBUG-NEXT: G_UMULO (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
343343
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
344-
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
345-
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
344+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
345+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
346346
# DEBUG-NEXT: G_SMULO (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
347-
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
348-
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
347+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
348+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
349349
# DEBUG-NEXT: G_UMULH (opcode {{[0-9]+}}): 1 type index, 0 imm indices
350350
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
351351
# DEBUG-NEXT: .. the first uncovered type index: 1, OK

0 commit comments

Comments
 (0)