Skip to content

Commit 9aa5de9

Browse files
committed
[LoongArch] Break MUL into SLLI and SUB or ADD
Further, after MUL is decomposed, use ALSL instead of SLLI and ADD. Differential Revision: https://reviews.llvm.org/D140282
1 parent 4a2807d commit 9aa5de9

File tree

4 files changed

+131
-30
lines changed

4 files changed

+131
-30
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/IR/IntrinsicsLoongArch.h"
2727
#include "llvm/Support/Debug.h"
2828
#include "llvm/Support/KnownBits.h"
29+
#include "llvm/Support/MathExtras.h"
2930

3031
using namespace llvm;
3132

@@ -2858,3 +2859,24 @@ LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
28582859
StringRef(RegName) + "\"."));
28592860
return Reg;
28602861
}
2862+
2863+
/// Report whether a multiplication of type \p VT by the constant \p C is a
/// candidate for being broken into shift and add/sub operations.
///
/// Returns true only when all of the following hold:
///   - \p VT is a scalar integer type,
///   - \p VT is no wider than the subtarget's GRLen,
///   - \p C is a ConstantSDNode whose value Imm makes one of Imm + 1,
///     Imm - 1, 1 - Imm or -1 - Imm a power of two
///     (for example 3, 5, 15, -3, -5, -15).
/// Returns false in every other case. \p Context is not used by this
/// implementation. This function only answers the yes/no question; the
/// rewrite itself is not performed here.
bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
2864+
EVT VT, SDValue C) const {
2865+
// TODO: Support vectors.
2866+
if (!VT.isScalarInteger())
2867+
return false;
2868+
2869+
// Omit the optimization if the data size exceeds GRLen.
2870+
if (VT.getSizeInBits() > Subtarget.getGRLen())
2871+
return false;
2872+
2873+
// Break MUL into (SLLI + ADD/SUB) or ALSL.
// Accepted constants have the form 2^k - 1, 2^k + 1, 1 - 2^k or -(2^k + 1).
// A non-constant operand fails the dyn_cast and falls through to false.
2874+
if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
2875+
const APInt &Imm = ConstNode->getAPIntValue();
2876+
if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
2877+
(1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
2878+
return true;
2879+
}
2880+
2881+
return false;
2882+
}

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,9 @@ class LoongArchTargetLowering : public TargetLowering {
169169
Register getRegisterByName(const char *RegName, LLT VT,
170170
const MachineFunction &MF) const override;
171171

172+
/// Return true if a multiply of type \p VT by the constant \p C should be
/// decomposed into shift and add/sub operations rather than kept as a MUL.
/// Overrides the hook declared in the base class.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
173+
SDValue C) const override;
174+
172175
private:
173176
/// Target-specific function used to lower LoongArch calling conventions.
174177
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,

llvm/test/CodeGen/LoongArch/alsl.ll

Lines changed: 101 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -128,19 +128,16 @@ entry:
128128
ret i64 %zext
129129
}
130130

131-
;; Check that alsl.w or alsl.d is not emitted.
132131
define i8 @mul_add_i8(i8 signext %a, i8 signext %b) nounwind {
133132
; LA32-LABEL: mul_add_i8:
134133
; LA32: # %bb.0: # %entry
135-
; LA32-NEXT: ori $a2, $zero, 3
136-
; LA32-NEXT: mul.w $a0, $a0, $a2
134+
; LA32-NEXT: alsl.w $a0, $a0, $a0, 1
137135
; LA32-NEXT: add.w $a0, $a1, $a0
138136
; LA32-NEXT: ret
139137
;
140138
; LA64-LABEL: mul_add_i8:
141139
; LA64: # %bb.0: # %entry
142-
; LA64-NEXT: ori $a2, $zero, 3
143-
; LA64-NEXT: mul.d $a0, $a0, $a2
140+
; LA64-NEXT: alsl.d $a0, $a0, $a0, 1
144141
; LA64-NEXT: add.d $a0, $a1, $a0
145142
; LA64-NEXT: ret
146143
entry:
@@ -192,21 +189,23 @@ entry:
192189
define i64 @mul_add_i64(i64 signext %a, i64 signext %b) nounwind {
193190
; LA32-LABEL: mul_add_i64:
194191
; LA32: # %bb.0: # %entry
192+
; LA32-NEXT: slli.w $a4, $a1, 4
193+
; LA32-NEXT: sub.w $a1, $a4, $a1
195194
; LA32-NEXT: ori $a4, $zero, 15
196-
; LA32-NEXT: mul.w $a1, $a1, $a4
197-
; LA32-NEXT: mulh.wu $a5, $a0, $a4
198-
; LA32-NEXT: add.w $a1, $a5, $a1
195+
; LA32-NEXT: mulh.wu $a4, $a0, $a4
196+
; LA32-NEXT: add.w $a1, $a4, $a1
199197
; LA32-NEXT: add.w $a1, $a3, $a1
200-
; LA32-NEXT: mul.w $a0, $a0, $a4
198+
; LA32-NEXT: slli.w $a3, $a0, 4
199+
; LA32-NEXT: sub.w $a0, $a3, $a0
201200
; LA32-NEXT: add.w $a0, $a2, $a0
202201
; LA32-NEXT: sltu $a2, $a0, $a2
203202
; LA32-NEXT: add.w $a1, $a1, $a2
204203
; LA32-NEXT: ret
205204
;
206205
; LA64-LABEL: mul_add_i64:
207206
; LA64: # %bb.0: # %entry
208-
; LA64-NEXT: ori $a2, $zero, 15
209-
; LA64-NEXT: mul.d $a0, $a0, $a2
207+
; LA64-NEXT: slli.d $a2, $a0, 4
208+
; LA64-NEXT: sub.d $a0, $a2, $a0
210209
; LA64-NEXT: add.d $a0, $a1, $a0
211210
; LA64-NEXT: ret
212211
entry:
@@ -218,16 +217,14 @@ entry:
218217
define i32 @mul_add_zext_i8(i8 signext %a, i8 signext %b) nounwind {
219218
; LA32-LABEL: mul_add_zext_i8:
220219
; LA32: # %bb.0: # %entry
221-
; LA32-NEXT: ori $a2, $zero, 5
222-
; LA32-NEXT: mul.w $a0, $a0, $a2
220+
; LA32-NEXT: alsl.w $a0, $a0, $a0, 2
223221
; LA32-NEXT: add.w $a0, $a1, $a0
224222
; LA32-NEXT: andi $a0, $a0, 255
225223
; LA32-NEXT: ret
226224
;
227225
; LA64-LABEL: mul_add_zext_i8:
228226
; LA64: # %bb.0: # %entry
229-
; LA64-NEXT: ori $a2, $zero, 5
230-
; LA64-NEXT: mul.d $a0, $a0, $a2
227+
; LA64-NEXT: alsl.d $a0, $a0, $a0, 2
231228
; LA64-NEXT: add.d $a0, $a1, $a0
232229
; LA64-NEXT: andi $a0, $a0, 255
233230
; LA64-NEXT: ret
@@ -241,16 +238,16 @@ entry:
241238
define i32 @mul_add_zext_i16(i16 signext %a, i16 signext %b) nounwind {
242239
; LA32-LABEL: mul_add_zext_i16:
243240
; LA32: # %bb.0: # %entry
244-
; LA32-NEXT: ori $a2, $zero, 15
245-
; LA32-NEXT: mul.w $a0, $a0, $a2
241+
; LA32-NEXT: slli.w $a2, $a0, 4
242+
; LA32-NEXT: sub.w $a0, $a2, $a0
246243
; LA32-NEXT: add.w $a0, $a1, $a0
247244
; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
248245
; LA32-NEXT: ret
249246
;
250247
; LA64-LABEL: mul_add_zext_i16:
251248
; LA64: # %bb.0: # %entry
252-
; LA64-NEXT: ori $a2, $zero, 15
253-
; LA64-NEXT: mul.d $a0, $a0, $a2
249+
; LA64-NEXT: slli.d $a2, $a0, 4
250+
; LA64-NEXT: sub.d $a0, $a2, $a0
254251
; LA64-NEXT: add.d $a0, $a1, $a0
255252
; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
256253
; LA64-NEXT: ret
@@ -261,20 +258,17 @@ entry:
261258
ret i32 %zext
262259
}
263260

264-
;; Check that alsl.wu is not emitted.
265261
define i64 @mul_add_zext_i32(i32 signext %a, i32 signext %b) nounwind {
266262
; LA32-LABEL: mul_add_zext_i32:
267263
; LA32: # %bb.0: # %entry
268-
; LA32-NEXT: ori $a2, $zero, 5
269-
; LA32-NEXT: mul.w $a0, $a0, $a2
264+
; LA32-NEXT: alsl.w $a0, $a0, $a0, 2
270265
; LA32-NEXT: add.w $a0, $a1, $a0
271266
; LA32-NEXT: move $a1, $zero
272267
; LA32-NEXT: ret
273268
;
274269
; LA64-LABEL: mul_add_zext_i32:
275270
; LA64: # %bb.0: # %entry
276-
; LA64-NEXT: ori $a2, $zero, 5
277-
; LA64-NEXT: mul.d $a0, $a0, $a2
271+
; LA64-NEXT: alsl.d $a0, $a0, $a0, 2
278272
; LA64-NEXT: add.d $a0, $a1, $a0
279273
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
280274
; LA64-NEXT: ret
@@ -284,3 +278,86 @@ entry:
284278
%zext = zext i32 %add to i64
285279
ret i64 %zext
286280
}
281+
282+
;; i8 multiply by -3 added to %b: the checks expect one alsl (shift amount 1)
;; followed by a sub from %b, with no mul instruction, on both LA32 and LA64.
define i8 @alsl_neg_i8(i8 signext %a, i8 signext %b) nounwind {
283+
; LA32-LABEL: alsl_neg_i8:
284+
; LA32: # %bb.0: # %entry
285+
; LA32-NEXT: alsl.w $a0, $a0, $a0, 1
286+
; LA32-NEXT: sub.w $a0, $a1, $a0
287+
; LA32-NEXT: ret
288+
;
289+
; LA64-LABEL: alsl_neg_i8:
290+
; LA64: # %bb.0: # %entry
291+
; LA64-NEXT: alsl.d $a0, $a0, $a0, 1
292+
; LA64-NEXT: sub.d $a0, $a1, $a0
293+
; LA64-NEXT: ret
294+
entry:
295+
%mul = mul nsw i8 %a, -3
296+
%add = add nsw i8 %b, %mul
297+
ret i8 %add
298+
}
299+
300+
;; i16 multiply by -5 added to %b: the checks expect one alsl (shift amount 2)
;; followed by a sub from %b, with no mul instruction, on both LA32 and LA64.
define i16 @alsl_neg_i16(i16 signext %a, i16 signext %b) nounwind {
301+
; LA32-LABEL: alsl_neg_i16:
302+
; LA32: # %bb.0: # %entry
303+
; LA32-NEXT: alsl.w $a0, $a0, $a0, 2
304+
; LA32-NEXT: sub.w $a0, $a1, $a0
305+
; LA32-NEXT: ret
306+
;
307+
; LA64-LABEL: alsl_neg_i16:
308+
; LA64: # %bb.0: # %entry
309+
; LA64-NEXT: alsl.d $a0, $a0, $a0, 2
310+
; LA64-NEXT: sub.d $a0, $a1, $a0
311+
; LA64-NEXT: ret
312+
entry:
313+
%mul = mul nsw i16 %a, -5
314+
%add = add nsw i16 %b, %mul
315+
ret i16 %add
316+
}
317+
318+
;; i32 multiply by -9 added to %b: the checks expect one alsl (shift amount 3)
;; followed by a sub from %b, with no mul instruction, on both LA32 and LA64.
define i32 @alsl_neg_i32(i32 signext %a, i32 signext %b) nounwind {
319+
; LA32-LABEL: alsl_neg_i32:
320+
; LA32: # %bb.0: # %entry
321+
; LA32-NEXT: alsl.w $a0, $a0, $a0, 3
322+
; LA32-NEXT: sub.w $a0, $a1, $a0
323+
; LA32-NEXT: ret
324+
;
325+
; LA64-LABEL: alsl_neg_i32:
326+
; LA64: # %bb.0: # %entry
327+
; LA64-NEXT: alsl.d $a0, $a0, $a0, 3
328+
; LA64-NEXT: sub.d $a0, $a1, $a0
329+
; LA64-NEXT: ret
330+
entry:
331+
%mul = mul nsw i32 %a, -9
332+
%add = add nsw i32 %b, %mul
333+
ret i32 %add
334+
}
335+
336+
;; i64 multiply by -15 added to %b. On LA64 the checks expect slli.d by 4 plus
;; sub.d instead of mul.d. On LA32 the checks expect slli.w/sub.w pairs in
;; place of mul.w, while a mulh.wu against the materialized constant -15 is
;; still present.
define i64 @mul_add_neg_i64(i64 signext %a, i64 signext %b) nounwind {
337+
; LA32-LABEL: mul_add_neg_i64:
338+
; LA32: # %bb.0: # %entry
339+
; LA32-NEXT: slli.w $a4, $a1, 4
340+
; LA32-NEXT: sub.w $a1, $a1, $a4
341+
; LA32-NEXT: addi.w $a4, $zero, -15
342+
; LA32-NEXT: mulh.wu $a4, $a0, $a4
343+
; LA32-NEXT: sub.w $a4, $a4, $a0
344+
; LA32-NEXT: add.w $a1, $a4, $a1
345+
; LA32-NEXT: add.w $a1, $a3, $a1
346+
; LA32-NEXT: slli.w $a3, $a0, 4
347+
; LA32-NEXT: sub.w $a0, $a0, $a3
348+
; LA32-NEXT: add.w $a0, $a2, $a0
349+
; LA32-NEXT: sltu $a2, $a0, $a2
350+
; LA32-NEXT: add.w $a1, $a1, $a2
351+
; LA32-NEXT: ret
352+
;
353+
; LA64-LABEL: mul_add_neg_i64:
354+
; LA64: # %bb.0: # %entry
355+
; LA64-NEXT: slli.d $a2, $a0, 4
356+
; LA64-NEXT: sub.d $a0, $a0, $a2
357+
; LA64-NEXT: add.d $a0, $a1, $a0
358+
; LA64-NEXT: ret
359+
entry:
360+
%mul = mul nsw i64 %a, -15
361+
%add = add nsw i64 %b, %mul
362+
ret i64 %add
363+
}

llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,16 +105,15 @@ define i64 @mul_p5(i64 %a) {
105105
; LA32-LABEL: mul_p5:
106106
; LA32: # %bb.0:
107107
; LA32-NEXT: ori $a2, $zero, 5
108-
; LA32-NEXT: mul.w $a1, $a1, $a2
109-
; LA32-NEXT: mulh.wu $a3, $a0, $a2
110-
; LA32-NEXT: add.w $a1, $a3, $a1
111-
; LA32-NEXT: mul.w $a0, $a0, $a2
108+
; LA32-NEXT: mulh.wu $a2, $a0, $a2
109+
; LA32-NEXT: alsl.w $a1, $a1, $a1, 2
110+
; LA32-NEXT: add.w $a1, $a2, $a1
111+
; LA32-NEXT: alsl.w $a0, $a0, $a0, 2
112112
; LA32-NEXT: ret
113113
;
114114
; LA64-LABEL: mul_p5:
115115
; LA64: # %bb.0:
116-
; LA64-NEXT: ori $a1, $zero, 5
117-
; LA64-NEXT: mul.d $a0, $a0, $a1
116+
; LA64-NEXT: alsl.d $a0, $a0, $a0, 2
118117
; LA64-NEXT: ret
119118
%1 = mul i64 %a, 5
120119
ret i64 %1

0 commit comments

Comments
 (0)