Skip to content

Commit 5baf58b

Browse files
authored
[RISCV] Improve use of BSETI/BCLRI in constant materialization. (#91546)
We failed to use BSETI when bit 31 was set and a few bits above bit 31 were set. We also failed to use multiple BSETI when the low 32 bits were zero. I've removed the special cases for constants 0x80000000-0xffffffff and written a more generic algorithm for BSETI. I've rewritten the BCLRI handling to be similar to the new BSETI algorithm. This picks up cases where bit 31 is 0 and only a few high bits are 0.
1 parent 3a3aeb8 commit 5baf58b

File tree

3 files changed

+47
-64
lines changed

3 files changed

+47
-64
lines changed

llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp

Lines changed: 33 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -310,56 +310,45 @@ InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI) {
310310
}
311311
}
312312

313-
// Perform optimization with BCLRI/BSETI in the Zbs extension.
313+
// Perform optimization with BSETI in the Zbs extension.
314314
if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) {
315-
// 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
316-
// call generateInstSeqImpl with Val|0x80000000 (which is expected be
317-
// an int32), then emit (BCLRI r, 31).
318-
// 2. For values in range 0x80000000 ~ 0xffffffff, call generateInstSeqImpl
319-
// with Val&~0x80000000 (which is expected to be an int32), then
320-
// emit (BSETI r, 31).
321-
int64_t NewVal;
322-
unsigned Opc;
323-
if (Val < 0) {
324-
Opc = RISCV::BCLRI;
325-
NewVal = Val | 0x80000000ll;
326-
} else {
327-
Opc = RISCV::BSETI;
328-
NewVal = Val & ~0x80000000ll;
329-
}
330-
if (isInt<32>(NewVal)) {
331-
RISCVMatInt::InstSeq TmpSeq;
332-
generateInstSeqImpl(NewVal, STI, TmpSeq);
333-
if ((TmpSeq.size() + 1) < Res.size()) {
334-
TmpSeq.emplace_back(Opc, 31);
335-
Res = TmpSeq;
336-
}
315+
// Create a simm32 value for LUI+ADDIW by forcing the upper 33 bits to zero.
316+
// Xor that with original value to get which bits should be set by BSETI.
317+
uint64_t Lo = Val & 0x7fffffff;
318+
uint64_t Hi = Val ^ Lo;
319+
assert(Hi != 0);
320+
RISCVMatInt::InstSeq TmpSeq;
321+
322+
if (Lo != 0)
323+
generateInstSeqImpl(Lo, STI, TmpSeq);
324+
325+
if (TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
326+
do {
327+
TmpSeq.emplace_back(RISCV::BSETI, llvm::countr_zero(Hi));
328+
Hi &= (Hi - 1); // Clear lowest set bit.
329+
} while (Hi != 0);
330+
Res = TmpSeq;
337331
}
332+
}
333+
334+
// Perform optimization with BCLRI in the Zbs extension.
335+
if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) {
336+
// Create a simm32 value for LUI+ADDIW by forcing the upper 33 bits to one.
337+
// Xor that with original value to get which bits should be cleared by
338+
// BCLRI.
339+
uint64_t Lo = Val | 0xffffffff80000000;
340+
uint64_t Hi = Val ^ Lo;
341+
assert(Hi != 0);
338342

339-
// Try to use BCLRI for upper 32 bits if the original lower 32 bits are
340-
// negative int32, or use BSETI for upper 32 bits if the original lower
341-
// 32 bits are positive int32.
342-
int32_t Lo = Lo_32(Val);
343-
uint32_t Hi = Hi_32(Val);
344-
Opc = 0;
345343
RISCVMatInt::InstSeq TmpSeq;
346344
generateInstSeqImpl(Lo, STI, TmpSeq);
347-
// Check if it is profitable to use BCLRI/BSETI.
348-
if (Lo > 0 && TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
349-
Opc = RISCV::BSETI;
350-
} else if (Lo < 0 && TmpSeq.size() + llvm::popcount(~Hi) < Res.size()) {
351-
Opc = RISCV::BCLRI;
352-
Hi = ~Hi;
353-
}
354-
// Search for each bit and build corresponding BCLRI/BSETI.
355-
if (Opc > 0) {
356-
while (Hi != 0) {
357-
unsigned Bit = llvm::countr_zero(Hi);
358-
TmpSeq.emplace_back(Opc, Bit + 32);
345+
346+
if (TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
347+
do {
348+
TmpSeq.emplace_back(RISCV::BCLRI, llvm::countr_zero(Hi));
359349
Hi &= (Hi - 1); // Clear lowest set bit.
360-
}
361-
if (TmpSeq.size() < Res.size())
362-
Res = TmpSeq;
350+
} while (Hi != 0);
351+
Res = TmpSeq;
363352
}
364353
}
365354

llvm/test/CodeGen/RISCV/imm.ll

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4025,9 +4025,8 @@ define i64 @imm64_0x8000080000000() {
40254025
;
40264026
; RV64IZBS-LABEL: imm64_0x8000080000000:
40274027
; RV64IZBS: # %bb.0:
4028-
; RV64IZBS-NEXT: lui a0, 256
4029-
; RV64IZBS-NEXT: addiw a0, a0, 1
4030-
; RV64IZBS-NEXT: slli a0, a0, 31
4028+
; RV64IZBS-NEXT: bseti a0, zero, 31
4029+
; RV64IZBS-NEXT: bseti a0, a0, 51
40314030
; RV64IZBS-NEXT: ret
40324031
;
40334032
; RV64IXTHEADBB-LABEL: imm64_0x8000080000000:
@@ -4083,9 +4082,8 @@ define i64 @imm64_0x10000100000000() {
40834082
;
40844083
; RV64IZBS-LABEL: imm64_0x10000100000000:
40854084
; RV64IZBS: # %bb.0:
4086-
; RV64IZBS-NEXT: lui a0, 256
4087-
; RV64IZBS-NEXT: addi a0, a0, 1
4088-
; RV64IZBS-NEXT: slli a0, a0, 32
4085+
; RV64IZBS-NEXT: bseti a0, zero, 32
4086+
; RV64IZBS-NEXT: bseti a0, a0, 52
40894087
; RV64IZBS-NEXT: ret
40904088
;
40914089
; RV64IXTHEADBB-LABEL: imm64_0x10000100000000:
@@ -4146,10 +4144,9 @@ define i64 @imm64_0xFF7FFFFF7FFFFFFE() {
41464144
;
41474145
; RV64IZBS-LABEL: imm64_0xFF7FFFFF7FFFFFFE:
41484146
; RV64IZBS: # %bb.0:
4149-
; RV64IZBS-NEXT: lui a0, 1044480
4150-
; RV64IZBS-NEXT: addiw a0, a0, -1
4151-
; RV64IZBS-NEXT: slli a0, a0, 31
4152-
; RV64IZBS-NEXT: addi a0, a0, -1
4147+
; RV64IZBS-NEXT: li a0, -1
4148+
; RV64IZBS-NEXT: bclri a0, a0, 31
4149+
; RV64IZBS-NEXT: bclri a0, a0, 55
41534150
; RV64IZBS-NEXT: ret
41544151
;
41554152
; RV64IXTHEADBB-LABEL: imm64_0xFF7FFFFF7FFFFFFE:

llvm/test/CodeGen/RISCV/rv64-legal-i32/imm.ll

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2648,9 +2648,8 @@ define i64 @imm64_0x8000080000000() {
26482648
;
26492649
; RV64IZBS-LABEL: imm64_0x8000080000000:
26502650
; RV64IZBS: # %bb.0:
2651-
; RV64IZBS-NEXT: lui a0, 256
2652-
; RV64IZBS-NEXT: addiw a0, a0, 1
2653-
; RV64IZBS-NEXT: slli a0, a0, 31
2651+
; RV64IZBS-NEXT: bseti a0, zero, 31
2652+
; RV64IZBS-NEXT: bseti a0, a0, 51
26542653
; RV64IZBS-NEXT: ret
26552654
;
26562655
; RV64IXTHEADBB-LABEL: imm64_0x8000080000000:
@@ -2686,9 +2685,8 @@ define i64 @imm64_0x10000100000000() {
26862685
;
26872686
; RV64IZBS-LABEL: imm64_0x10000100000000:
26882687
; RV64IZBS: # %bb.0:
2689-
; RV64IZBS-NEXT: lui a0, 256
2690-
; RV64IZBS-NEXT: addi a0, a0, 1
2691-
; RV64IZBS-NEXT: slli a0, a0, 32
2688+
; RV64IZBS-NEXT: bseti a0, zero, 32
2689+
; RV64IZBS-NEXT: bseti a0, a0, 52
26922690
; RV64IZBS-NEXT: ret
26932691
;
26942692
; RV64IXTHEADBB-LABEL: imm64_0x10000100000000:
@@ -2727,10 +2725,9 @@ define i64 @imm64_0xFF7FFFFF7FFFFFFE() {
27272725
;
27282726
; RV64IZBS-LABEL: imm64_0xFF7FFFFF7FFFFFFE:
27292727
; RV64IZBS: # %bb.0:
2730-
; RV64IZBS-NEXT: lui a0, 1044480
2731-
; RV64IZBS-NEXT: addiw a0, a0, -1
2732-
; RV64IZBS-NEXT: slli a0, a0, 31
2733-
; RV64IZBS-NEXT: addi a0, a0, -1
2728+
; RV64IZBS-NEXT: li a0, -1
2729+
; RV64IZBS-NEXT: bclri a0, a0, 31
2730+
; RV64IZBS-NEXT: bclri a0, a0, 55
27342731
; RV64IZBS-NEXT: ret
27352732
;
27362733
; RV64IXTHEADBB-LABEL: imm64_0xFF7FFFFF7FFFFFFE:

0 commit comments

Comments
 (0)