Skip to content

Commit 84220ec

Browse files
authored
[LoongArch] Add generation support for preld instruction (#118436)
The `preld` instruction prefetches one cache line of data from memory into the cache ahead of its use. This commit lets the compiler generate it automatically when lowering `llvm.prefetch`.
1 parent 6972788 commit 84220ec

File tree

5 files changed

+101
-0
lines changed

5 files changed

+101
-0
lines changed

llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,28 @@ bool LoongArchDAGToDAGISel::selectNonFIBaseAddr(SDValue Addr, SDValue &Base) {
245245
return true;
246246
}
247247

248+
/// Split \p Addr into a base operand and a signed 12-bit displacement.
///
/// When \p Addr is a base-plus-constant node whose constant fits in a signed
/// 12-bit immediate, \p Base receives the non-constant operand and \p Offset
/// receives the constant as a target constant. In every other case \p Base is
/// \p Addr itself and \p Offset is a zero target constant.
///
/// \returns true unconditionally; this selector never rejects an address.
bool LoongArchDAGToDAGISel::SelectAddrRegImm12(SDValue Addr, SDValue &Base,
                                               SDValue &Offset) {
  SDLoc Loc(Addr);
  MVT AddrVT = Addr.getSimpleValueType();

  // Start from the fallback form: the whole address is the base and the
  // displacement is zero.
  SDValue BaseOp = Addr;
  int64_t Disp = 0;

  // Refine to reg+simm12 only when the address is an ADD-like node carrying a
  // constant that is representable in 12 signed bits.
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    int64_t Imm = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isInt<12>(Imm)) {
      BaseOp = Addr.getOperand(0);
      Disp = SignExtend64<12>(Imm);
    }
  }

  Base = BaseOp;
  Offset = CurDAG->getTargetConstant(Disp, Loc, AddrVT);
  return true;
}
269+
248270
bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
249271
SDValue &ShAmt) {
250272
// Shift instructions on LoongArch only read the lower 5 or 6 bits of the

llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel {
4343
bool SelectBaseAddr(SDValue Addr, SDValue &Base);
4444
bool SelectAddrConstant(SDValue Addr, SDValue &Base, SDValue &Offset);
4545
bool selectNonFIBaseAddr(SDValue Addr, SDValue &Base);
46+
bool SelectAddrRegImm12(SDValue Addr, SDValue &Base, SDValue &Offset);
4647

4748
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
4849
bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) {

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
9999
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
100100
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
101101

102+
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
103+
102104
// Expand bitreverse.i16 with native-width bitrev and shift for now, before
103105
// we get to know which of sll and revb.2h is faster.
104106
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,7 @@ def HI16ForAddu16idAddiPair: SDNodeXForm<imm, [{
522522
def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">;
523523
def AddrConstant : ComplexPattern<iPTR, 2, "SelectAddrConstant">;
524524
def NonFIBaseAddr : ComplexPattern<iPTR, 1, "selectNonFIBaseAddr">;
525+
def AddrRegImm : ComplexPattern<iPTR, 2, "SelectAddrRegImm12">;
525526

526527
def fma_nsz : PatFrag<(ops node:$fj, node:$fk, node:$fa),
527528
(fma node:$fj, node:$fk, node:$fa), [{
@@ -2011,6 +2012,14 @@ class PseudoMaskedAMMinMax
20112012
def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMMinMax;
20122013
def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
20132014

2015+
// Data prefetch
2016+
2017+
// TODO: Add support for the preldx instruction.
2018+
// Select the generic `prefetch` node to PRELD. The address operand is split
// into a base register and a simm12 displacement by the AddrRegImm complex
// pattern. The second operand (i32 0 / i32 1) distinguishes the load and store
// variants, which are emitted with PRELD hint 0 and hint 8 respectively. The
// third operand is matched as any target immediate (`timm`) and is not used in
// the emitted instruction. Only prefetches whose fourth operand is (i32 1) are
// matched.
// NOTE(review): per the llvm.prefetch intrinsic the third operand is presumably
// the locality hint and the fourth the cache type (1 = data) -- confirm.
def : Pat<(prefetch (AddrRegImm GPR:$rj, simm12:$imm12), (i32 0), timm, (i32 1)),
2019+
(PRELD 0, GPR:$rj, simm12:$imm12)>; // data prefetch for loads
2020+
def : Pat<(prefetch (AddrRegImm GPR:$rj, simm12:$imm12), (i32 1), timm, (i32 1)),
2021+
(PRELD 8, GPR:$rj, simm12:$imm12)>; // data prefetch for stores
2022+
20142023
/// Compare and exchange
20152024

20162025
class PseudoCmpXchg

llvm/test/CodeGen/LoongArch/preld.ll

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
3+
; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
4+
5+
declare void @llvm.prefetch(ptr, i32, i32, i32)
6+
7+
define void @load_prefetch_no_offset(ptr %a) {
8+
; LA32-LABEL: load_prefetch_no_offset:
9+
; LA32: # %bb.0: # %entry
10+
; LA32-NEXT: preld 0, $a0, 0
11+
; LA32-NEXT: ret
12+
;
13+
; LA64-LABEL: load_prefetch_no_offset:
14+
; LA64: # %bb.0: # %entry
15+
; LA64-NEXT: preld 0, $a0, 0
16+
; LA64-NEXT: ret
17+
entry:
18+
call void @llvm.prefetch(ptr %a, i32 0, i32 3, i32 1)
19+
ret void
20+
}
21+
22+
define void @store_prefetch_no_offset(ptr %a) {
23+
; LA32-LABEL: store_prefetch_no_offset:
24+
; LA32: # %bb.0: # %entry
25+
; LA32-NEXT: preld 8, $a0, 0
26+
; LA32-NEXT: ret
27+
;
28+
; LA64-LABEL: store_prefetch_no_offset:
29+
; LA64: # %bb.0: # %entry
30+
; LA64-NEXT: preld 8, $a0, 0
31+
; LA64-NEXT: ret
32+
entry:
33+
call void @llvm.prefetch(ptr %a, i32 1, i32 3, i32 1)
34+
ret void
35+
}
36+
37+
define void @load_prefetch_with_offset(ptr %a) {
38+
; LA32-LABEL: load_prefetch_with_offset:
39+
; LA32: # %bb.0: # %entry
40+
; LA32-NEXT: preld 0, $a0, 200
41+
; LA32-NEXT: ret
42+
;
43+
; LA64-LABEL: load_prefetch_with_offset:
44+
; LA64: # %bb.0: # %entry
45+
; LA64-NEXT: preld 0, $a0, 200
46+
; LA64-NEXT: ret
47+
entry:
48+
%addr = getelementptr i8, ptr %a, i64 200
49+
call void @llvm.prefetch(ptr %addr, i32 0, i32 3, i32 1)
50+
ret void
51+
}
52+
53+
define void @store_prefetch_with_offset(ptr %a) {
54+
; LA32-LABEL: store_prefetch_with_offset:
55+
; LA32: # %bb.0: # %entry
56+
; LA32-NEXT: preld 8, $a0, 200
57+
; LA32-NEXT: ret
58+
;
59+
; LA64-LABEL: store_prefetch_with_offset:
60+
; LA64: # %bb.0: # %entry
61+
; LA64-NEXT: preld 8, $a0, 200
62+
; LA64-NEXT: ret
63+
entry:
64+
%addr = getelementptr i8, ptr %a, i64 200
65+
call void @llvm.prefetch(ptr %addr, i32 1, i32 3, i32 1)
66+
ret void
67+
}

0 commit comments

Comments
 (0)