Skip to content

Commit 86a03c3

Browse files
committed
fixup! put remainder in offset
1 parent d25e275 commit 86a03c3

File tree

3 files changed

+73
-61
lines changed

3 files changed

+73
-61
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1744,45 +1744,60 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
17441744

17451745
void AArch64DAGToDAGISel::SelectSMELdrStrZA(SDNode *N, bool IsLoad) {
17461746
// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
1747-
// If the vector select parameter is an immediate in the range 0-15 then we
1748-
// can emit it directly into the instruction as it's a legal operand.
1749-
// Otherwise we must emit 0 as the vector select operand and modify the base
1750-
// register instead.
1747+
// If the vector number is an immediate between 0 and 15 inclusive then we can
1748+
// put that directly into the immediate field of the instruction. If it's
1749+
// outside of that range then we modify the base and slice by the greatest
1750+
// multiple of 15 smaller than that number and put the remainder in the
1751+
// instruction field. If it's not an immediate then we modify the base and
1752+
// slice registers by that number and put 0 in the instruction.
17511753
SDLoc DL(N);
17521754

1753-
SDValue VecNum = N->getOperand(4), Base = N->getOperand(3),
1754-
TileSlice = N->getOperand(2);
1755-
int Imm = -1;
1756-
if (auto ImmNode = dyn_cast<ConstantSDNode>(VecNum))
1757-
Imm = ImmNode->getZExtValue();
1755+
SDValue TileSlice = N->getOperand(2);
1756+
SDValue Base = N->getOperand(3);
1757+
SDValue VecNum = N->getOperand(4);
1758+
SDValue Remainder = CurDAG->getTargetConstant(0, DL, MVT::i32);
1759+
1760+
// true if the base and slice registers need to be modified
1761+
bool NeedsAdd = true;
1762+
if (auto ImmNode = dyn_cast<ConstantSDNode>(VecNum)) {
1763+
int Imm = ImmNode->getSExtValue();
1764+
if (Imm >= 0 && Imm <= 15) {
1765+
Remainder = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
1766+
NeedsAdd = false;
1767+
} else {
1768+
Remainder = CurDAG->getTargetConstant(Imm % 15, DL, MVT::i32);
1769+
NeedsAdd = true;
1770+
VecNum =
1771+
SDValue(CurDAG->getMachineNode(AArch64::MOVi32imm, DL, MVT::i32,
1772+
CurDAG->getTargetConstant(
1773+
Imm - (Imm % 15), DL, MVT::i32)),
1774+
0);
1775+
}
1776+
}
17581777

1759-
if (Imm >= 0 && Imm <= 15) {
1760-
// 0-15 is a legal immediate so just pass it directly as a TargetConstant
1761-
VecNum = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
1762-
} else {
1778+
if (NeedsAdd) {
17631779
// Get the vector length that will be multiplied by vnum
17641780
auto SVL = SDValue(
17651781
CurDAG->getMachineNode(AArch64::RDSVLI_XI, DL, MVT::i64,
17661782
CurDAG->getTargetConstant(1, DL, MVT::i32)),
17671783
0);
17681784

1769-
// Multiply SVL and vnum then add it to the base register
1770-
if (VecNum.getValueType() == MVT::i32)
1771-
VecNum = Widen(CurDAG, VecNum);
1772-
SDValue AddOps[] = {SVL, VecNum, Base};
1773-
auto Add = SDValue(
1774-
CurDAG->getMachineNode(AArch64::MADDXrrr, DL, MVT::i64, AddOps), 0);
1775-
1776-
// The base register has been modified to take vnum into account so just
1777-
// pass 0
1778-
VecNum = CurDAG->getTargetConstant(0, DL, MVT::i32);
1779-
Base = Add;
1785+
// Multiply SVL and vnum then add it to the base
1786+
// Just add vnum to the tileslice
1787+
SDValue BaseAddOps[] = {
1788+
SVL, VecNum.getValueType() == MVT::i32 ? Widen(CurDAG, VecNum) : VecNum,
1789+
Base};
1790+
SDValue SliceAddOps[] = {TileSlice, VecNum};
1791+
Base = SDValue(
1792+
CurDAG->getMachineNode(AArch64::MADDXrrr, DL, MVT::i64, BaseAddOps), 0);
1793+
TileSlice = SDValue(
1794+
CurDAG->getMachineNode(AArch64::ADDWrr, DL, MVT::i32, SliceAddOps), 0);
17801795
}
17811796

1782-
SmallVector<SDValue, 6> Ops = {TileSlice, VecNum, Base};
1797+
SmallVector<SDValue, 6> Ops = {TileSlice, Remainder, Base};
17831798
if (!IsLoad) {
17841799
Ops.insert(Ops.begin(), CurDAG->getRegister(AArch64::ZA, MVT::Other));
1785-
Ops.push_back(VecNum);
1800+
Ops.push_back(Remainder);
17861801
}
17871802
auto LdrStr =
17881803
CurDAG->getMachineNode(IsLoad ? AArch64::LDR_ZA_PSEUDO : AArch64::STR_ZA,

llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -296,38 +296,39 @@ define void @ldr_with_off_16mulvl(ptr %ptr) {
296296
ret void;
297297
}
298298

299-
define void @ldr_with_off_var(ptr %base, i32 %off) {
299+
define void @ldr_with_off_var(i32 %slice, ptr %base, i32 %off) {
300300
; CHECK-LABEL: ldr_with_off_var:
301301
; CHECK: // %bb.0:
302302
; CHECK-NEXT: rdsvl x8, #1
303-
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
304-
; CHECK-NEXT: mov w12, #16 // =0x10
305-
; CHECK-NEXT: madd x8, x8, x1, x0
303+
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
304+
; CHECK-NEXT: add w12, w0, w2
305+
; CHECK-NEXT: madd x8, x8, x2, x1
306306
; CHECK-NEXT: ldr za[w12, 0], [x8]
307307
; CHECK-NEXT: ret
308-
call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 %off)
308+
call void @llvm.aarch64.sme.ldr(i32 %slice, ptr %base, i32 %off)
309309
ret void;
310310
}
311311

312-
define void @ldr_with_off_15imm(ptr %base) {
312+
define void @ldr_with_off_15imm(i32 %slice, ptr %base) {
313313
; CHECK-LABEL: ldr_with_off_15imm:
314314
; CHECK: // %bb.0:
315-
; CHECK-NEXT: mov w12, #16 // =0x10
316-
; CHECK-NEXT: ldr za[w12, 15], [x0, #15, mul vl]
315+
; CHECK-NEXT: mov w12, w0
316+
; CHECK-NEXT: ldr za[w12, 15], [x1, #15, mul vl]
317317
; CHECK-NEXT: ret
318-
call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 15)
318+
call void @llvm.aarch64.sme.ldr(i32 %slice, ptr %base, i32 15)
319319
ret void;
320320
}
321321

322-
define void @ldr_with_off_16imm(ptr %base) {
322+
define void @ldr_with_off_16imm(i32 %slice, ptr %base) {
323323
; CHECK-LABEL: ldr_with_off_16imm:
324324
; CHECK: // %bb.0:
325-
; CHECK-NEXT: rdsvl x8, #1
326-
; CHECK-NEXT: mov w12, #16 // =0x10
327-
; CHECK-NEXT: madd x8, x8, x12, x0
328-
; CHECK-NEXT: ldr za[w12, 0], [x8]
325+
; CHECK-NEXT: mov w8, #15 // =0xf
326+
; CHECK-NEXT: rdsvl x9, #1
327+
; CHECK-NEXT: madd x9, x9, x8, x1
328+
; CHECK-NEXT: add w12, w0, w8
329+
; CHECK-NEXT: ldr za[w12, 1], [x9, #1, mul vl]
329330
; CHECK-NEXT: ret
330-
call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 16)
331+
call void @llvm.aarch64.sme.ldr(i32 %slice, ptr %base, i32 16)
331332
ret void;
332333
}
333334

llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -296,43 +296,39 @@ define void @str_with_off_16mulvl(ptr %ptr) {
296296
ret void;
297297
}
298298

299-
define void @str_with_off_var(ptr %base, i32 %off) {
299+
define void @str_with_off_var(i32 %slice, ptr %base, i32 %off) {
300300
; CHECK-LABEL: str_with_off_var:
301301
; CHECK: // %bb.0:
302302
; CHECK-NEXT: rdsvl x8, #1
303-
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
304-
; CHECK-NEXT: mov w12, #16 // =0x10
305-
; CHECK-NEXT: madd x8, x8, x1, x0
303+
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
304+
; CHECK-NEXT: add w12, w0, w2
305+
; CHECK-NEXT: madd x8, x8, x2, x1
306306
; CHECK-NEXT: str za[w12, 0], [x8]
307307
; CHECK-NEXT: ret
308-
call void @llvm.aarch64.sme.str(i32 16, ptr %base, i32 %off)
308+
call void @llvm.aarch64.sme.str(i32 %slice, ptr %base, i32 %off)
309309
ret void;
310310
}
311311

312-
define void @str_with_off_15imm(ptr %ptr) {
312+
define void @str_with_off_15imm(i32 %slice, ptr %base) {
313313
; CHECK-LABEL: str_with_off_15imm:
314314
; CHECK: // %bb.0:
315-
; CHECK-NEXT: mov w12, #15 // =0xf
316-
; CHECK-NEXT: add x8, x0, #15
317-
; CHECK-NEXT: str za[w12, 15], [x8, #15, mul vl]
315+
; CHECK-NEXT: mov w12, w0
316+
; CHECK-NEXT: str za[w12, 15], [x1, #15, mul vl]
318317
; CHECK-NEXT: ret
319-
%base = getelementptr i8, ptr %ptr, i64 15
320-
call void @llvm.aarch64.sme.str(i32 15, ptr %base, i32 15)
318+
call void @llvm.aarch64.sme.str(i32 %slice, ptr %base, i32 15)
321319
ret void;
322320
}
323321

324-
define void @str_with_off_16imm(ptr %ptr) {
322+
define void @str_with_off_16imm(i32 %slice, ptr %base) {
325323
; CHECK-LABEL: str_with_off_16imm:
326324
; CHECK: // %bb.0:
327-
; CHECK-NEXT: rdsvl x8, #1
328-
; CHECK-NEXT: mov w9, #16 // =0x10
329-
; CHECK-NEXT: add x10, x0, #15
330-
; CHECK-NEXT: madd x8, x8, x9, x10
331-
; CHECK-NEXT: mov w12, #15 // =0xf
332-
; CHECK-NEXT: str za[w12, 0], [x8]
325+
; CHECK-NEXT: mov w8, #15 // =0xf
326+
; CHECK-NEXT: rdsvl x9, #1
327+
; CHECK-NEXT: madd x9, x9, x8, x1
328+
; CHECK-NEXT: add w12, w0, w8
329+
; CHECK-NEXT: str za[w12, 1], [x9, #1, mul vl]
333330
; CHECK-NEXT: ret
334-
%base = getelementptr i8, ptr %ptr, i64 15
335-
call void @llvm.aarch64.sme.str(i32 15, ptr %base, i32 16)
331+
call void @llvm.aarch64.sme.str(i32 %slice, ptr %base, i32 16)
336332
ret void;
337333
}
338334

0 commit comments

Comments
 (0)