Skip to content

Commit cf3d136

Browse files
authored
[AArch64] Do not generate ld1IndexPost when inserting into lane 0 of a zero vector (#145723)
If we are inserting into lane 0 of a zero vector, we can use a plain ldr instruction, which zeroes the upper lanes for free. Do not attempt to form a post-increment ld1 in that case; the ldr followed by a separate add should take fewer micro-ops overall.
1 parent e980523 commit cf3d136

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23389,6 +23389,8 @@ static SDValue performPostLD1Combine(SDNode *N,
2338923389
auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
2339023390
if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
2339123391
return SDValue();
23392+
if (LaneC->getZExtValue() == 0 && isNullOrNullSplat(N->getOperand(0)))
23393+
return SDValue();
2339223394
}
2339323395

2339423396
LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);

llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13340,9 +13340,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
1334013340
define <16 x i8> @test_v16i8_post_reg_ld1lane_zero(ptr %bar, ptr %ptr, i64 %inc) {
1334113341
; CHECK-SD-LABEL: test_v16i8_post_reg_ld1lane_zero:
1334213342
; CHECK-SD: ; %bb.0:
13343-
; CHECK-SD-NEXT: movi.2d v0, #0000000000000000
13344-
; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0], x2
13345-
; CHECK-SD-NEXT: str x0, [x1]
13343+
; CHECK-SD-NEXT: ldr b0, [x0]
13344+
; CHECK-SD-NEXT: add x8, x0, x2
13345+
; CHECK-SD-NEXT: str x8, [x1]
1334613346
; CHECK-SD-NEXT: ret
1334713347
;
1334813348
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane_zero:
@@ -14133,17 +14133,15 @@ define i32 @load_single_extract_variable_index_masked2_i32(ptr %A, i32 %idx) {
1413314133
define void @chained_insert_zero(ptr noundef %fenc, ptr noundef %pred, ptr noundef %residual, i32 noundef %stride) {
1413414134
; CHECK-SD-LABEL: chained_insert_zero:
1413514135
; CHECK-SD: ; %bb.0: ; %entry
14136-
; CHECK-SD-NEXT: movi.2d v0, #0000000000000000
14137-
; CHECK-SD-NEXT: movi.2d v1, #0000000000000000
14136+
; CHECK-SD-NEXT: ldr s0, [x1]
14137+
; CHECK-SD-NEXT: ldr s1, [x0]
1413814138
; CHECK-SD-NEXT: ; kill: def $w3 killed $w3 def $x3
1413914139
; CHECK-SD-NEXT: sxtw x8, w3
14140-
; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0], x8
14141-
; CHECK-SD-NEXT: ld1.s { v1 }[0], [x1], x8
14142-
; CHECK-SD-NEXT: sbfiz x8, x3, #1, #32
14143-
; CHECK-SD-NEXT: usubl.8h v0, v0, v1
14140+
; CHECK-SD-NEXT: usubl.8h v0, v1, v0
1414414141
; CHECK-SD-NEXT: str d0, [x2]
14145-
; CHECK-SD-NEXT: ldr s0, [x0]
14146-
; CHECK-SD-NEXT: ldr s1, [x1]
14142+
; CHECK-SD-NEXT: ldr s0, [x0, x8]
14143+
; CHECK-SD-NEXT: ldr s1, [x1, x8]
14144+
; CHECK-SD-NEXT: sbfiz x8, x3, #1, #32
1414714145
; CHECK-SD-NEXT: usubl.8h v0, v0, v1
1414814146
; CHECK-SD-NEXT: str d0, [x2, x8]
1414914147
; CHECK-SD-NEXT: ret

0 commit comments

Comments
 (0)