Skip to content

Commit 3c7c727

Browse files
committed
Add missing folds
1 parent 594347d commit 3c7c727

File tree

2 files changed

+31
-11
lines changed

2 files changed

+31
-11
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2016,8 +2016,12 @@ let Predicates = [HasSVE_or_SME] in {
20162016
defm : SVEVecStoreLane0Pat<am_unscaled16, truncstorei16, nxv8i16, i32, f16, hsub, simm9, STURHi>;
20172017

20182018
// Lane 0 stores
2019-
defm : SVEVecStoreLane0Pat<am_indexed32, store, nxv4i32, i32, f32, ssub, uimm12s4, STRSui>;
2020-
defm : SVEVecStoreLane0Pat<am_indexed64, store, nxv2i64, i64, f64, dsub, uimm12s4, STRDui>;
2019+
// i32
2020+
defm : SVEVecStoreLane0Pat<am_indexed32, store, nxv4i32, i32, f32, ssub, uimm12s4, STRSui>;
2021+
defm : SVEVecStoreLane0Pat<am_unscaled32, store, nxv4i32, i32, f32, ssub, simm9, STURSi>;
2022+
// i64
2023+
// 64-bit scaled-offset form: STRDui pairs with am_indexed64, whose immediate is
// the 8-byte-scaled uimm12s8 operand (uimm12s4 is the 4-byte-scaled class used
// by the i32/STRSui pattern).
defm : SVEVecStoreLane0Pat<am_indexed64, store, nxv2i64, i64, f64, dsub, uimm12s8, STRDui>;
2024+
defm : SVEVecStoreLane0Pat<am_unscaled64, store, nxv2i64, i64, f64, dsub, simm9, STURDi>;
20212025
}
20222026

20232027
// Insert subvectors into FP SVE vectors.

llvm/test/CodeGen/AArch64/aarch64-sve-ldst-one.ll

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,24 @@ entry:
4444
ret void
4545
}
4646

47+
; Extracts lane 0 of a scalable <vscale x 4 x i32> vector and stores it 8 i32
; elements (32 bytes) before %a. Both check prefixes expect a single unscaled
; FPR store (stur s0, [x0, #-32]) followed by ret.
define void @test_str_lane0_s32_negative_offset(ptr %a, <vscale x 4 x i32> %b) {
48+
; CHECK-LABEL: test_str_lane0_s32_negative_offset:
49+
; CHECK: // %bb.0: // %entry
50+
; CHECK-NEXT: stur s0, [x0, #-32]
51+
; CHECK-NEXT: ret
52+
;
53+
; STREAMING-COMPAT-LABEL: test_str_lane0_s32_negative_offset:
54+
; STREAMING-COMPAT: // %bb.0: // %entry
55+
; STREAMING-COMPAT-NEXT: stur s0, [x0, #-32]
56+
; STREAMING-COMPAT-NEXT: ret
57+
58+
entry:
59+
%0 = extractelement <vscale x 4 x i32> %b, i32 0
60+
%out_ptr = getelementptr inbounds i32, ptr %a, i64 -8
61+
store i32 %0, ptr %out_ptr, align 4
62+
ret void
63+
}
64+
4765
define void @test_str_lane_s64(ptr %a, <vscale x 2 x i64> %b) {
4866
; CHECK-LABEL: test_str_lane_s64:
4967
; CHECK: // %bb.0: // %entry
@@ -281,7 +299,7 @@ define void @test_str_reduction_i32_to_i32_negative_offset(ptr %ptr, <vscale x 4
281299

282300
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
283301
%trunc = trunc i64 %reduce to i32
284-
%out_ptr = getelementptr inbounds float, ptr %ptr, i64 -8
302+
%out_ptr = getelementptr inbounds i32, ptr %ptr, i64 -8
285303
store i32 %trunc, ptr %out_ptr, align 4
286304
ret void
287305
}
@@ -290,19 +308,17 @@ define void @test_str_reduction_i32_to_i64_negative_offset(ptr %ptr, <vscale x 4
290308
; CHECK-LABEL: test_str_reduction_i32_to_i64_negative_offset:
291309
; CHECK: // %bb.0:
292310
; CHECK-NEXT: uaddv d0, p0, z0.s
293-
; CHECK-NEXT: fmov x8, d0
294-
; CHECK-NEXT: stur x8, [x0, #-32]
311+
; CHECK-NEXT: stur d0, [x0, #-64]
295312
; CHECK-NEXT: ret
296313
;
297314
; STREAMING-COMPAT-LABEL: test_str_reduction_i32_to_i64_negative_offset:
298315
; STREAMING-COMPAT: // %bb.0:
299316
; STREAMING-COMPAT-NEXT: uaddv d0, p0, z0.s
300-
; STREAMING-COMPAT-NEXT: fmov x8, d0
301-
; STREAMING-COMPAT-NEXT: stur x8, [x0, #-32]
317+
; STREAMING-COMPAT-NEXT: stur d0, [x0, #-64]
302318
; STREAMING-COMPAT-NEXT: ret
303319

304320
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
305-
%out_ptr = getelementptr inbounds float, ptr %ptr, i64 -8
321+
%out_ptr = getelementptr inbounds i64, ptr %ptr, i64 -8
306322
store i64 %reduce, ptr %out_ptr, align 8
307323
ret void
308324
}
@@ -311,18 +327,18 @@ define void @test_str_reduction_i32_to_i16_negative_offset(ptr %ptr, <vscale x 4
311327
; CHECK-LABEL: test_str_reduction_i32_to_i16_negative_offset:
312328
; CHECK: // %bb.0:
313329
; CHECK-NEXT: uaddv d0, p0, z0.s
314-
; CHECK-NEXT: stur h0, [x0, #-32]
330+
; CHECK-NEXT: stur h0, [x0, #-16]
315331
; CHECK-NEXT: ret
316332
;
317333
; STREAMING-COMPAT-LABEL: test_str_reduction_i32_to_i16_negative_offset:
318334
; STREAMING-COMPAT: // %bb.0:
319335
; STREAMING-COMPAT-NEXT: uaddv d0, p0, z0.s
320-
; STREAMING-COMPAT-NEXT: stur h0, [x0, #-32]
336+
; STREAMING-COMPAT-NEXT: stur h0, [x0, #-16]
321337
; STREAMING-COMPAT-NEXT: ret
322338

323339
%reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %p0, <vscale x 4 x i32> %v)
324340
%trunc = trunc i64 %reduce to i16
325-
%out_ptr = getelementptr inbounds float, ptr %ptr, i64 -8
341+
%out_ptr = getelementptr inbounds i16, ptr %ptr, i64 -8
326342
store i16 %trunc, ptr %out_ptr, align 2
327343
ret void
328344
}

0 commit comments

Comments
 (0)