Skip to content

Commit 277ca48

Browse files
committed
[AArch64] Additional saddlv store tests. NFC
The select-intrinsic-uaddlv.mir test now lowers via G_SADDLV / G_UADDLV, so it is no longer needed.
1 parent fa9f6b5 commit 277ca48

File tree

3 files changed

+100
-149
lines changed

3 files changed

+100
-149
lines changed

llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir

Lines changed: 0 additions & 109 deletions
This file was deleted.

llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,3 +485,101 @@ entry:
485485
store <4 x float> %2, ptr %0, align 8
486486
ret void
487487
}
488+
489+
define void @store_saddlv_v8i8(ptr %H, <8 x i8> %sum_h, i32 %idx) {
490+
; CHECK-LABEL: store_saddlv_v8i8:
491+
; CHECK: ; %bb.0: ; %entry
492+
; CHECK-NEXT: saddlv.8b h0, v0
493+
; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
494+
; CHECK-NEXT: sbfiz x9, x1, #3, #32
495+
; CHECK-NEXT: smov.h w8, v0[0]
496+
; CHECK-NEXT: str w8, [x0, x9]
497+
; CHECK-NEXT: ret
498+
entry:
499+
%vaddlvq_s32.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %sum_h)
500+
%idxprom = sext i32 %idx to i64
501+
%arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
502+
store i32 %vaddlvq_s32.i, ptr %arrayidx, align 8
503+
ret void
504+
}
505+
506+
define void @store_saddlv_v16i8(ptr %H, <16 x i8> %sum_h, i32 %idx) {
507+
; CHECK-LABEL: store_saddlv_v16i8:
508+
; CHECK: ; %bb.0: ; %entry
509+
; CHECK-NEXT: saddlv.16b h0, v0
510+
; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
511+
; CHECK-NEXT: sbfiz x9, x1, #3, #32
512+
; CHECK-NEXT: smov.h w8, v0[0]
513+
; CHECK-NEXT: str w8, [x0, x9]
514+
; CHECK-NEXT: ret
515+
entry:
516+
%vaddlvq_s32.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %sum_h)
517+
%idxprom = sext i32 %idx to i64
518+
%arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
519+
store i32 %vaddlvq_s32.i, ptr %arrayidx, align 8
520+
ret void
521+
}
522+
523+
define void @store_saddlv_v4i16(ptr %H, <4 x i16> %sum_h, i32 %idx) {
524+
; CHECK-LABEL: store_saddlv_v4i16:
525+
; CHECK: ; %bb.0: ; %entry
526+
; CHECK-NEXT: saddlv.4h s0, v0
527+
; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
528+
; CHECK-NEXT: sbfiz x8, x1, #3, #32
529+
; CHECK-NEXT: fmov w9, s0
530+
; CHECK-NEXT: str w9, [x0, x8]
531+
; CHECK-NEXT: ret
532+
entry:
533+
%vaddlvq_s32.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %sum_h)
534+
%idxprom = sext i32 %idx to i64
535+
%arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
536+
store i32 %vaddlvq_s32.i, ptr %arrayidx, align 8
537+
ret void
538+
}
539+
540+
define void @store_saddlv_v8i16(ptr %H, <8 x i16> %sum_h, i32 %idx) {
541+
; CHECK-LABEL: store_saddlv_v8i16:
542+
; CHECK: ; %bb.0: ; %entry
543+
; CHECK-NEXT: saddlv.8h s0, v0
544+
; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
545+
; CHECK-NEXT: sbfiz x8, x1, #3, #32
546+
; CHECK-NEXT: fmov w9, s0
547+
; CHECK-NEXT: str w9, [x0, x8]
548+
; CHECK-NEXT: ret
549+
entry:
550+
%vaddlvq_s32.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %sum_h)
551+
%idxprom = sext i32 %idx to i64
552+
%arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
553+
store i32 %vaddlvq_s32.i, ptr %arrayidx, align 8
554+
ret void
555+
}
556+
557+
define void @store_saddlv_v2i32(ptr %H, <2 x i32> %sum_h, i32 %idx) {
558+
; CHECK-LABEL: store_saddlv_v2i32:
559+
; CHECK: ; %bb.0: ; %entry
560+
; CHECK-NEXT: saddlp.1d v0, v0
561+
; CHECK-NEXT: fmov x8, d0
562+
; CHECK-NEXT: str x8, [x0, w1, sxtw #3]
563+
; CHECK-NEXT: ret
564+
entry:
565+
%vaddlvq_s32.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %sum_h)
566+
%idxprom = sext i32 %idx to i64
567+
%arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
568+
store i64 %vaddlvq_s32.i, ptr %arrayidx, align 8
569+
ret void
570+
}
571+
572+
define void @store_saddlv_v4i32(ptr %H, <4 x i32> %sum_h, i32 %idx) {
573+
; CHECK-LABEL: store_saddlv_v4i32:
574+
; CHECK: ; %bb.0: ; %entry
575+
; CHECK-NEXT: saddlv.4s d0, v0
576+
; CHECK-NEXT: fmov x8, d0
577+
; CHECK-NEXT: str x8, [x0, w1, sxtw #3]
578+
; CHECK-NEXT: ret
579+
entry:
580+
%vaddlvq_s32.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %sum_h)
581+
%idxprom = sext i32 %idx to i64
582+
%arrayidx = getelementptr inbounds i64, ptr %H, i64 %idxprom
583+
store i64 %vaddlvq_s32.i, ptr %arrayidx, align 8
584+
ret void
585+
}

llvm/test/CodeGen/AArch64/arm64-neon-across.ll

Lines changed: 2 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,45 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -global-isel=1 -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-GI
2+
; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon -global-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

55
declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)
6-
76
declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)
8-
97
declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)
10-
118
declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)
12-
139
declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)
14-
1510
declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>)
16-
1711
declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>)
18-
1912
declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>)
20-
2113
declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)
22-
2314
declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>)
24-
2515
declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>)
26-
2716
declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>)
28-
2917
declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)
30-
3118
declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)
32-
3319
declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)
34-
3520
declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>)
36-
3721
declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>)
38-
3922
declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)
40-
4123
declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)
42-
4324
declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>)
44-
4525
declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>)
46-
4726
declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>)
48-
4927
declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)
50-
5128
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)
52-
5329
declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)
54-
5530
declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>)
56-
5731
declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>)
58-
5932
declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)
60-
6133
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)
62-
6334
declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)
64-
6535
declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>)
66-
6736
declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>)
68-
6937
declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)
70-
7138
declare i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16>)
72-
7339
declare i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8>)
74-
7540
declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>)
76-
7741
declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>)
78-
7942
declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>)
80-
8143
declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)
8244

8345
define i16 @test_vaddlv_s8(<8 x i8> %a) {

0 commit comments

Comments
 (0)