; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s

define <16 x i16> @zext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgflooru:
@@ -17,6 +17,28 @@ define <16 x i16> @zext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
  ret <16 x i16> %avg
}

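+; Scalable floor-average pattern ((a & b) + ((a ^ b) >> 1)) on <vscale x 8 x i8>
+; values taken from masked loads; the result is zero-extended and stored as
+; <vscale x 8 x i16> via llvm.masked.store.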
+define void @zext_mload_avgflooru(ptr %p1, ptr %p2, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: zext_mload_avgflooru:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1b { z1.h }, p0/z, [x1]
+; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ld1 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p1, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
+  %ld2 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p2, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
+  %and = and <vscale x 8 x i8> %ld1, %ld2
+  %xor = xor <vscale x 8 x i8> %ld1, %ld2
+  %shift = lshr <vscale x 8 x i8> %xor, splat(i8 1)
+  %avg = add <vscale x 8 x i8> %and, %shift
+  %avgext = zext <vscale x 8 x i8> %avg to <vscale x 8 x i16>
+  call void @llvm.masked.store.nxv8i16(<vscale x 8 x i16> %avgext, ptr %p1, i32 16, <vscale x 8 x i1> %mask)
+  ret void
+}
+
define <16 x i16> @zext_avgflooru_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
; CHECK-LABEL: zext_avgflooru_mismatch:
; CHECK:       // %bb.0:
@@ -51,6 +73,32 @@ define <16 x i16> @zext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
  ret <16 x i16> %avg
}

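+; Scalable ceiling-average pattern computed in the wider type:
+; (zext(a) + 1 + zext(b)) >> 1 on <vscale x 8 x i16>, with the operands taken
+; from masked loads and the result truncated back to <vscale x 8 x i8> before
+; the masked store.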
+define void @zext_mload_avgceilu(ptr %p1, ptr %p2, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: zext_mload_avgceilu:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1b { z1.h }, p0/z, [x1]
+; CHECK-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.h, z1.h, z0.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    st1b { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ld1 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p1, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
+  %ld2 = call <vscale x 8 x i8> @llvm.masked.load(ptr %p2, i32 16, <vscale x 8 x i1> %mask, <vscale x 8 x i8> zeroinitializer)
+  %zext1 = zext <vscale x 8 x i8> %ld1 to <vscale x 8 x i16>
+  %zext2 = zext <vscale x 8 x i8> %ld2 to <vscale x 8 x i16>
+  %add1 = add nuw nsw <vscale x 8 x i16> %zext1, splat(i16 1)
+  %add2 = add nuw nsw <vscale x 8 x i16> %add1, %zext2
+  %shift = lshr <vscale x 8 x i16> %add2, splat(i16 1)
+  %trunc = trunc <vscale x 8 x i16> %shift to <vscale x 8 x i8>
+  call void @llvm.masked.store.nxv8i8(<vscale x 8 x i8> %trunc, ptr %p1, i32 16, <vscale x 8 x i1> %mask)
+  ret void
+}
+
+
define <16 x i16> @zext_avgceilu_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgceilu_mismatch:
; CHECK:       // %bb.0: