@@ -32,3 +32,117 @@ entry:
32
32
ret <8 x i8 > %vtbl11.i
33
33
}
34
34
35
; Test @tbl1v8i8: loads an <8 x i16> vector from %in, logically shifts every
; lane right by 4, and truncates the result to <8 x i8>. The narrow value is
; then widened to <16 x i8> by a shufflevector whose indices 8-15 select lanes
; of the zeroinitializer operand, i.e. the upper half of the table is zero.
; The widened vector is the table operand of the tbl1 intrinsic, indexed by
; %idx. The expected assembly below consists of ldr, shrn and tbl only; it has
; no separate instruction that zeroes the upper 64 bits of v1.
+ define <8 x i8 > @tbl1v8i8 (ptr nocapture noundef readonly %in , <8 x i8 > noundef %idx ) {
36
+ ; CHECK-LABEL: tbl1v8i8:
37
+ ; CHECK: // %bb.0: // %entry
38
+ ; CHECK-NEXT: ldr q1, [x0]
39
+ ; CHECK-NEXT: shrn v1.8b, v1.8h, #4
40
+ ; CHECK-NEXT: tbl v0.8b, { v1.16b }, v0.8b
41
+ ; CHECK-NEXT: ret
42
+ entry:
43
+ %0 = load <8 x i16 >, ptr %in , align 2
44
+ %1 = lshr <8 x i16 > %0 , <i16 4 , i16 4 , i16 4 , i16 4 , i16 4 , i16 4 , i16 4 , i16 4 >
45
+ %vshrn_n = trunc <8 x i16 > %1 to <8 x i8 >
46
+ %vtbl1.i = shufflevector <8 x i8 > %vshrn_n , <8 x i8 > zeroinitializer , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
47
+ %vtbl11.i = tail call <8 x i8 > @llvm.aarch64.neon.tbl1.v8i8 (<16 x i8 > %vtbl1.i , <8 x i8 > %idx )
48
+ ret <8 x i8 > %vtbl11.i
49
+ }
50
+
51
; Test @addpv4i16: calls the addp intrinsic on the two <4 x i16> arguments and
; returns the 4-lane result concatenated with a zero vector (shufflevector
; indices 4-7 select from zeroinitializer), giving an <8 x i16> whose upper
; half is zero. The expected assembly below materializes zero in v2 with movi
; and, after the addp, copies it into the upper 64-bit lane of v0.
+ define <8 x i16 > @addpv4i16 (<4 x i16 > noundef %a , <4 x i16 > noundef %b ) {
52
+ ; CHECK-LABEL: addpv4i16:
53
+ ; CHECK: // %bb.0: // %entry
54
+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
55
+ ; CHECK-NEXT: addp v0.4h, v0.4h, v1.4h
56
+ ; CHECK-NEXT: mov v0.d[1], v2.d[0]
57
+ ; CHECK-NEXT: ret
58
+ entry:
59
+ %vpadd_v2.i = tail call <4 x i16 > @llvm.aarch64.neon.addp.v4i16 (<4 x i16 > %a , <4 x i16 > %b )
60
+ %shuffle.i = shufflevector <4 x i16 > %vpadd_v2.i , <4 x i16 > zeroinitializer , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
61
+ ret <8 x i16 > %shuffle.i
62
+ }
63
+
64
; Test @addv4i16: same shape as the addp case but with a plain IR `add` of the
; two <4 x i16> arguments (operands written as %b, %a). The sum is widened to
; <8 x i16> by concatenating it with zeroinitializer, so lanes 4-7 are zero.
; The expected assembly below zeroes v2 with movi, performs the 4h add, and
; then inserts the zero into the upper 64-bit lane of v0.
+ define <8 x i16 > @addv4i16 (<4 x i16 > noundef %a , <4 x i16 > noundef %b ) {
65
+ ; CHECK-LABEL: addv4i16:
66
+ ; CHECK: // %bb.0: // %entry
67
+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
68
+ ; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
69
+ ; CHECK-NEXT: mov v0.d[1], v2.d[0]
70
+ ; CHECK-NEXT: ret
71
+ entry:
72
+ %add.i = add <4 x i16 > %b , %a
73
+ %shuffle.i = shufflevector <4 x i16 > %add.i , <4 x i16 > zeroinitializer , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
74
+ ret <8 x i16 > %shuffle.i
75
+ }
76
+
77
; Test @rshrn: calls the rshrn intrinsic on %a with shift amount 3, producing
; <8 x i8>, and widens the result to <16 x i8> by concatenating it with
; zeroinitializer (indices 8-15 select the zero operand). The expected
; assembly below is a single rshrn followed by ret, with no explicit zeroing
; of the upper half.
; NOTE(review): the %b parameter is never referenced in the body.
+ define <16 x i8 > @rshrn (<8 x i16 > noundef %a , <4 x i16 > noundef %b ) {
78
+ ; CHECK-LABEL: rshrn:
79
+ ; CHECK: // %bb.0: // %entry
80
+ ; CHECK-NEXT: rshrn v0.8b, v0.8h, #3
81
+ ; CHECK-NEXT: ret
82
+ entry:
83
+ %vrshrn_n1 = tail call <8 x i8 > @llvm.aarch64.neon.rshrn.v8i8 (<8 x i16 > %a , i32 3 )
84
+ %shuffle.i = shufflevector <8 x i8 > %vrshrn_n1 , <8 x i8 > zeroinitializer , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
85
+ ret <16 x i8 > %shuffle.i
86
+ }
87
+
88
; Test @tbl1: calls the tbl1 intrinsic with table %a and indices %b, yielding
; <8 x i8>, then widens the result to <16 x i8> by concatenating it with
; zeroinitializer so that lanes 8-15 are zero. The expected assembly below
; zeroes v2 with movi, performs the 8-byte tbl, and then inserts the zero into
; the upper 64-bit lane of v0.
+ define <16 x i8 > @tbl1 (<16 x i8 > %a , <8 x i8 > %b ) {
89
+ ; CHECK-LABEL: tbl1:
90
+ ; CHECK: // %bb.0: // %entry
91
+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
92
+ ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
93
+ ; CHECK-NEXT: mov v0.d[1], v2.d[0]
94
+ ; CHECK-NEXT: ret
95
+ entry:
96
+ %vtbl11 = tail call <8 x i8 > @llvm.aarch64.neon.tbl1.v8i8 (<16 x i8 > %a , <8 x i8 > %b )
97
+ %shuffle.i = shufflevector <8 x i8 > %vtbl11 , <8 x i8 > zeroinitializer , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
98
+ ret <16 x i8 > %shuffle.i
99
+ }
100
+
101
; Test @fadd: adds the two scalar doubles and builds a <2 x double> whose
; lane 0 is the sum and whose lane 1 is the constant 0.0 (two insertelement
; steps starting from poison). The expected assembly below zeroes v2 with
; movi, performs the scalar fadd, inserts the sum into lane 0 of v2, and then
; copies v2 into v0 for the return.
+ define <2 x double > @fadd (double noundef %x , double noundef %y ) {
102
+ ; CHECK-LABEL: fadd:
103
+ ; CHECK: // %bb.0: // %entry
104
+ ; CHECK-NEXT: movi v2.2d, #0000000000000000
105
+ ; CHECK-NEXT: fadd d0, d0, d1
106
+ ; CHECK-NEXT: mov v2.d[0], v0.d[0]
107
+ ; CHECK-NEXT: mov v0.16b, v2.16b
108
+ ; CHECK-NEXT: ret
109
+ entry:
110
+ %add = fadd double %x , %y
111
+ %vecinit1 = insertelement <2 x double > poison, double %add , i64 0
112
; The floating-point literal must be one token: `0.0`. A split `0 .0` lexes
; as an integer followed by a stray token and is rejected for type double.
+ %vecinit2 = insertelement <2 x double > %vecinit1 , double 0.0 , i64 1
113
+ ret <2 x double > %vecinit2
114
+ }
115
+
116
; Test @bsl: computes a bitwise select written out in plain IR,
; (%c & %a) | (~%a & %d), on <4 x i16> operands, bitcasts the result to
; <8 x i8>, and widens it to <16 x i8> by concatenating with zeroinitializer
; (lanes 8-15 are zero). The expected assembly below zeroes v3 with movi,
; emits a single bsl on the 8b registers, and then inserts the zero into the
; upper 64-bit lane of v0.
; NOTE(review): the %b parameter is never referenced in the body.
+ define <16 x i8 > @bsl (<4 x i16 > noundef %a , <4 x i16 > noundef %c , <4 x i16 > noundef %d , <4 x i16 > noundef %b ) {
117
+ ; CHECK-LABEL: bsl:
118
+ ; CHECK: // %bb.0: // %entry
119
+ ; CHECK-NEXT: movi v3.2d, #0000000000000000
120
+ ; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
121
+ ; CHECK-NEXT: mov v0.d[1], v3.d[0]
122
+ ; CHECK-NEXT: ret
123
+ entry:
124
+ %vbsl3.i = and <4 x i16 > %c , %a
125
+ %0 = xor <4 x i16 > %a , <i16 -1 , i16 -1 , i16 -1 , i16 -1 >
126
+ %vbsl4.i = and <4 x i16 > %0 , %d
127
+ %vbsl5.i = or <4 x i16 > %vbsl4.i , %vbsl3.i
128
+ %1 = bitcast <4 x i16 > %vbsl5.i to <8 x i8 >
129
+ %shuffle.i = shufflevector <8 x i8 > %1 , <8 x i8 > zeroinitializer , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
130
+ ret <16 x i8 > %shuffle.i
131
+ }
132
+
133
; Test @load: loads an <8 x i8> vector from %a and widens it to <16 x i8> by
; concatenating it with zeroinitializer, so lanes 8-15 are zero. The expected
; assembly below zeroes v1 with movi, loads 64 bits with `ldr d0`, and then
; inserts the zero into the upper 64-bit lane of v0.
; NOTE(review): the %b parameter is never referenced in the body.
+ define <16 x i8 > @load (ptr %a , <8 x i8 > %b ) {
134
+ ; CHECK-LABEL: load:
135
+ ; CHECK: // %bb.0: // %entry
136
+ ; CHECK-NEXT: movi v1.2d, #0000000000000000
137
+ ; CHECK-NEXT: ldr d0, [x0]
138
+ ; CHECK-NEXT: mov v0.d[1], v1.d[0]
139
+ ; CHECK-NEXT: ret
140
+ entry:
141
+ %vtbl11 = load <8 x i8 >, ptr %a
142
+ %shuffle.i = shufflevector <8 x i8 > %vtbl11 , <8 x i8 > zeroinitializer , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
143
+ ret <16 x i8 > %shuffle.i
144
+ }
145
+
146
+
147
; Declarations for the NEON intrinsics called by the tests above: rounding
; shift-right-narrow (<8 x i16> plus an i32 shift amount -> <8 x i8>) and
; pairwise add on <4 x i16>.
; NOTE(review): @llvm.aarch64.neon.tbl1.v8i8 is also called above but is not
; declared in this visible range - presumably declared earlier in the file;
; confirm.
+ declare <8 x i8 > @llvm.aarch64.neon.rshrn.v8i8 (<8 x i16 >, i32 )
148
+ declare <4 x i16 > @llvm.aarch64.neon.addp.v4i16 (<4 x i16 >, <4 x i16 >)
0 commit comments