|
8 | 8 | ; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for test_udot_v48i8
|
9 | 9 | ; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for test_sdot_v24i8
|
10 | 10 | ; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for test_sdot_v48i8
|
11 |
| -; CHECK-GI-BASE-NEXT: warning: Instruction selection used fallback path for full |
12 |
| - |
13 |
| -; CHECK-GI-DOT: warning: Instruction selection used fallback path for full |
14 | 11 |
|
15 | 12 | define i32 @addv_v2i32(<2 x i32> %a) {
|
16 | 13 | ; CHECK-LABEL: addv_v2i32:
|
@@ -5183,115 +5180,104 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
|
5183 | 5180 | ; CHECK-SD-DOT-NEXT: fmov w0, s0
|
5184 | 5181 | ; CHECK-SD-DOT-NEXT: ret
|
5185 | 5182 | ;
|
5186 |
| -; CHECK-GI-BASE-LABEL: full: |
5187 |
| -; CHECK-GI-BASE: // %bb.0: // %entry |
5188 |
| -; CHECK-GI-BASE-NEXT: ldr d0, [x2] |
5189 |
| -; CHECK-GI-BASE-NEXT: ldr d1, [x0] |
5190 |
| -; CHECK-GI-BASE-NEXT: // kill: def $w3 killed $w3 def $x3 |
5191 |
| -; CHECK-GI-BASE-NEXT: // kill: def $w1 killed $w1 def $x1 |
5192 |
| -; CHECK-GI-BASE-NEXT: sxtw x8, w3 |
5193 |
| -; CHECK-GI-BASE-NEXT: sxtw x9, w1 |
5194 |
| -; CHECK-GI-BASE-NEXT: uabdl v0.8h, v1.8b, v0.8b |
5195 |
| -; CHECK-GI-BASE-NEXT: add x11, x2, x8 |
5196 |
| -; CHECK-GI-BASE-NEXT: add x10, x0, x9 |
5197 |
| -; CHECK-GI-BASE-NEXT: ldr d2, [x11] |
5198 |
| -; CHECK-GI-BASE-NEXT: add x11, x11, x8 |
5199 |
| -; CHECK-GI-BASE-NEXT: ldr d1, [x10] |
5200 |
| -; CHECK-GI-BASE-NEXT: add x10, x10, x9 |
5201 |
| -; CHECK-GI-BASE-NEXT: uaddlp v0.4s, v0.8h |
5202 |
| -; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b |
5203 |
| -; CHECK-GI-BASE-NEXT: ldr d2, [x11] |
5204 |
| -; CHECK-GI-BASE-NEXT: add x11, x11, x8 |
5205 |
| -; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h |
5206 |
| -; CHECK-GI-BASE-NEXT: ldr d1, [x10] |
5207 |
| -; CHECK-GI-BASE-NEXT: add x10, x10, x9 |
5208 |
| -; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b |
5209 |
| -; CHECK-GI-BASE-NEXT: ldr d2, [x11] |
5210 |
| -; CHECK-GI-BASE-NEXT: add x11, x11, x8 |
5211 |
| -; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h |
5212 |
| -; CHECK-GI-BASE-NEXT: ldr d1, [x10] |
5213 |
| -; CHECK-GI-BASE-NEXT: add x10, x10, x9 |
5214 |
| -; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b |
5215 |
| -; CHECK-GI-BASE-NEXT: ldr d2, [x11] |
5216 |
| -; CHECK-GI-BASE-NEXT: add x11, x11, x8 |
5217 |
| -; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h |
5218 |
| -; CHECK-GI-BASE-NEXT: ldr d1, [x10] |
5219 |
| -; CHECK-GI-BASE-NEXT: add x10, x10, x9 |
5220 |
| -; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b |
5221 |
| -; CHECK-GI-BASE-NEXT: ldr d2, [x11] |
5222 |
| -; CHECK-GI-BASE-NEXT: add x11, x11, x8 |
5223 |
| -; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h |
5224 |
| -; CHECK-GI-BASE-NEXT: ldr d1, [x10] |
5225 |
| -; CHECK-GI-BASE-NEXT: add x10, x10, x9 |
5226 |
| -; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b |
5227 |
| -; CHECK-GI-BASE-NEXT: ldr d2, [x11] |
5228 |
| -; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h |
5229 |
| -; CHECK-GI-BASE-NEXT: ldr d1, [x10] |
5230 |
| -; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b |
5231 |
| -; CHECK-GI-BASE-NEXT: ldr d2, [x11, x8] |
5232 |
| -; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h |
5233 |
| -; CHECK-GI-BASE-NEXT: ldr d1, [x10, x9] |
5234 |
| -; CHECK-GI-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b |
5235 |
| -; CHECK-GI-BASE-NEXT: uadalp v0.4s, v1.8h |
5236 |
| -; CHECK-GI-BASE-NEXT: addv s0, v0.4s |
5237 |
| -; CHECK-GI-BASE-NEXT: fmov w0, s0 |
5238 |
| -; CHECK-GI-BASE-NEXT: ret |
5239 |
| -; |
5240 |
| -; CHECK-GI-DOT-LABEL: full: |
5241 |
| -; CHECK-GI-DOT: // %bb.0: // %entry |
5242 |
| -; CHECK-GI-DOT-NEXT: ldr d0, [x0] |
5243 |
| -; CHECK-GI-DOT-NEXT: ldr d1, [x2] |
5244 |
| -; CHECK-GI-DOT-NEXT: // kill: def $w3 killed $w3 def $x3 |
5245 |
| -; CHECK-GI-DOT-NEXT: // kill: def $w1 killed $w1 def $x1 |
5246 |
| -; CHECK-GI-DOT-NEXT: sxtw x8, w3 |
5247 |
| -; CHECK-GI-DOT-NEXT: sxtw x9, w1 |
5248 |
| -; CHECK-GI-DOT-NEXT: movi v2.2d, #0000000000000000 |
5249 |
| -; CHECK-GI-DOT-NEXT: movi v3.8b, #1 |
5250 |
| -; CHECK-GI-DOT-NEXT: uabd v0.8b, v0.8b, v1.8b |
5251 |
| -; CHECK-GI-DOT-NEXT: add x11, x2, x8 |
5252 |
| -; CHECK-GI-DOT-NEXT: add x10, x0, x9 |
5253 |
| -; CHECK-GI-DOT-NEXT: ldr d4, [x11] |
5254 |
| -; CHECK-GI-DOT-NEXT: add x11, x11, x8 |
5255 |
| -; CHECK-GI-DOT-NEXT: ldr d1, [x10] |
5256 |
| -; CHECK-GI-DOT-NEXT: add x10, x10, x9 |
5257 |
| -; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b |
5258 |
| -; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b |
5259 |
| -; CHECK-GI-DOT-NEXT: ldr d1, [x10] |
5260 |
| -; CHECK-GI-DOT-NEXT: ldr d4, [x11] |
5261 |
| -; CHECK-GI-DOT-NEXT: add x10, x10, x9 |
5262 |
| -; CHECK-GI-DOT-NEXT: add x11, x11, x8 |
5263 |
| -; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b |
5264 |
| -; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b |
5265 |
| -; CHECK-GI-DOT-NEXT: ldr d1, [x10] |
5266 |
| -; CHECK-GI-DOT-NEXT: ldr d4, [x11] |
5267 |
| -; CHECK-GI-DOT-NEXT: add x10, x10, x9 |
5268 |
| -; CHECK-GI-DOT-NEXT: add x11, x11, x8 |
5269 |
| -; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b |
5270 |
| -; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b |
5271 |
| -; CHECK-GI-DOT-NEXT: ldr d1, [x10] |
5272 |
| -; CHECK-GI-DOT-NEXT: ldr d4, [x11] |
5273 |
| -; CHECK-GI-DOT-NEXT: add x10, x10, x9 |
5274 |
| -; CHECK-GI-DOT-NEXT: add x11, x11, x8 |
5275 |
| -; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b |
5276 |
| -; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b |
5277 |
| -; CHECK-GI-DOT-NEXT: ldr d1, [x10] |
5278 |
| -; CHECK-GI-DOT-NEXT: ldr d4, [x11] |
5279 |
| -; CHECK-GI-DOT-NEXT: add x10, x10, x9 |
5280 |
| -; CHECK-GI-DOT-NEXT: add x11, x11, x8 |
5281 |
| -; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b |
5282 |
| -; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b |
5283 |
| -; CHECK-GI-DOT-NEXT: ldr d1, [x10] |
5284 |
| -; CHECK-GI-DOT-NEXT: ldr d4, [x11] |
5285 |
| -; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b |
5286 |
| -; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b |
5287 |
| -; CHECK-GI-DOT-NEXT: ldr d1, [x10, x9] |
5288 |
| -; CHECK-GI-DOT-NEXT: ldr d4, [x11, x8] |
5289 |
| -; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b |
5290 |
| -; CHECK-GI-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b |
5291 |
| -; CHECK-GI-DOT-NEXT: udot v2.2s, v0.8b, v3.8b |
5292 |
| -; CHECK-GI-DOT-NEXT: addp v0.2s, v2.2s, v2.2s |
5293 |
| -; CHECK-GI-DOT-NEXT: fmov w0, s0 |
5294 |
| -; CHECK-GI-DOT-NEXT: ret |
| 5183 | +; CHECK-GI-LABEL: full: |
| 5184 | +; CHECK-GI: // %bb.0: // %entry |
| 5185 | +; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1 |
| 5186 | +; CHECK-GI-NEXT: // kill: def $w3 killed $w3 def $x3 |
| 5187 | +; CHECK-GI-NEXT: sxtw x8, w1 |
| 5188 | +; CHECK-GI-NEXT: sxtw x9, w3 |
| 5189 | +; CHECK-GI-NEXT: ldr d0, [x0] |
| 5190 | +; CHECK-GI-NEXT: ldr d1, [x2] |
| 5191 | +; CHECK-GI-NEXT: add x10, x0, x8 |
| 5192 | +; CHECK-GI-NEXT: add x11, x2, x9 |
| 5193 | +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 |
| 5194 | +; CHECK-GI-NEXT: ldr d2, [x10] |
| 5195 | +; CHECK-GI-NEXT: ldr d3, [x11] |
| 5196 | +; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 |
| 5197 | +; CHECK-GI-NEXT: add x10, x10, x8 |
| 5198 | +; CHECK-GI-NEXT: add x11, x11, x9 |
| 5199 | +; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0 |
| 5200 | +; CHECK-GI-NEXT: ushll v3.8h, v3.8b, #0 |
| 5201 | +; CHECK-GI-NEXT: ldr d4, [x10] |
| 5202 | +; CHECK-GI-NEXT: ldr d5, [x11] |
| 5203 | +; CHECK-GI-NEXT: add x10, x10, x8 |
| 5204 | +; CHECK-GI-NEXT: add x11, x11, x9 |
| 5205 | +; CHECK-GI-NEXT: uabdl v6.4s, v0.4h, v1.4h |
| 5206 | +; CHECK-GI-NEXT: uabdl2 v0.4s, v0.8h, v1.8h |
| 5207 | +; CHECK-GI-NEXT: ldr d1, [x10] |
| 5208 | +; CHECK-GI-NEXT: ushll v4.8h, v4.8b, #0 |
| 5209 | +; CHECK-GI-NEXT: ushll v5.8h, v5.8b, #0 |
| 5210 | +; CHECK-GI-NEXT: ldr d7, [x11] |
| 5211 | +; CHECK-GI-NEXT: uabdl v16.4s, v2.4h, v3.4h |
| 5212 | +; CHECK-GI-NEXT: uabdl2 v2.4s, v2.8h, v3.8h |
| 5213 | +; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0 |
| 5214 | +; CHECK-GI-NEXT: ushll v7.8h, v7.8b, #0 |
| 5215 | +; CHECK-GI-NEXT: add x10, x10, x8 |
| 5216 | +; CHECK-GI-NEXT: add x11, x11, x9 |
| 5217 | +; CHECK-GI-NEXT: uabdl v1.4s, v4.4h, v5.4h |
| 5218 | +; CHECK-GI-NEXT: uabdl2 v4.4s, v4.8h, v5.8h |
| 5219 | +; CHECK-GI-NEXT: ldr d5, [x10] |
| 5220 | +; CHECK-GI-NEXT: add v2.4s, v16.4s, v2.4s |
| 5221 | +; CHECK-GI-NEXT: ldr d16, [x11] |
| 5222 | +; CHECK-GI-NEXT: add v0.4s, v6.4s, v0.4s |
| 5223 | +; CHECK-GI-NEXT: uabdl v6.4s, v3.4h, v7.4h |
| 5224 | +; CHECK-GI-NEXT: uabdl2 v3.4s, v3.8h, v7.8h |
| 5225 | +; CHECK-GI-NEXT: ushll v5.8h, v5.8b, #0 |
| 5226 | +; CHECK-GI-NEXT: add x10, x10, x8 |
| 5227 | +; CHECK-GI-NEXT: ushll v7.8h, v16.8b, #0 |
| 5228 | +; CHECK-GI-NEXT: add x11, x11, x9 |
| 5229 | +; CHECK-GI-NEXT: ldr d16, [x10] |
| 5230 | +; CHECK-GI-NEXT: ldr d17, [x11] |
| 5231 | +; CHECK-GI-NEXT: add v1.4s, v1.4s, v4.4s |
| 5232 | +; CHECK-GI-NEXT: add x10, x10, x8 |
| 5233 | +; CHECK-GI-NEXT: add x11, x11, x9 |
| 5234 | +; CHECK-GI-NEXT: add v3.4s, v6.4s, v3.4s |
| 5235 | +; CHECK-GI-NEXT: ushll v16.8h, v16.8b, #0 |
| 5236 | +; CHECK-GI-NEXT: ushll v17.8h, v17.8b, #0 |
| 5237 | +; CHECK-GI-NEXT: uabdl v22.4s, v5.4h, v7.4h |
| 5238 | +; CHECK-GI-NEXT: uabdl2 v5.4s, v5.8h, v7.8h |
| 5239 | +; CHECK-GI-NEXT: ldr d18, [x10] |
| 5240 | +; CHECK-GI-NEXT: ldr d19, [x11] |
| 5241 | +; CHECK-GI-NEXT: addv s0, v0.4s |
| 5242 | +; CHECK-GI-NEXT: addv s2, v2.4s |
| 5243 | +; CHECK-GI-NEXT: addv s1, v1.4s |
| 5244 | +; CHECK-GI-NEXT: ushll v18.8h, v18.8b, #0 |
| 5245 | +; CHECK-GI-NEXT: ushll v19.8h, v19.8b, #0 |
| 5246 | +; CHECK-GI-NEXT: uabdl v4.4s, v16.4h, v17.4h |
| 5247 | +; CHECK-GI-NEXT: uabdl2 v16.4s, v16.8h, v17.8h |
| 5248 | +; CHECK-GI-NEXT: add v5.4s, v22.4s, v5.4s |
| 5249 | +; CHECK-GI-NEXT: ldr d20, [x10, x8] |
| 5250 | +; CHECK-GI-NEXT: ldr d21, [x11, x9] |
| 5251 | +; CHECK-GI-NEXT: addv s3, v3.4s |
| 5252 | +; CHECK-GI-NEXT: fmov w8, s2 |
| 5253 | +; CHECK-GI-NEXT: fmov w9, s0 |
| 5254 | +; CHECK-GI-NEXT: ushll v7.8h, v20.8b, #0 |
| 5255 | +; CHECK-GI-NEXT: ushll v20.8h, v21.8b, #0 |
| 5256 | +; CHECK-GI-NEXT: uabdl v6.4s, v18.4h, v19.4h |
| 5257 | +; CHECK-GI-NEXT: uabdl2 v17.4s, v18.8h, v19.8h |
| 5258 | +; CHECK-GI-NEXT: add v4.4s, v4.4s, v16.4s |
| 5259 | +; CHECK-GI-NEXT: addv s5, v5.4s |
| 5260 | +; CHECK-GI-NEXT: fmov w10, s1 |
| 5261 | +; CHECK-GI-NEXT: add w8, w8, w9 |
| 5262 | +; CHECK-GI-NEXT: fmov w9, s3 |
| 5263 | +; CHECK-GI-NEXT: uabdl v18.4s, v7.4h, v20.4h |
| 5264 | +; CHECK-GI-NEXT: uabdl2 v7.4s, v7.8h, v20.8h |
| 5265 | +; CHECK-GI-NEXT: add v6.4s, v6.4s, v17.4s |
| 5266 | +; CHECK-GI-NEXT: add w8, w10, w8 |
| 5267 | +; CHECK-GI-NEXT: addv s0, v4.4s |
| 5268 | +; CHECK-GI-NEXT: add w8, w9, w8 |
| 5269 | +; CHECK-GI-NEXT: fmov w9, s5 |
| 5270 | +; CHECK-GI-NEXT: add v7.4s, v18.4s, v7.4s |
| 5271 | +; CHECK-GI-NEXT: addv s1, v6.4s |
| 5272 | +; CHECK-GI-NEXT: add w8, w9, w8 |
| 5273 | +; CHECK-GI-NEXT: fmov w9, s0 |
| 5274 | +; CHECK-GI-NEXT: addv s2, v7.4s |
| 5275 | +; CHECK-GI-NEXT: add w8, w9, w8 |
| 5276 | +; CHECK-GI-NEXT: fmov w9, s1 |
| 5277 | +; CHECK-GI-NEXT: add w8, w9, w8 |
| 5278 | +; CHECK-GI-NEXT: fmov w9, s2 |
| 5279 | +; CHECK-GI-NEXT: add w0, w9, w8 |
| 5280 | +; CHECK-GI-NEXT: ret |
5295 | 5281 | entry:
|
5296 | 5282 | %idx.ext8 = sext i32 %s2 to i64
|
5297 | 5283 | %idx.ext = sext i32 %s1 to i64
|
|
0 commit comments