Skip to content

Commit 8982454

Browse files
committed
[AArch64] Add rev codegen tests for bfloat16. NFC
1 parent 1dc8578 commit 8982454

File tree

2 files changed

+151
-58
lines changed

2 files changed

+151
-58
lines changed

llvm/test/CodeGen/AArch64/arm64-rev.ll

Lines changed: 130 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,9 @@ define <8 x i8> @test_vrev64D8(ptr %A) nounwind {
213213
; CHECK-NEXT: ldr d0, [x0]
214214
; CHECK-NEXT: rev64.8b v0, v0
215215
; CHECK-NEXT: ret
216-
%tmp1 = load <8 x i8>, ptr %A
217-
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
218-
ret <8 x i8> %tmp2
216+
%tmp1 = load <8 x i8>, ptr %A
217+
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
218+
ret <8 x i8> %tmp2
219219
}
220220

221221
define <4 x i16> @test_vrev64D16(ptr %A) nounwind {
@@ -224,9 +224,9 @@ define <4 x i16> @test_vrev64D16(ptr %A) nounwind {
224224
; CHECK-NEXT: ldr d0, [x0]
225225
; CHECK-NEXT: rev64.4h v0, v0
226226
; CHECK-NEXT: ret
227-
%tmp1 = load <4 x i16>, ptr %A
228-
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
229-
ret <4 x i16> %tmp2
227+
%tmp1 = load <4 x i16>, ptr %A
228+
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
229+
ret <4 x i16> %tmp2
230230
}
231231

232232
define <2 x i32> @test_vrev64D32(ptr %A) nounwind {
@@ -235,9 +235,9 @@ define <2 x i32> @test_vrev64D32(ptr %A) nounwind {
235235
; CHECK-NEXT: ldr d0, [x0]
236236
; CHECK-NEXT: rev64.2s v0, v0
237237
; CHECK-NEXT: ret
238-
%tmp1 = load <2 x i32>, ptr %A
239-
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
240-
ret <2 x i32> %tmp2
238+
%tmp1 = load <2 x i32>, ptr %A
239+
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
240+
ret <2 x i32> %tmp2
241241
}
242242

243243
define <2 x float> @test_vrev64Df(ptr %A) nounwind {
@@ -246,9 +246,9 @@ define <2 x float> @test_vrev64Df(ptr %A) nounwind {
246246
; CHECK-NEXT: ldr d0, [x0]
247247
; CHECK-NEXT: rev64.2s v0, v0
248248
; CHECK-NEXT: ret
249-
%tmp1 = load <2 x float>, ptr %A
250-
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
251-
ret <2 x float> %tmp2
249+
%tmp1 = load <2 x float>, ptr %A
250+
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
251+
ret <2 x float> %tmp2
252252
}
253253

254254
define <16 x i8> @test_vrev64Q8(ptr %A) nounwind {
@@ -257,9 +257,9 @@ define <16 x i8> @test_vrev64Q8(ptr %A) nounwind {
257257
; CHECK-NEXT: ldr q0, [x0]
258258
; CHECK-NEXT: rev64.16b v0, v0
259259
; CHECK-NEXT: ret
260-
%tmp1 = load <16 x i8>, ptr %A
261-
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
262-
ret <16 x i8> %tmp2
260+
%tmp1 = load <16 x i8>, ptr %A
261+
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
262+
ret <16 x i8> %tmp2
263263
}
264264

265265
define <8 x i16> @test_vrev64Q16(ptr %A) nounwind {
@@ -268,9 +268,9 @@ define <8 x i16> @test_vrev64Q16(ptr %A) nounwind {
268268
; CHECK-NEXT: ldr q0, [x0]
269269
; CHECK-NEXT: rev64.8h v0, v0
270270
; CHECK-NEXT: ret
271-
%tmp1 = load <8 x i16>, ptr %A
272-
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
273-
ret <8 x i16> %tmp2
271+
%tmp1 = load <8 x i16>, ptr %A
272+
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
273+
ret <8 x i16> %tmp2
274274
}
275275

276276
define <4 x i32> @test_vrev64Q32(ptr %A) nounwind {
@@ -279,9 +279,9 @@ define <4 x i32> @test_vrev64Q32(ptr %A) nounwind {
279279
; CHECK-NEXT: ldr q0, [x0]
280280
; CHECK-NEXT: rev64.4s v0, v0
281281
; CHECK-NEXT: ret
282-
%tmp1 = load <4 x i32>, ptr %A
283-
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
284-
ret <4 x i32> %tmp2
282+
%tmp1 = load <4 x i32>, ptr %A
283+
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
284+
ret <4 x i32> %tmp2
285285
}
286286

287287
define <4 x float> @test_vrev64Qf(ptr %A) nounwind {
@@ -290,9 +290,9 @@ define <4 x float> @test_vrev64Qf(ptr %A) nounwind {
290290
; CHECK-NEXT: ldr q0, [x0]
291291
; CHECK-NEXT: rev64.4s v0, v0
292292
; CHECK-NEXT: ret
293-
%tmp1 = load <4 x float>, ptr %A
294-
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
295-
ret <4 x float> %tmp2
293+
%tmp1 = load <4 x float>, ptr %A
294+
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
295+
ret <4 x float> %tmp2
296296
}
297297

298298
define <8 x i8> @test_vrev32D8(ptr %A) nounwind {
@@ -301,9 +301,9 @@ define <8 x i8> @test_vrev32D8(ptr %A) nounwind {
301301
; CHECK-NEXT: ldr d0, [x0]
302302
; CHECK-NEXT: rev32.8b v0, v0
303303
; CHECK-NEXT: ret
304-
%tmp1 = load <8 x i8>, ptr %A
305-
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
306-
ret <8 x i8> %tmp2
304+
%tmp1 = load <8 x i8>, ptr %A
305+
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
306+
ret <8 x i8> %tmp2
307307
}
308308

309309
define <4 x i16> @test_vrev32D16(ptr %A) nounwind {
@@ -312,9 +312,9 @@ define <4 x i16> @test_vrev32D16(ptr %A) nounwind {
312312
; CHECK-NEXT: ldr d0, [x0]
313313
; CHECK-NEXT: rev32.4h v0, v0
314314
; CHECK-NEXT: ret
315-
%tmp1 = load <4 x i16>, ptr %A
316-
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
317-
ret <4 x i16> %tmp2
315+
%tmp1 = load <4 x i16>, ptr %A
316+
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
317+
ret <4 x i16> %tmp2
318318
}
319319

320320
define <16 x i8> @test_vrev32Q8(ptr %A) nounwind {
@@ -323,9 +323,9 @@ define <16 x i8> @test_vrev32Q8(ptr %A) nounwind {
323323
; CHECK-NEXT: ldr q0, [x0]
324324
; CHECK-NEXT: rev32.16b v0, v0
325325
; CHECK-NEXT: ret
326-
%tmp1 = load <16 x i8>, ptr %A
327-
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
328-
ret <16 x i8> %tmp2
326+
%tmp1 = load <16 x i8>, ptr %A
327+
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
328+
ret <16 x i8> %tmp2
329329
}
330330

331331
define <8 x i16> @test_vrev32Q16(ptr %A) nounwind {
@@ -334,9 +334,9 @@ define <8 x i16> @test_vrev32Q16(ptr %A) nounwind {
334334
; CHECK-NEXT: ldr q0, [x0]
335335
; CHECK-NEXT: rev32.8h v0, v0
336336
; CHECK-NEXT: ret
337-
%tmp1 = load <8 x i16>, ptr %A
338-
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
339-
ret <8 x i16> %tmp2
337+
%tmp1 = load <8 x i16>, ptr %A
338+
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
339+
ret <8 x i16> %tmp2
340340
}
341341

342342
define <8 x i8> @test_vrev16D8(ptr %A) nounwind {
@@ -345,9 +345,9 @@ define <8 x i8> @test_vrev16D8(ptr %A) nounwind {
345345
; CHECK-NEXT: ldr d0, [x0]
346346
; CHECK-NEXT: rev16.8b v0, v0
347347
; CHECK-NEXT: ret
348-
%tmp1 = load <8 x i8>, ptr %A
349-
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
350-
ret <8 x i8> %tmp2
348+
%tmp1 = load <8 x i8>, ptr %A
349+
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
350+
ret <8 x i8> %tmp2
351351
}
352352

353353
define <16 x i8> @test_vrev16Q8(ptr %A) nounwind {
@@ -356,9 +356,81 @@ define <16 x i8> @test_vrev16Q8(ptr %A) nounwind {
356356
; CHECK-NEXT: ldr q0, [x0]
357357
; CHECK-NEXT: rev16.16b v0, v0
358358
; CHECK-NEXT: ret
359-
%tmp1 = load <16 x i8>, ptr %A
360-
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
361-
ret <16 x i8> %tmp2
359+
%tmp1 = load <16 x i8>, ptr %A
360+
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
361+
ret <16 x i8> %tmp2
362+
}
363+
364+
; <4 x half> shuffle with mask <1,0,3,2> (swap each adjacent pair of 16-bit lanes); expected to lower to a single rev32.4h.
define <4 x half> @test_vrev32Df16(<4 x half> %A) nounwind {
365+
; CHECK-LABEL: test_vrev32Df16:
366+
; CHECK: // %bb.0:
367+
; CHECK-NEXT: rev32.4h v0, v0
368+
; CHECK-NEXT: ret
369+
%tmp2 = shufflevector <4 x half> %A, <4 x half> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
370+
ret <4 x half> %tmp2
371+
}
372+
373+
; <8 x half> shuffle with mask <1,0,3,2,5,4,7,6> (swap each adjacent pair of 16-bit lanes); expected to lower to a single rev32.8h.
define <8 x half> @test_vrev32Qf16(<8 x half> %A) nounwind {
374+
; CHECK-LABEL: test_vrev32Qf16:
375+
; CHECK: // %bb.0:
376+
; CHECK-NEXT: rev32.8h v0, v0
377+
; CHECK-NEXT: ret
378+
%tmp2 = shufflevector <8 x half> %A, <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
379+
ret <8 x half> %tmp2
380+
}
381+
382+
; <4 x half> shuffle with mask <3,2,1,0> (reverse all four 16-bit lanes); expected to lower to a single rev64.4h.
define <4 x half> @test_vrev64Df16(<4 x half> %A) nounwind {
383+
; CHECK-LABEL: test_vrev64Df16:
384+
; CHECK: // %bb.0:
385+
; CHECK-NEXT: rev64.4h v0, v0
386+
; CHECK-NEXT: ret
387+
%tmp2 = shufflevector <4 x half> %A, <4 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
388+
ret <4 x half> %tmp2
389+
}
390+
391+
; <8 x half> shuffle with mask <3,2,1,0,7,6,5,4> (reverse the 16-bit lanes within each group of four); expected to lower to a single rev64.8h.
define <8 x half> @test_vrev64Qf16(<8 x half> %A) nounwind {
392+
; CHECK-LABEL: test_vrev64Qf16:
393+
; CHECK: // %bb.0:
394+
; CHECK-NEXT: rev64.8h v0, v0
395+
; CHECK-NEXT: ret
396+
%tmp2 = shufflevector <8 x half> %A, <8 x half> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
397+
ret <8 x half> %tmp2
398+
}
399+
400+
; bfloat counterpart of the <4 x half> rev32 test: mask <1,0,3,2> on <4 x bfloat>; expected to lower to a single rev32.4h.
define <4 x bfloat> @test_vrev32Dbf16(<4 x bfloat> %A) nounwind {
401+
; CHECK-LABEL: test_vrev32Dbf16:
402+
; CHECK: // %bb.0:
403+
; CHECK-NEXT: rev32.4h v0, v0
404+
; CHECK-NEXT: ret
405+
%tmp2 = shufflevector <4 x bfloat> %A, <4 x bfloat> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
406+
ret <4 x bfloat> %tmp2
407+
}
408+
409+
; bfloat counterpart of the <8 x half> rev32 test: mask <1,0,3,2,5,4,7,6> on <8 x bfloat>; expected to lower to a single rev32.8h.
define <8 x bfloat> @test_vrev32Qbf16(<8 x bfloat> %A) nounwind {
410+
; CHECK-LABEL: test_vrev32Qbf16:
411+
; CHECK: // %bb.0:
412+
; CHECK-NEXT: rev32.8h v0, v0
413+
; CHECK-NEXT: ret
414+
%tmp2 = shufflevector <8 x bfloat> %A, <8 x bfloat> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
415+
ret <8 x bfloat> %tmp2
416+
}
417+
418+
; bfloat counterpart of the <4 x half> rev64 test: mask <3,2,1,0> on <4 x bfloat>; expected to lower to a single rev64.4h.
define <4 x bfloat> @test_vrev64Dbf16(<4 x bfloat> %A) nounwind {
419+
; CHECK-LABEL: test_vrev64Dbf16:
420+
; CHECK: // %bb.0:
421+
; CHECK-NEXT: rev64.4h v0, v0
422+
; CHECK-NEXT: ret
423+
%tmp2 = shufflevector <4 x bfloat> %A, <4 x bfloat> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
424+
ret <4 x bfloat> %tmp2
425+
}
426+
427+
; bfloat counterpart of the <8 x half> rev64 test: mask <3,2,1,0,7,6,5,4> on <8 x bfloat>; expected to lower to a single rev64.8h.
define <8 x bfloat> @test_vrev64Qbf16(<8 x bfloat> %A) nounwind {
428+
; CHECK-LABEL: test_vrev64Qbf16:
429+
; CHECK: // %bb.0:
430+
; CHECK-NEXT: rev64.8h v0, v0
431+
; CHECK-NEXT: ret
432+
%tmp2 = shufflevector <8 x bfloat> %A, <8 x bfloat> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
433+
ret <8 x bfloat> %tmp2
362434
}
363435

364436
; Undef shuffle indices should not prevent matching to VREV:
@@ -369,9 +441,9 @@ define <8 x i8> @test_vrev64D8_undef(ptr %A) nounwind {
369441
; CHECK-NEXT: ldr d0, [x0]
370442
; CHECK-NEXT: rev64.8b v0, v0
371443
; CHECK-NEXT: ret
372-
%tmp1 = load <8 x i8>, ptr %A
373-
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
374-
ret <8 x i8> %tmp2
444+
%tmp1 = load <8 x i8>, ptr %A
445+
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
446+
ret <8 x i8> %tmp2
375447
}
376448

377449
define <8 x i16> @test_vrev32Q16_undef(ptr %A) nounwind {
@@ -380,9 +452,9 @@ define <8 x i16> @test_vrev32Q16_undef(ptr %A) nounwind {
380452
; CHECK-NEXT: ldr q0, [x0]
381453
; CHECK-NEXT: rev32.8h v0, v0
382454
; CHECK-NEXT: ret
383-
%tmp1 = load <8 x i16>, ptr %A
384-
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
385-
ret <8 x i16> %tmp2
455+
%tmp1 = load <8 x i16>, ptr %A
456+
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
457+
ret <8 x i16> %tmp2
386458
}
387459

388460
; vrev <4 x i16> should use REV32 and not REV64
@@ -426,9 +498,9 @@ define void @float_vrev64(ptr nocapture %source, ptr nocapture %dest) nounwind n
426498
; CHECK-GI-LABEL: float_vrev64:
427499
; CHECK-GI: // %bb.0: // %entry
428500
; CHECK-GI-NEXT: movi d0, #0000000000000000
429-
; CHECK-GI-NEXT: adrp x8, .LCPI28_0
501+
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
430502
; CHECK-GI-NEXT: ldr q1, [x0]
431-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI28_0]
503+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
432504
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
433505
; CHECK-GI-NEXT: str q0, [x1, #176]
434506
; CHECK-GI-NEXT: ret
@@ -456,27 +528,27 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
456528
define void @test_rev16_truncstore() {
457529
; CHECK-SD-LABEL: test_rev16_truncstore:
458530
; CHECK-SD: // %bb.0: // %entry
459-
; CHECK-SD-NEXT: cbnz wzr, .LBB30_2
460-
; CHECK-SD-NEXT: .LBB30_1: // %cleanup
531+
; CHECK-SD-NEXT: cbnz wzr, .LBB38_2
532+
; CHECK-SD-NEXT: .LBB38_1: // %cleanup
461533
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
462534
; CHECK-SD-NEXT: ldrh w8, [x8]
463535
; CHECK-SD-NEXT: rev16 w8, w8
464536
; CHECK-SD-NEXT: strh w8, [x8]
465-
; CHECK-SD-NEXT: cbz wzr, .LBB30_1
466-
; CHECK-SD-NEXT: .LBB30_2: // %fail
537+
; CHECK-SD-NEXT: cbz wzr, .LBB38_1
538+
; CHECK-SD-NEXT: .LBB38_2: // %fail
467539
; CHECK-SD-NEXT: ret
468540
;
469541
; CHECK-GI-LABEL: test_rev16_truncstore:
470542
; CHECK-GI: // %bb.0: // %entry
471-
; CHECK-GI-NEXT: tbnz wzr, #0, .LBB30_2
472-
; CHECK-GI-NEXT: .LBB30_1: // %cleanup
543+
; CHECK-GI-NEXT: tbnz wzr, #0, .LBB38_2
544+
; CHECK-GI-NEXT: .LBB38_1: // %cleanup
473545
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
474546
; CHECK-GI-NEXT: ldrh w8, [x8]
475547
; CHECK-GI-NEXT: rev w8, w8
476548
; CHECK-GI-NEXT: lsr w8, w8, #16
477549
; CHECK-GI-NEXT: strh w8, [x8]
478-
; CHECK-GI-NEXT: tbz wzr, #0, .LBB30_1
479-
; CHECK-GI-NEXT: .LBB30_2: // %fail
550+
; CHECK-GI-NEXT: tbz wzr, #0, .LBB38_1
551+
; CHECK-GI-NEXT: .LBB38_2: // %fail
480552
; CHECK-GI-NEXT: ret
481553
entry:
482554
br label %body

llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,3 +198,24 @@ entry:
198198
%V128 = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
199199
ret <4 x half> %V128
200200
}
201+
202+
; Full lane reversal of <8 x bfloat> (mask <7,6,5,4,3,2,1,0>). Expected output is rev64 on .8h followed by ext #8 on the same register, which exchanges the two 64-bit halves.
define <8 x bfloat> @v8bf16(<8 x bfloat> %a) {
203+
; CHECK-LABEL: v8bf16:
204+
; CHECK: // %bb.0: // %entry
205+
; CHECK-NEXT: rev64 v0.8h, v0.8h
206+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
207+
; CHECK-NEXT: ret
208+
entry:
209+
%V128 = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
210+
ret <8 x bfloat> %V128
211+
}
212+
213+
; Full lane reversal of <4 x bfloat> (mask <3,2,1,0>). Expected to lower to a single rev64 on .4h.
define <4 x bfloat> @v4bf16(<4 x bfloat> %a) {
214+
; CHECK-LABEL: v4bf16:
215+
; CHECK: // %bb.0: // %entry
216+
; CHECK-NEXT: rev64 v0.4h, v0.4h
217+
; CHECK-NEXT: ret
218+
entry:
219+
%V128 = shufflevector <4 x bfloat> %a, <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
220+
ret <4 x bfloat> %V128
221+
}

0 commit comments

Comments
 (0)