|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s |
| 3 | + |
| | +; i8 elements: the shuffle fills result lanes 0-15 with element 15 and result |
| | +; lanes 16-31 with element 31, i.e. the last byte of each 16-byte half of the |
| | +; loaded <32 x i8>. The expected output is one SVE load, a single |
| | +; "dupq z0.b, z0.b[15]", and one SVE store back to the same address. |
| 4 | +define void @dupq_i8_256b(ptr %addr) #0 { |
| 5 | +; CHECK-LABEL: dupq_i8_256b: |
| 6 | +; CHECK: // %bb.0: |
| 7 | +; CHECK-NEXT: ldr z0, [x0] |
| 8 | +; CHECK-NEXT: dupq z0.b, z0.b[15] |
| 9 | +; CHECK-NEXT: str z0, [x0] |
| 10 | +; CHECK-NEXT: ret |
| 11 | + %load = load <32 x i8>, ptr %addr |
| 12 | + %splat.lanes = shufflevector <32 x i8> %load, <32 x i8> poison, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, |
| 13 | + i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> |
| 14 | + store <32 x i8> %splat.lanes, ptr %addr |
| 15 | + ret void |
| 16 | +} |
| 17 | + |
| 17 | + |
| | +; i16 elements: result lanes 0-7 all take element 2 and result lanes 8-15 all |
| | +; take element 10, so each 8-element half is filled with its own element at |
| | +; offset 2. The expected output is a single "dupq z0.h, z0.h[2]" between an |
| | +; SVE load and store. |
| 18 | +define void @dupq_i16_256b(ptr %addr) #0 { |
| 19 | +; CHECK-LABEL: dupq_i16_256b: |
| 20 | +; CHECK: // %bb.0: |
| 21 | +; CHECK-NEXT: ldr z0, [x0] |
| 22 | +; CHECK-NEXT: dupq z0.h, z0.h[2] |
| 23 | +; CHECK-NEXT: str z0, [x0] |
| 24 | +; CHECK-NEXT: ret |
| 25 | + %load = load <16 x i16>, ptr %addr |
| 26 | + %splat.lanes = shufflevector <16 x i16> %load, <16 x i16> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, |
| 27 | + i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> |
| 28 | + store <16 x i16> %splat.lanes, ptr %addr |
| 29 | + ret void |
| 30 | +} |
| 31 | + |
| 31 | + |
| | +; i32 elements: result lanes 0-3 all take element 3 and result lanes 4-7 all |
| | +; take element 7 (the last element of each 4-element half). The expected |
| | +; output is a single "dupq z0.s, z0.s[3]" between an SVE load and store. |
| 32 | +define void @dupq_i32_256b(ptr %addr) #0 { |
| 33 | +; CHECK-LABEL: dupq_i32_256b: |
| 34 | +; CHECK: // %bb.0: |
| 35 | +; CHECK-NEXT: ldr z0, [x0] |
| 36 | +; CHECK-NEXT: dupq z0.s, z0.s[3] |
| 37 | +; CHECK-NEXT: str z0, [x0] |
| 38 | +; CHECK-NEXT: ret |
| 39 | + %load = load <8 x i32>, ptr %addr |
| 40 | + %splat.lanes = shufflevector <8 x i32> %load, <8 x i32> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, |
| 41 | + i32 7, i32 7, i32 7, i32 7> |
| 42 | + store <8 x i32> %splat.lanes, ptr %addr |
| 43 | + ret void |
| 44 | +} |
| 45 | + |
| 45 | + |
| | +; i64 elements: the mask <0, 0, 2, 2> fills each 2-element half with its own |
| | +; first element. Note that the expected instruction for this case is |
| | +; "trn1 z0.d, z0.d, z0.d" rather than a dupq. |
| 46 | +define void @dupq_i64_256b(ptr %addr) #0 { |
| 47 | +; CHECK-LABEL: dupq_i64_256b: |
| 48 | +; CHECK: // %bb.0: |
| 49 | +; CHECK-NEXT: ldr z0, [x0] |
| 50 | +; CHECK-NEXT: trn1 z0.d, z0.d, z0.d |
| 51 | +; CHECK-NEXT: str z0, [x0] |
| 52 | +; CHECK-NEXT: ret |
| 53 | + %load = load <4 x i64>, ptr %addr |
| 54 | + %splat.lanes = shufflevector <4 x i64> %load, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2> |
| 55 | + store <4 x i64> %splat.lanes, ptr %addr |
| 56 | + ret void |
| 57 | +} |
| 58 | + |
| 58 | + |
| | +; half elements: same mask as the i16 test (element 2 for result lanes 0-7, |
| | +; element 10 for result lanes 8-15). The expected output is likewise a single |
| | +; "dupq z0.h, z0.h[2]" between an SVE load and store. |
| 59 | +define void @dupq_f16_256b(ptr %addr) #0 { |
| 60 | +; CHECK-LABEL: dupq_f16_256b: |
| 61 | +; CHECK: // %bb.0: |
| 62 | +; CHECK-NEXT: ldr z0, [x0] |
| 63 | +; CHECK-NEXT: dupq z0.h, z0.h[2] |
| 64 | +; CHECK-NEXT: str z0, [x0] |
| 65 | +; CHECK-NEXT: ret |
| 66 | + %load = load <16 x half>, ptr %addr |
| 67 | + %splat.lanes = shufflevector <16 x half> %load, <16 x half> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, |
| 68 | + i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> |
| 69 | + store <16 x half> %splat.lanes, ptr %addr |
| 70 | + ret void |
| 71 | +} |
| 72 | + |
| 72 | + |
| | +; bfloat elements: same mask as the i16/half tests (element 2 for result lanes |
| | +; 0-7, element 10 for result lanes 8-15). Unlike those tests, the expected |
| | +; output here is not a dupq: the two 128-bit halves are loaded with ldp, each |
| | +; is splatted with a NEON "dup vN.8h, vN.h[2]", and the pair is stored with stp. |
| | +; NOTE(review): presumably this records a current limitation for bfloat |
| | +; vectors rather than the desired final codegen - confirm with the lowering. |
| 73 | +define void @dupq_bf16_256b(ptr %addr) #0 { |
| 74 | +; CHECK-LABEL: dupq_bf16_256b: |
| 75 | +; CHECK: // %bb.0: |
| 76 | +; CHECK-NEXT: ldp q0, q1, [x0] |
| 77 | +; CHECK-NEXT: dup v0.8h, v0.h[2] |
| 78 | +; CHECK-NEXT: dup v1.8h, v1.h[2] |
| 79 | +; CHECK-NEXT: stp q0, q1, [x0] |
| 80 | +; CHECK-NEXT: ret |
| 81 | + %load = load <16 x bfloat>, ptr %addr |
| 82 | + %splat.lanes = shufflevector <16 x bfloat> %load, <16 x bfloat> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, |
| 83 | + i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> |
| 84 | + store <16 x bfloat> %splat.lanes, ptr %addr |
| 85 | + ret void |
| 86 | +} |
| 87 | + |
| 87 | + |
| | +; float elements: same mask as the i32 test (element 3 for result lanes 0-3, |
| | +; element 7 for result lanes 4-7). The expected output is a single |
| | +; "dupq z0.s, z0.s[3]" between an SVE load and store. |
| 88 | +define void @dupq_f32_256b(ptr %addr) #0 { |
| 89 | +; CHECK-LABEL: dupq_f32_256b: |
| 90 | +; CHECK: // %bb.0: |
| 91 | +; CHECK-NEXT: ldr z0, [x0] |
| 92 | +; CHECK-NEXT: dupq z0.s, z0.s[3] |
| 93 | +; CHECK-NEXT: str z0, [x0] |
| 94 | +; CHECK-NEXT: ret |
| 95 | + %load = load <8 x float>, ptr %addr |
| 96 | + %splat.lanes = shufflevector <8 x float> %load, <8 x float> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, |
| 97 | + i32 7, i32 7, i32 7, i32 7> |
| 98 | + store <8 x float> %splat.lanes, ptr %addr |
| 99 | + ret void |
| 100 | +} |
| 101 | + |
| 101 | + |
| | +; double elements: same mask as the i64 test, <0, 0, 2, 2>, filling each |
| | +; 2-element half with its own first element. As in the i64 test, the expected |
| | +; instruction is "trn1 z0.d, z0.d, z0.d" rather than a dupq. |
| 102 | +define void @dupq_f64_256b(ptr %addr) #0 { |
| 103 | +; CHECK-LABEL: dupq_f64_256b: |
| 104 | +; CHECK: // %bb.0: |
| 105 | +; CHECK-NEXT: ldr z0, [x0] |
| 106 | +; CHECK-NEXT: trn1 z0.d, z0.d, z0.d |
| 107 | +; CHECK-NEXT: str z0, [x0] |
| 108 | +; CHECK-NEXT: ret |
| 109 | + %load = load <4 x double>, ptr %addr |
| 110 | + %splat.lanes = shufflevector <4 x double> %load, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2> |
| 111 | + store <4 x double> %splat.lanes, ptr %addr |
| 112 | + ret void |
| 113 | +} |
| 114 | + |
| 114 | + |
| | +; Attribute group referenced by every test function in this file: noinline, |
| | +; vscale_range(2,2) (minimum and maximum vscale both 2, matching the "256b" in |
| | +; the function names), and the sve2p1 and bf16 target features. |
| 115 | +attributes #0 = { noinline vscale_range(2,2) "target-features"="+sve2p1,+bf16" } |
0 commit comments