@@ -1,7 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh,+zve64f,+zvl128b,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,ZVE64F
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh,+unaligned-vector-mem -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64,RV64-MISALIGN
+
+; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh,+zve64f,+zvl128b,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,ZVE64F
 
 ; The two loads are contigous and should be folded into one
 define void @widen_2xv4i16(ptr %x, ptr %z) {
@@ -109,6 +111,46 @@ define void @widen_4xv4i16(ptr %x, ptr %z) {
   ret void
 }
 
+define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) {
+; CHECK-NO-MISALIGN-LABEL: widen_4xv4i16_unaligned:
+; CHECK-NO-MISALIGN: # %bb.0:
+; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NO-MISALIGN-NEXT: vle8.v v8, (a0)
+; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 8
+; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a2)
+; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 16
+; CHECK-NO-MISALIGN-NEXT: vle8.v v12, (a2)
+; CHECK-NO-MISALIGN-NEXT: addi a0, a0, 24
+; CHECK-NO-MISALIGN-NEXT: vle8.v v14, (a0)
+; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 12, e16, m2, tu, ma
+; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v12, 8
+; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v14, 12
+; CHECK-NO-MISALIGN-NEXT: vse16.v v8, (a1)
+; CHECK-NO-MISALIGN-NEXT: ret
+;
+; RV64-MISALIGN-LABEL: widen_4xv4i16_unaligned:
+; RV64-MISALIGN: # %bb.0:
+; RV64-MISALIGN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-MISALIGN-NEXT: vle16.v v8, (a0)
+; RV64-MISALIGN-NEXT: vse16.v v8, (a1)
+; RV64-MISALIGN-NEXT: ret
+  %a = load <4 x i16>, ptr %x, align 1
+  %b.gep = getelementptr i8, ptr %x, i64 8
+  %b = load <4 x i16>, ptr %b.gep, align 1
+  %c.gep = getelementptr i8, ptr %b.gep, i64 8
+  %c = load <4 x i16>, ptr %c.gep, align 1
+  %d.gep = getelementptr i8, ptr %c.gep, i64 8
+  %d = load <4 x i16>, ptr %d.gep, align 1
+  %e.0 = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %e.1 = shufflevector <4 x i16> %c, <4 x i16> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %e.2 = shufflevector <8 x i16> %e.0, <8 x i16> %e.1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  store <16 x i16> %e.2, ptr %z
+  ret void
+}
+
 ; Should be a strided load - with type coercion to i64
 define void @strided_constant(ptr %x, ptr %z) {
 ; CHECK-LABEL: strided_constant:
@@ -365,17 +407,23 @@ define void @strided_runtime_4xv2f32(ptr %x, ptr %z, i64 %s) {
   ret void
 }
 
-; Shouldn't be combined because the resulting load would not be aligned
 define void @strided_unaligned(ptr %x, ptr %z, i64 %s) {
-; CHECK-LABEL: strided_unaligned:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: add a0, a0, a2
-; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vslideup.vi v8, v9, 4
-; CHECK-NEXT: vse16.v v8, (a1)
-; CHECK-NEXT: ret
+; CHECK-NO-MISALIGN-LABEL: strided_unaligned:
+; CHECK-NO-MISALIGN: # %bb.0:
+; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NO-MISALIGN-NEXT: vle8.v v8, (a0)
+; CHECK-NO-MISALIGN-NEXT: add a0, a0, a2
+; CHECK-NO-MISALIGN-NEXT: vle8.v v9, (a0)
+; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NO-MISALIGN-NEXT: vse16.v v8, (a1)
+; CHECK-NO-MISALIGN-NEXT: ret
+;
+; RV64-MISALIGN-LABEL: strided_unaligned:
+; RV64-MISALIGN: # %bb.0:
+; RV64-MISALIGN-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-MISALIGN-NEXT: vlse64.v v8, (a0), a2
+; RV64-MISALIGN-NEXT: vse64.v v8, (a1)
+; RV64-MISALIGN-NEXT: ret
   %a = load <4 x i16>, ptr %x, align 1
   %b.gep = getelementptr i8, ptr %x, i64 %s
   %b = load <4 x i16>, ptr %b.gep, align 1