Commit d636bcb
[RISCV] Introduce unaligned-vector-mem feature
This allows us to model, and thus test, transforms which are legal only when vector loads with less than element alignment are supported. This was originally part of D126085, but was split out because we didn't have a good example of such a transform. As can be seen in the test diffs, the recently added concat_vector(loads) -> strided_load transform (from D147713) now benefits from the unaligned support.

While making this change, I realized that we actually *do* support unaligned vector loads and stores of all types via conversion to an i8 element type. For contiguous loads and stores without masking, we already implement this in the backend - we just don't tell the optimizer about it. For indexed accesses, lowering to i8 requires complicated addressing. For indexed and segmented accesses, we'd have to use indexed addressing. All around, this doesn't seem worth pursuing, but it makes for an interesting observation.

Differential Revision: https://reviews.llvm.org/D149375
1 parent e161fcd commit d636bcb
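
As an aside to the i8-conversion observation in the message above, here is a minimal IR sketch of the idea. It is my own illustration, not part of this patch, and the function name is made up: an unmasked, under-aligned vector load covers the same bytes as a byte-element load of the same width, so it can be expressed as an e8 access plus a bitcast.

define <4 x i16> @unaligned_load_via_i8(ptr %p) {
  ; An align-1 <4 x i16> load reads the same 8 bytes as an <8 x i8> load,
  ; and e8 accesses never have an alignment problem.
  %bytes = load <8 x i8>, ptr %p, align 1
  %v = bitcast <8 x i8> %bytes to <4 x i16>
  ret <4 x i16> %v
}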

File tree

5 files changed: +194, -21 lines changed


llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 5 additions & 0 deletions
@@ -717,6 +717,11 @@ def FeatureUnalignedScalarMem
                        "true", "Has reasonably performant unaligned scalar "
                        "loads and stores">;
 
+def FeatureUnalignedVectorMem
+   : SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem",
+                      "true", "Has reasonably performant unaligned vector "
+                      "loads and stores">;
+
 def TuneNoOptimizedZeroStrideLoad
     : SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
                        "false", "Hasn't optimized (perform fewer memory operations)"

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 9 additions & 2 deletions
@@ -15636,7 +15636,13 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
     return true;
   }
 
-  return false;
+  // Note: We lower an unmasked unaligned vector access to an equally sized
+  // e8 element type access. Given this, we effectively support all unmasked
+  // misaligned accesses. TODO: Work through the codegen implications of
+  // allowing such accesses to be formed, and considered fast.
+  if (Fast)
+    *Fast = 0;
+  return Subtarget.enableUnalignedVectorMem();
 }
 
 bool RISCVTargetLowering::splitValueIntoRegisterParts(
@@ -15811,7 +15817,8 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(const DataLayout &DL,
   if (!isLegalElementTypeForRVV(ScalarType))
     return false;
 
-  if (Alignment < DL.getTypeStoreSize(ScalarType).getFixedValue())
+  if (!Subtarget.enableUnalignedVectorMem() &&
+      Alignment < DL.getTypeStoreSize(ScalarType).getFixedValue())
     return false;
 
   return true;
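
For context, the relaxed checks in this file are what let the concat_vector(loads) -> strided_load transform mentioned in the commit message fire on align-1 inputs (see the strided_unaligned test below). That combine runs on the SelectionDAG, but a rough IR-level analogue of its result, written here purely as an illustration (the function name is made up, and the mapping to the DAG combine is my assumption), is a single element-coerced strided load:

declare <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr, i64, <2 x i1>, i32)

define <2 x i64> @strided_pair_sketch(ptr %x, i64 %s) {
  ; Two under-aligned 8-byte chunks at %x and %x + %s, fetched as one
  ; i64-coerced strided load; without +unaligned-vector-mem the align 1
  ; would fail the element-alignment check in isLegalStridedLoadStore.
  %v = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 1 %x, i64 %s, <2 x i1> <i1 true, i1 true>, i32 2)
  ret <2 x i64> %v
}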

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 8 additions & 6 deletions
@@ -190,11 +190,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
     if (isa<FixedVectorType>(DataType) && !ST->useRVVForFixedLengthVectors())
       return false;
 
-    if (Alignment <
-        DL.getTypeStoreSize(DataType->getScalarType()).getFixedValue())
+    auto *ElemType = DataType->getScalarType();
+    if (!ST->enableUnalignedVectorMem() &&
+        Alignment < DL.getTypeStoreSize(ElemType).getFixedValue())
       return false;
 
-    return TLI->isLegalElementTypeForRVV(DataType->getScalarType());
+    return TLI->isLegalElementTypeForRVV(ElemType);
   }
 
   bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@@ -212,11 +213,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
     if (isa<FixedVectorType>(DataType) && !ST->useRVVForFixedLengthVectors())
       return false;
 
-    if (Alignment <
-        DL.getTypeStoreSize(DataType->getScalarType()).getFixedValue())
+    auto *ElemType = DataType->getScalarType();
+    if (!ST->enableUnalignedVectorMem() &&
+        Alignment < DL.getTypeStoreSize(ElemType).getFixedValue())
       return false;
 
-    return TLI->isLegalElementTypeForRVV(DataType->getScalarType());
+    return TLI->isLegalElementTypeForRVV(ElemType);
   }
 
   bool isLegalMaskedGather(Type *DataType, Align Alignment) {
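
The two functions touched here appear to be the shared helpers behind the isLegalMaskedLoad/Store and isLegalMaskedGather/Scatter queries consulted by the middle end. As a hedged illustration (not from this patch; the function name is made up), an under-aligned masked load like the one below could now be reported as legal when +unaligned-vector-mem is set, instead of being rejected for failing the element-alignment check:

declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)

define <4 x i16> @masked_load_align1_sketch(ptr %p, <4 x i1> %m) {
  ; The i32 operand is the alignment (1 byte here), which is below the
  ; 2-byte element alignment the old check required for e16 data.
  %v = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %p, i32 1, <4 x i1> %m, <4 x i16> zeroinitializer)
  ret <4 x i16> %v
}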

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll

Lines changed: 61 additions & 13 deletions
@@ -1,7 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh,+zve64f,+zvl128b,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,ZVE64F
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh,+unaligned-vector-mem -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64,RV64-MISALIGN
+
+; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh,+zve64f,+zvl128b,+experimental-zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,ZVE64F
 
 ; The two loads are contigous and should be folded into one
 define void @widen_2xv4i16(ptr %x, ptr %z) {
@@ -109,6 +111,46 @@ define void @widen_4xv4i16(ptr %x, ptr %z) {
   ret void
 }
 
+define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) {
+; CHECK-NO-MISALIGN-LABEL: widen_4xv4i16_unaligned:
+; CHECK-NO-MISALIGN:       # %bb.0:
+; CHECK-NO-MISALIGN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NO-MISALIGN-NEXT:    vle8.v v8, (a0)
+; CHECK-NO-MISALIGN-NEXT:    addi a2, a0, 8
+; CHECK-NO-MISALIGN-NEXT:    vle8.v v10, (a2)
+; CHECK-NO-MISALIGN-NEXT:    addi a2, a0, 16
+; CHECK-NO-MISALIGN-NEXT:    vle8.v v12, (a2)
+; CHECK-NO-MISALIGN-NEXT:    addi a0, a0, 24
+; CHECK-NO-MISALIGN-NEXT:    vle8.v v14, (a0)
+; CHECK-NO-MISALIGN-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
+; CHECK-NO-MISALIGN-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NO-MISALIGN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; CHECK-NO-MISALIGN-NEXT:    vslideup.vi v8, v12, 8
+; CHECK-NO-MISALIGN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NO-MISALIGN-NEXT:    vslideup.vi v8, v14, 12
+; CHECK-NO-MISALIGN-NEXT:    vse16.v v8, (a1)
+; CHECK-NO-MISALIGN-NEXT:    ret
+;
+; RV64-MISALIGN-LABEL: widen_4xv4i16_unaligned:
+; RV64-MISALIGN:       # %bb.0:
+; RV64-MISALIGN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV64-MISALIGN-NEXT:    vle16.v v8, (a0)
+; RV64-MISALIGN-NEXT:    vse16.v v8, (a1)
+; RV64-MISALIGN-NEXT:    ret
+  %a = load <4 x i16>, ptr %x, align 1
+  %b.gep = getelementptr i8, ptr %x, i64 8
+  %b = load <4 x i16>, ptr %b.gep, align 1
+  %c.gep = getelementptr i8, ptr %b.gep, i64 8
+  %c = load <4 x i16>, ptr %c.gep, align 1
+  %d.gep = getelementptr i8, ptr %c.gep, i64 8
+  %d = load <4 x i16>, ptr %d.gep, align 1
+  %e.0 = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %e.1 = shufflevector <4 x i16> %c, <4 x i16> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %e.2 = shufflevector <8 x i16> %e.0, <8 x i16> %e.1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  store <16 x i16> %e.2, ptr %z
+  ret void
+}
+
 ; Should be a strided load - with type coercion to i64
 define void @strided_constant(ptr %x, ptr %z) {
 ; CHECK-LABEL: strided_constant:
@@ -365,17 +407,23 @@ define void @strided_runtime_4xv2f32(ptr %x, ptr %z, i64 %s) {
   ret void
 }
 
-; Shouldn't be combined because the resulting load would not be aligned
 define void @strided_unaligned(ptr %x, ptr %z, i64 %s) {
-; CHECK-LABEL: strided_unaligned:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    add a0, a0, a2
-; CHECK-NEXT:    vle8.v v9, (a0)
-; CHECK-NEXT:    vslideup.vi v8, v9, 4
-; CHECK-NEXT:    vse16.v v8, (a1)
-; CHECK-NEXT:    ret
+; CHECK-NO-MISALIGN-LABEL: strided_unaligned:
+; CHECK-NO-MISALIGN:       # %bb.0:
+; CHECK-NO-MISALIGN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NO-MISALIGN-NEXT:    vle8.v v8, (a0)
+; CHECK-NO-MISALIGN-NEXT:    add a0, a0, a2
+; CHECK-NO-MISALIGN-NEXT:    vle8.v v9, (a0)
+; CHECK-NO-MISALIGN-NEXT:    vslideup.vi v8, v9, 4
+; CHECK-NO-MISALIGN-NEXT:    vse16.v v8, (a1)
+; CHECK-NO-MISALIGN-NEXT:    ret
+;
+; RV64-MISALIGN-LABEL: strided_unaligned:
+; RV64-MISALIGN:       # %bb.0:
+; RV64-MISALIGN-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-MISALIGN-NEXT:    vlse64.v v8, (a0), a2
+; RV64-MISALIGN-NEXT:    vse64.v v8, (a1)
+; RV64-MISALIGN-NEXT:    ret
   %a = load <4 x i16>, ptr %x, align 1
   %b.gep = getelementptr i8, ptr %x, i64 %s
   %b = load <4 x i16>, ptr %b.gep, align 1
