
Commit 1762e01

Fix codegen of consteval functions returning an empty class, and related issues (#93115)
If a class is empty, don't store it to memory: the store might overwrite useful data. Similarly, if a class has tail padding that might overlap other fields, don't store the tail padding to memory.

The problem here turned out to be a bit more general than I initially thought: basically all uses of EmitAggregateStore were broken. Call lowering had a method that did mostly the right thing, though: CreateCoercedStore. Adapt CreateCoercedStore so it always does the conservatively right thing, and use it for both calls and ConstantExpr.

Also, along the way, fix the "overlap" bit in AggValueSlot: the bit was set incorrectly for empty classes in some cases.

Fixes #93040.
1 parent ae6dc64 commit 1762e01

15 files changed: +320 −287 lines
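The kind of code involved looks roughly like the following. This is a hedged sketch under a typical Itanium C++ ABI layout, not the reproducer attached to #93040:

struct Empty {};

consteval Empty make_empty() { return {}; }

struct S {
  int x;
  [[no_unique_address]] Empty e;  // can be placed at offset 0, sharing storage with x
  S() : x(42), e(make_empty()) {} // the call is folded to a ConstantExpr of type Empty
};

int read_x() {
  S s;
  return s.x; // must still be 42: emitting the empty result of make_empty() must not clobber x
}

Previously the constant result of the immediate invocation was written out with EmitAggregateStore, which could emit a store over bytes the empty object shares with its neighbours. With this change the store goes through CreateCoercedStore with an explicit byte size, which is zero for an empty destination that may overlap other objects, so no store is emitted there.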

clang/lib/CodeGen/CGCall.cpp

Lines changed: 61 additions & 85 deletions
@@ -1336,75 +1336,50 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
   return CGF.Builder.CreateLoad(Tmp);
 }
 
-// Function to store a first-class aggregate into memory. We prefer to
-// store the elements rather than the aggregate to be more friendly to
-// fast-isel.
-// FIXME: Do we need to recurse here?
-void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest,
-                                         bool DestIsVolatile) {
-  // Prefer scalar stores to first-class aggregate stores.
-  if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) {
-    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-      Address EltPtr = Builder.CreateStructGEP(Dest, i);
-      llvm::Value *Elt = Builder.CreateExtractValue(Val, i);
-      Builder.CreateStore(Elt, EltPtr, DestIsVolatile);
-    }
-  } else {
-    Builder.CreateStore(Val, Dest, DestIsVolatile);
-  }
-}
-
-/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
-/// where the source and destination may have different types. The
-/// destination is known to be aligned to \arg DstAlign bytes.
-///
-/// This safely handles the case when the src type is larger than the
-/// destination type; the upper bits of the src will be lost.
-static void CreateCoercedStore(llvm::Value *Src,
-                               Address Dst,
-                               bool DstIsVolatile,
-                               CodeGenFunction &CGF) {
-  llvm::Type *SrcTy = Src->getType();
-  llvm::Type *DstTy = Dst.getElementType();
-  if (SrcTy == DstTy) {
-    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
-    return;
-  }
-
-  llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
-
-  if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
-    Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
-                                             SrcSize.getFixedValue(), CGF);
-    DstTy = Dst.getElementType();
-  }
-
-  llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
-  llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
-  if (SrcPtrTy && DstPtrTy &&
-      SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
-    Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy);
-    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
+                                         llvm::TypeSize DstSize,
+                                         bool DstIsVolatile) {
+  if (!DstSize)
     return;
-  }
 
-  // If the source and destination are integer or pointer types, just do an
-  // extension or truncation to the desired type.
-  if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
-      (isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
-    Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
-    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
-    return;
+  llvm::Type *SrcTy = Src->getType();
+  llvm::TypeSize SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
+
+  // GEP into structs to try to make types match.
+  // FIXME: This isn't really that useful with opaque types, but it impacts a
+  // lot of regression tests.
+  if (SrcTy != Dst.getElementType()) {
+    if (llvm::StructType *DstSTy =
+            dyn_cast<llvm::StructType>(Dst.getElementType())) {
+      assert(!SrcSize.isScalable());
+      Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
+                                               SrcSize.getFixedValue(), *this);
+    }
   }
 
-  llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
-
-  // If store is legal, just bitcast the src pointer.
-  if (isa<llvm::ScalableVectorType>(SrcTy) ||
-      isa<llvm::ScalableVectorType>(DstTy) ||
-      SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
-    Dst = Dst.withElementType(SrcTy);
-    CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
+  if (SrcSize.isScalable() || SrcSize <= DstSize) {
+    if (SrcTy->isIntegerTy() && Dst.getElementType()->isPointerTy() &&
+        SrcSize == CGM.getDataLayout().getTypeAllocSize(Dst.getElementType())) {
+      // If the value is supposed to be a pointer, convert it before storing it.
+      Src = CoerceIntOrPtrToIntOrPtr(Src, Dst.getElementType(), *this);
+      Builder.CreateStore(Src, Dst, DstIsVolatile);
+    } else if (llvm::StructType *STy =
+                   dyn_cast<llvm::StructType>(Src->getType())) {
+      // Prefer scalar stores to first-class aggregate stores.
+      Dst = Dst.withElementType(SrcTy);
+      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+        Address EltPtr = Builder.CreateStructGEP(Dst, i);
+        llvm::Value *Elt = Builder.CreateExtractValue(Src, i);
+        Builder.CreateStore(Elt, EltPtr, DstIsVolatile);
+      }
+    } else {
+      Builder.CreateStore(Src, Dst.withElementType(SrcTy), DstIsVolatile);
+    }
+  } else if (SrcTy->isIntegerTy()) {
+    // If the source is a simple integer, coerce it directly.
+    llvm::Type *DstIntTy = Builder.getIntNTy(DstSize.getFixedValue() * 8);
+    Src = CoerceIntOrPtrToIntOrPtr(Src, DstIntTy, *this);
+    Builder.CreateStore(Src, Dst.withElementType(DstIntTy), DstIsVolatile);
   } else {
     // Otherwise do coercion through memory. This is stupid, but
     // simple.
@@ -1416,12 +1391,12 @@ static void CreateCoercedStore(llvm::Value *Src,
   // FIXME: Assert that we aren't truncating non-padding bits when have access
   // to that information.
   RawAddress Tmp =
-      CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
-  CGF.Builder.CreateStore(Src, Tmp);
-  CGF.Builder.CreateMemCpy(
-      Dst.emitRawPointer(CGF), Dst.getAlignment().getAsAlign(),
-      Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
-      llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue()));
+      CreateTempAllocaForCoercion(*this, SrcTy, Dst.getAlignment());
+  Builder.CreateStore(Src, Tmp);
+  Builder.CreateMemCpy(Dst.emitRawPointer(*this),
+                       Dst.getAlignment().getAsAlign(), Tmp.getPointer(),
+                       Tmp.getAlignment().getAsAlign(),
+                       Builder.CreateTypeSize(IntPtrTy, DstSize));
   }
 }
 
@@ -3315,7 +3290,12 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       assert(NumIRArgs == 1);
       auto AI = Fn->getArg(FirstIRArg);
       AI->setName(Arg->getName() + ".coerce");
-      CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
+      CreateCoercedStore(
+          AI, Ptr,
+          llvm::TypeSize::getFixed(
+              getContext().getTypeSizeInChars(Ty).getQuantity() -
+              ArgI.getDirectOffset()),
+          /*DstIsVolatile=*/false);
     }
 
     // Match to what EmitParmDecl is expecting for this type.
@@ -5950,17 +5930,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
      llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
      return RValue::getComplex(std::make_pair(Real, Imag));
    }
-   case TEK_Aggregate: {
-     Address DestPtr = ReturnValue.getAddress();
-     bool DestIsVolatile = ReturnValue.isVolatile();
-
-     if (!DestPtr.isValid()) {
-       DestPtr = CreateMemTemp(RetTy, "agg.tmp");
-       DestIsVolatile = false;
-     }
-     EmitAggregateStore(CI, DestPtr, DestIsVolatile);
-     return RValue::getAggregate(DestPtr);
-   }
+   case TEK_Aggregate:
+     break;
    case TEK_Scalar: {
      // If the argument doesn't match, perform a bitcast to coerce it.
      // This can happen due to trivial type mismatches.
@@ -5970,7 +5941,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
      return RValue::get(V);
    }
    }
-   llvm_unreachable("bad evaluation kind");
  }
 
  // If coercing a fixed vector from a scalable vector for ABI
@@ -5992,10 +5962,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 
  Address DestPtr = ReturnValue.getValue();
  bool DestIsVolatile = ReturnValue.isVolatile();
+ uint64_t DestSize =
+     getContext().getTypeInfoDataSizeInChars(RetTy).Width.getQuantity();
 
  if (!DestPtr.isValid()) {
    DestPtr = CreateMemTemp(RetTy, "coerce");
    DestIsVolatile = false;
+   DestSize = getContext().getTypeSizeInChars(RetTy).getQuantity();
  }
 
  // An empty record can overlap other data (if declared with
@@ -6004,7 +5977,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
  if (!isEmptyRecord(getContext(), RetTy, true)) {
    // If the value is offset in memory, apply the offset now.
    Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
-   CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+   CreateCoercedStore(
+       CI, StorePtr,
+       llvm::TypeSize::getFixed(DestSize - RetAI.getDirectOffset()),
+       DestIsVolatile);
  }
 
  return convertTempToRValue(DestPtr, RetTy, SourceLocation());
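The last two hunks limit the write-back of a directly returned value to the destination's data size when the caller provides the return slot: that slot may be a subobject whose tail padding already belongs to something else. A hedged illustration, assuming a typical Itanium C++ ABI layout (these types are illustrative, not from the patch or its tests):

struct Base {
  Base() {}    // non-POD, so derived classes may reuse the tail padding
  int i = 0;
  char c = 0;  // sizeof(Base) is 8 on common 64-bit targets, data size is 5
};

struct Derived : Base {
  char d;      // typically placed at offset 5, inside Base's tail padding
};

Base make_base();  // small enough to be returned in registers on many ABIs

If make_base() initializes the Base subobject of a Derived object, storing all sizeof(Base) bytes of the returned value could touch the byte holding Derived::d; clamping the coerced store to the 5-byte data size (and skipping the write-back entirely for an empty record, per the isEmptyRecord check) keeps the write inside the subobject.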

clang/lib/CodeGen/CGExprAgg.cpp

Lines changed: 14 additions & 9 deletions
@@ -131,15 +131,12 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
     EnsureDest(E->getType());
 
     if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
-      Address StoreDest = Dest.getAddress();
-      // The emitted value is guaranteed to have the same size as the
-      // destination but can have a different type. Just do a bitcast in this
-      // case to avoid incorrect GEPs.
-      if (Result->getType() != StoreDest.getType())
-        StoreDest = StoreDest.withElementType(Result->getType());
-
-      CGF.EmitAggregateStore(Result, StoreDest,
-                             E->getType().isVolatileQualified());
+      CGF.CreateCoercedStore(
+          Result, Dest.getAddress(),
+          llvm::TypeSize::getFixed(
+              Dest.getPreferredSize(CGF.getContext(), E->getType())
+                  .getQuantity()),
+          E->getType().isVolatileQualified());
       return;
     }
     return Visit(E->getSubExpr());
@@ -2050,6 +2047,10 @@ CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) {
   if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType())
     return AggValueSlot::DoesNotOverlap;
 
+  // Empty fields can overlap earlier fields.
+  if (FD->getType()->getAsCXXRecordDecl()->isEmpty())
+    return AggValueSlot::MayOverlap;
+
   // If the field lies entirely within the enclosing class's nvsize, its tail
   // padding cannot overlap any already-initialized object. (The only subobjects
   // with greater addresses that might already be initialized are vbases.)
@@ -2072,6 +2073,10 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
   if (IsVirtual)
     return AggValueSlot::MayOverlap;
 
+  // Empty bases can overlap earlier bases.
+  if (BaseRD->isEmpty())
+    return AggValueSlot::MayOverlap;
+
   // If the base class is laid out entirely within the nvsize of the derived
   // class, its tail padding cannot yet be initialized, so we can issue
   // stores at the full width of the base class.
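The two getOverlap* changes exist because an empty subobject owns no storage of its own; under a typical Itanium C++ ABI layout it can sit on top of another subobject, as in this hedged sketch (types are illustrative only):

struct Empty {};

struct WithEmptyBase : Empty {
  int first;  // the Empty base subobject is typically placed at offset 0, on top of first
};

struct WithEmptyField {
  int first;
  [[no_unique_address]] Empty e;  // typically also placed at offset 0
};

Reporting such subobjects as MayOverlap makes the aggregate emitter use the conservative data size rather than the full size when storing into them, which for an empty class is zero bytes, so nothing neighbouring gets overwritten.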

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 4 additions & 3 deletions
@@ -4850,9 +4850,10 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitAggFinalDestCopy(QualType Type, AggValueSlot Dest, const LValue &Src,
                             ExprValueKind SrcKind);
 
-  /// Build all the stores needed to initialize an aggregate at Dest with the
-  /// value Val.
-  void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile);
+  /// Create a store to \arg DstPtr from \arg Src, truncating the stored value
+  /// to at most \arg DstSize bytes.
+  void CreateCoercedStore(llvm::Value *Src, Address Dst, llvm::TypeSize DstSize,
+                          bool DstIsVolatile);
 
   /// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
   /// make sure it survives garbage collection until this point.

clang/test/CodeGen/arm-mve-intrinsics/vld24.c

Lines changed: 27 additions & 16 deletions
@@ -48,10 +48,13 @@ uint8x16x4_t test_vld4q_u8(const uint8_t *addr)
 
 // CHECK-LABEL: @test_vst2q_u32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T:%.*]] [[VALUE_COERCE:%.*]], 0, 0
-// CHECK-NEXT: [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T]] [[VALUE_COERCE]], 0, 1
-// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR:%.*]], <4 x i32> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <4 x i32> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 0)
-// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR]], <4 x i32> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <4 x i32> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 1)
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T:%.*]] [[VALUE_COERCE:%.*]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR:%.*]], <4 x i32> [[DOTFCA_0_EXTRACT]], <4 x i32> [[DOTFCA_1_EXTRACT]], i32 0)
+// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR]], <4 x i32> [[DOTFCA_0_EXTRACT]], <4 x i32> [[DOTFCA_1_EXTRACT]], i32 1)
 // CHECK-NEXT: ret void
 //
 void test_vst2q_u32(uint32_t *addr, uint32x4x2_t value)
@@ -65,14 +68,19 @@ void test_vst2q_u32(uint32_t *addr, uint32x4x2_t value)
 
 // CHECK-LABEL: @test_vst4q_s8(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T:%.*]] [[VALUE_COERCE:%.*]], 0, 0
-// CHECK-NEXT: [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] [[VALUE_COERCE]], 0, 1
-// CHECK-NEXT: [[VALUE_COERCE_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] [[VALUE_COERCE]], 0, 2
-// CHECK-NEXT: [[VALUE_COERCE_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] [[VALUE_COERCE]], 0, 3
-// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR:%.*]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 0)
-// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 1)
-// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 2)
-// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 3)
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X16X4_T:%.*]] [[VALUE_COERCE:%.*]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 2
+// CHECK-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[DOTFCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[DOTFCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR:%.*]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 0)
+// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 1)
+// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 2)
+// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 3)
 // CHECK-NEXT: ret void
 //
 void test_vst4q_s8(int8_t *addr, int8x16x4_t value)
@@ -86,10 +94,13 @@ void test_vst4q_s8(int8_t *addr, int8x16x4_t value)
 
 // CHECK-LABEL: @test_vst2q_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T:%.*]] [[VALUE_COERCE:%.*]], 0, 0
-// CHECK-NEXT: [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[VALUE_COERCE]], 0, 1
-// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR:%.*]], <8 x half> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <8 x half> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 0)
-// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR]], <8 x half> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <8 x half> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 1)
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T:%.*]] [[VALUE_COERCE:%.*]], 0
+// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP0]], 0
+// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP0]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] poison, <8 x half> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR:%.*]], <8 x half> [[DOTFCA_0_EXTRACT]], <8 x half> [[DOTFCA_1_EXTRACT]], i32 0)
+// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR]], <8 x half> [[DOTFCA_0_EXTRACT]], <8 x half> [[DOTFCA_1_EXTRACT]], i32 1)
 // CHECK-NEXT: ret void
 //
 void test_vst2q_f16(float16_t *addr, float16x8x2_t value)
