Commit 86692b0
[RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits
For i1 vectors, we used an i8 fixed vector as the storage type. If the known minimum number of elements X of the scalable vector type is less than 8, we were doing the cast through memory, using a load or store from a fixed vector alloca. When X is less than 8, DataLayout indicates that the load/store reads/writes vscale bytes, even if vscale is known and vscale*X is less than or equal to 8. As far as DataLayout is concerned, the load or store is therefore out of bounds of the fixed size alloca, leading to undefined behavior.

This patch avoids the trip through memory by widening the i1 scalable vector type with zero elements until its element count is divisible by 8. This allows it to be bitcast to/from an i8 scalable vector, into which the i8 fixed vector is inserted or from which it is extracted.

Hopefully this enables llvm#130973 to be accepted.
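To make the new sequence concrete, here is a minimal IR sketch of the fixed-to-scalable direction. It is illustrative only; the function name and the assumption that the mask type is <vscale x 2 x i1> with a <1 x i8> storage type are mine, not taken from the patch. The element count 2 is not a multiple of 8, so the mask is handled as <vscale x 8 x i1>, which bitcasts cleanly to <vscale x 1 x i8>:

; Illustrative sketch: cast a <1 x i8> fixed vector to a <vscale x 2 x i1>
; mask without going through memory.
define <vscale x 2 x i1> @cast_fixed_to_mask(<1 x i8> %src) {
  ; Insert the fixed bytes into a scalable i8 container (poison padding).
  %bytes = call <vscale x 1 x i8> @llvm.vector.insert.nxv1i8.v1i8(<vscale x 1 x i8> poison, <1 x i8> %src, i64 0)
  ; Reinterpret the bytes as the widened (multiple-of-8) i1 type.
  %wide = bitcast <vscale x 1 x i8> %bytes to <vscale x 8 x i1>
  ; Extract the actual mask type from the widened vector.
  %mask = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %wide, i64 0)
  ret <vscale x 2 x i1> %mask
}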

8 files changed: +119, -197 lines

clang/lib/CodeGen/CGCall.cpp

Lines changed: 21 additions & 5 deletions
@@ -1366,19 +1366,29 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
         // If we are casting a fixed i8 vector to a scalable i1 predicate
         // vector, use a vector insert and bitcast the result.
         if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
-            ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
             FixedSrcTy->getElementType()->isIntegerTy(8)) {
           ScalableDstTy = llvm::ScalableVectorType::get(
               FixedSrcTy->getElementType(),
-              ScalableDstTy->getElementCount().getKnownMinValue() / 8);
+              llvm::divideCeil(
+                  ScalableDstTy->getElementCount().getKnownMinValue(), 8));
         }
         if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) {
           auto *Load = CGF.Builder.CreateLoad(Src);
           auto *PoisonVec = llvm::PoisonValue::get(ScalableDstTy);
           llvm::Value *Result = CGF.Builder.CreateInsertVector(
               ScalableDstTy, PoisonVec, Load, uint64_t(0), "cast.scalable");
-          if (ScalableDstTy != Ty)
-            Result = CGF.Builder.CreateBitCast(Result, Ty);
+          ScalableDstTy = cast<llvm::ScalableVectorType>(Ty);
+          if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+              !ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+              FixedSrcTy->getElementType()->isIntegerTy(8))
+            ScalableDstTy = llvm::ScalableVectorType::get(
+                ScalableDstTy->getElementType(),
+                llvm::alignTo<8>(
+                    ScalableDstTy->getElementCount().getKnownMinValue()));
+          if (Result->getType() != ScalableDstTy)
+            Result = CGF.Builder.CreateBitCast(Result, ScalableDstTy);
+          if (Result->getType() != Ty)
+            Result = CGF.Builder.CreateExtractVector(Ty, Result, uint64_t(0));
           return Result;
         }
       }
@@ -1476,8 +1486,14 @@ CoerceScalableToFixed(CodeGenFunction &CGF, llvm::FixedVectorType *ToTy,
   // If we are casting a scalable i1 predicate vector to a fixed i8
   // vector, first bitcast the source.
   if (FromTy->getElementType()->isIntegerTy(1) &&
-      FromTy->getElementCount().isKnownMultipleOf(8) &&
       ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
+    if (!FromTy->getElementCount().isKnownMultipleOf(8)) {
+      FromTy = llvm::ScalableVectorType::get(
+          FromTy->getElementType(),
+          llvm::alignTo<8>(FromTy->getElementCount().getKnownMinValue()));
+      llvm::Value *ZeroVec = llvm::Constant::getNullValue(FromTy);
+      V = CGF.Builder.CreateInsertVector(FromTy, ZeroVec, V, uint64_t(0));
+    }
     FromTy = llvm::ScalableVectorType::get(
         ToTy->getElementType(),
         FromTy->getElementCount().getKnownMinValue() / 8);
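The scalable-to-fixed direction mirrors this; note that CoerceScalableToFixed fills the widened lanes with zeroes rather than poison, so the padding bits of the stored byte have defined values. A matching illustrative IR sketch (same assumptions as the earlier one, not taken from the patch's tests):

; Illustrative sketch: cast a <vscale x 2 x i1> mask to its <1 x i8>
; storage type without a round trip through memory.
define <1 x i8> @cast_mask_to_fixed(<vscale x 2 x i1> %mask) {
  ; Widen to a multiple-of-8 element count, zeroing the extra lanes.
  %wide = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv2i1(<vscale x 8 x i1> zeroinitializer, <vscale x 2 x i1> %mask, i64 0)
  ; Reinterpret the widened mask as scalable bytes.
  %bytes = bitcast <vscale x 8 x i1> %wide to <vscale x 1 x i8>
  ; Extract the fixed-length storage vector.
  %fixed = call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8(<vscale x 1 x i8> %bytes, i64 0)
  ret <1 x i8> %fixed
}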

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 23 additions & 4 deletions
@@ -2492,18 +2492,28 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
         // If we are casting a fixed i8 vector to a scalable i1 predicate
         // vector, use a vector insert and bitcast the result.
         if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
-            ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
             FixedSrcTy->getElementType()->isIntegerTy(8)) {
           ScalableDstTy = llvm::ScalableVectorType::get(
               FixedSrcTy->getElementType(),
-              ScalableDstTy->getElementCount().getKnownMinValue() / 8);
+              llvm::divideCeil(
+                  ScalableDstTy->getElementCount().getKnownMinValue(), 8));
         }
         if (FixedSrcTy->getElementType() == ScalableDstTy->getElementType()) {
           llvm::Value *PoisonVec = llvm::PoisonValue::get(ScalableDstTy);
           llvm::Value *Result = Builder.CreateInsertVector(
               ScalableDstTy, PoisonVec, Src, uint64_t(0), "cast.scalable");
+          ScalableDstTy = cast<llvm::ScalableVectorType>(DstTy);
+          if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+              !ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+              FixedSrcTy->getElementType()->isIntegerTy(8))
+            ScalableDstTy = llvm::ScalableVectorType::get(
+                ScalableDstTy->getElementType(),
+                llvm::alignTo<8>(
+                    ScalableDstTy->getElementCount().getKnownMinValue()));
+          if (Result->getType() != ScalableDstTy)
+            Result = Builder.CreateBitCast(Result, ScalableDstTy);
           if (Result->getType() != DstTy)
-            Result = Builder.CreateBitCast(Result, DstTy);
+            Result = Builder.CreateExtractVector(DstTy, Result, uint64_t(0));
           return Result;
         }
       }
@@ -2517,8 +2527,17 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
       // If we are casting a scalable i1 predicate vector to a fixed i8
       // vector, bitcast the source and use a vector extract.
       if (ScalableSrcTy->getElementType()->isIntegerTy(1) &&
-          ScalableSrcTy->getElementCount().isKnownMultipleOf(8) &&
           FixedDstTy->getElementType()->isIntegerTy(8)) {
+        if (!ScalableSrcTy->getElementCount().isKnownMultipleOf(8)) {
+          ScalableSrcTy = llvm::ScalableVectorType::get(
+              ScalableSrcTy->getElementType(),
+              llvm::alignTo<8>(
+                  ScalableSrcTy->getElementCount().getKnownMinValue()));
+          llvm::Value *ZeroVec = llvm::Constant::getNullValue(ScalableSrcTy);
+          Src = Builder.CreateInsertVector(ScalableSrcTy, ZeroVec, Src,
+                                           uint64_t(0));
+        }
+
         ScalableSrcTy = llvm::ScalableVectorType::get(
             FixedDstTy->getElementType(),
             ScalableSrcTy->getElementCount().getKnownMinValue() / 8);

clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c

Lines changed: 16 additions & 88 deletions
@@ -15,24 +15,12 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_
 
 // CHECK-64-LABEL: @call_bool32_ff(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE4:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2_COERCE:%.*]], i64 2)
-// CHECK-64-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-64-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[TMP1:%.*]], i64 2)
 // CHECK-64-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE4:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2_COERCE:%.*]], i64 4)
-// CHECK-128-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-128-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[TMP1:%.*]], i64 4)
 // CHECK-128-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
 //
 fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
@@ -41,24 +29,12 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
 
 // CHECK-64-LABEL: @call_bool64_ff(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE4:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2_COERCE:%.*]], i64 1)
-// CHECK-64-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-64-NEXT:    [[TMP2:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[TMP1:%.*]], i64 1)
 // CHECK-64-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
 //
 // CHECK-128-LABEL: @call_bool64_ff(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE4:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2_COERCE:%.*]], i64 2)
-// CHECK-128-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-128-NEXT:    [[TMP2:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[TMP1:%.*]], i64 2)
 // CHECK-128-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
 //
 fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) {
@@ -71,51 +47,27 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) {
 
 // CHECK-64-LABEL: @call_bool32_fs(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE2:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
-// CHECK-64-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-64-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
+// CHECK-64-NEXT:    ret <vscale x 2 x i1> [[TMP1]]
 //
 // CHECK-128-LABEL: @call_bool32_fs(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE2:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
-// CHECK-128-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-128-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
+// CHECK-128-NEXT:    ret <vscale x 2 x i1> [[TMP1]]
 //
 fixed_bool32_t call_bool32_fs(fixed_bool32_t op1, vbool32_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
 }
 
 // CHECK-64-LABEL: @call_bool64_fs(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE2:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
-// CHECK-64-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-64-NEXT:    [[TMP1:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
+// CHECK-64-NEXT:    ret <vscale x 1 x i1> [[TMP1]]
 //
 // CHECK-128-LABEL: @call_bool64_fs(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE2:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
-// CHECK-128-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-128-NEXT:    [[TMP1:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
+// CHECK-128-NEXT:    ret <vscale x 1 x i1> [[TMP1]]
 //
 fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 64);
@@ -127,51 +79,27 @@ fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
 
 // CHECK-64-LABEL: @call_bool32_ss(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
 // CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
-// CHECK-64-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-64-NEXT:    ret <vscale x 2 x i1> [[TMP0]]
 //
 // CHECK-128-LABEL: @call_bool32_ss(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
 // CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
-// CHECK-128-NEXT:    store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-128-NEXT:    ret <vscale x 2 x i1> [[TMP0]]
 //
 fixed_bool32_t call_bool32_ss(vbool32_t op1, vbool32_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
 }
 
 // CHECK-64-LABEL: @call_bool64_ss(
 // CHECK-64-NEXT:  entry:
-// CHECK-64-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
 // CHECK-64-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
-// CHECK-64-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
-// CHECK-64-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-64-NEXT:    ret <vscale x 1 x i1> [[TMP0]]
 //
 // CHECK-128-LABEL: @call_bool64_ss(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:    [[SAVED_VALUE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT:    [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
 // CHECK-128-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
-// CHECK-128-NEXT:    store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
-// CHECK-128-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT:    store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT:    ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-128-NEXT:    ret <vscale x 1 x i1> [[TMP0]]
 //
 fixed_bool64_t call_bool64_ss(vbool64_t op1, vbool64_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 64);
