Commit bf0655f
[RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits (#139190)
For i1 vectors, we used an i8 fixed vector as the storage type. If the known minimum number of elements of the scalable vector type is less than 8, we were doing the cast through memory, using a load or store through a fixed vector alloca. When that minimum is less than 8, DataLayout indicates that the load/store reads/writes vscale bytes, even when vscale is known and vscale*X is less than or equal to 8. As far as DataLayout is concerned, the load or store is therefore out of bounds of the fixed-size alloca, leading to undefined behavior.

This patch avoids the memory round trip by widening the i1 scalable vector type with zero elements until its element count is divisible by 8. This allows it to be bitcast to/from an i8 scalable vector; we then insert or extract the i8 fixed vector into/from this type (see the arithmetic sketch below). Hopefully this enables #130973 to be accepted.
Parent: a3ba00a

9 files changed: +124 / -197 lines
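To make the rounding concrete, here is a standalone arithmetic sketch (not part of the patch; the local helpers mirror what llvm::divideCeil and llvm::alignTo<8> compute) showing how each i1 predicate type maps to a zero-widened i1 type and its i8 byte view:

#include <cstdint>
#include <cstdio>

// Local mirrors of llvm::divideCeil and llvm::alignTo<8> for this demo.
static uint64_t divide_ceil(uint64_t n, uint64_t d) { return (n + d - 1) / d; }
static uint64_t align_to_8(uint64_t n) { return (n + 7) & ~uint64_t(7); }

int main() {
  // min_elts is X in <vscale x X x i1>; e.g. vbool64_t has X=1 and
  // vbool32_t has X=2. For X < 8 the old memory-based cast hit UB.
  for (uint64_t min_elts : {1ull, 2ull, 4ull, 8ull, 16ull}) {
    std::printf("nxv%llui1 -> widened nxv%llui1 -> byte view nxv%llui8\n",
                (unsigned long long)min_elts,
                (unsigned long long)align_to_8(min_elts),
                (unsigned long long)divide_ceil(min_elts, 8));
  }
  return 0;
}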

clang/lib/CodeGen/CGCall.cpp

Lines changed: 15 additions & 5 deletions
@@ -1366,19 +1366,23 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
     // If we are casting a fixed i8 vector to a scalable i1 predicate
     // vector, use a vector insert and bitcast the result.
     if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
-        ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
         FixedSrcTy->getElementType()->isIntegerTy(8)) {
       ScalableDstTy = llvm::ScalableVectorType::get(
           FixedSrcTy->getElementType(),
-          ScalableDstTy->getElementCount().getKnownMinValue() / 8);
+          llvm::divideCeil(
+              ScalableDstTy->getElementCount().getKnownMinValue(), 8));
     }
     if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) {
       auto *Load = CGF.Builder.CreateLoad(Src);
       auto *PoisonVec = llvm::PoisonValue::get(ScalableDstTy);
       llvm::Value *Result = CGF.Builder.CreateInsertVector(
           ScalableDstTy, PoisonVec, Load, uint64_t(0), "cast.scalable");
-      if (ScalableDstTy != Ty)
-        Result = CGF.Builder.CreateBitCast(Result, Ty);
+      ScalableDstTy = cast<llvm::ScalableVectorType>(
+          llvm::VectorType::getWithSizeAndScalar(ScalableDstTy, Ty));
+      if (Result->getType() != ScalableDstTy)
+        Result = CGF.Builder.CreateBitCast(Result, ScalableDstTy);
+      if (Result->getType() != Ty)
+        Result = CGF.Builder.CreateExtractVector(Ty, Result, uint64_t(0));
       return Result;
     }
   }
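A condensed sketch of the new fixed-to-scalable path (the helper name is invented; the real code uses llvm::VectorType::getWithSizeAndScalar to form the widened i1 type, spelled out here as explicit arithmetic, and it assumes the fixed vector fits the minimum vscale footprint, as -mrvv-vector-bits guarantees):

llvm::Value *castFixedI8ToScalableI1(llvm::IRBuilderBase &B,
                                     llvm::Value *Fixed,
                                     llvm::ScalableVectorType *DstTy) {
  auto *FixedTy = cast<llvm::FixedVectorType>(Fixed->getType());
  // An i8-element scalable container wide enough for DstTy's bits,
  // rounded up: divideCeil(MinElts, 8) i8 elements per vscale chunk.
  auto *I8ScalableTy = llvm::ScalableVectorType::get(
      FixedTy->getElementType(),
      llvm::divideCeil(DstTy->getElementCount().getKnownMinValue(), 8));
  llvm::Value *Poison = llvm::PoisonValue::get(I8ScalableTy);
  llvm::Value *V =
      B.CreateInsertVector(I8ScalableTy, Poison, Fixed, uint64_t(0));
  // Reinterpret the bytes as a widened i1 vector of the same bit width...
  auto *WideI1Ty = llvm::ScalableVectorType::get(
      B.getInt1Ty(), 8 * I8ScalableTy->getElementCount().getKnownMinValue());
  V = B.CreateBitCast(V, WideI1Ty);
  // ...then shrink back to the requested predicate type if it was narrower.
  if (WideI1Ty != DstTy)
    V = B.CreateExtractVector(DstTy, V, uint64_t(0));
  return V;
}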
@@ -1476,8 +1480,14 @@ CoerceScalableToFixed(CodeGenFunction &CGF, llvm::FixedVectorType *ToTy,
   // If we are casting a scalable i1 predicate vector to a fixed i8
   // vector, first bitcast the source.
   if (FromTy->getElementType()->isIntegerTy(1) &&
-      FromTy->getElementCount().isKnownMultipleOf(8) &&
       ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
+    if (!FromTy->getElementCount().isKnownMultipleOf(8)) {
+      FromTy = llvm::ScalableVectorType::get(
+          FromTy->getElementType(),
+          llvm::alignTo<8>(FromTy->getElementCount().getKnownMinValue()));
+      llvm::Value *ZeroVec = llvm::Constant::getNullValue(FromTy);
+      V = CGF.Builder.CreateInsertVector(FromTy, ZeroVec, V, uint64_t(0));
+    }
     FromTy = llvm::ScalableVectorType::get(
         ToTy->getElementType(),
         FromTy->getElementCount().getKnownMinValue() / 8);
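The reverse direction in sketch form (helper name invented; the trailing bitcast and extract continue the pre-existing code beyond this hunk):

llvm::Value *castScalableI1ToFixedI8(llvm::IRBuilderBase &B, llvm::Value *V,
                                     llvm::FixedVectorType *DstTy) {
  auto *SrcTy = cast<llvm::ScalableVectorType>(V->getType());
  if (!SrcTy->getElementCount().isKnownMultipleOf(8)) {
    SrcTy = llvm::ScalableVectorType::get(
        SrcTy->getElementType(),
        llvm::alignTo<8>(SrcTy->getElementCount().getKnownMinValue()));
    // Widen with zeros rather than poison: the padding lanes become
    // well-defined bits in the byte view.
    llvm::Value *Zero = llvm::Constant::getNullValue(SrcTy);
    V = B.CreateInsertVector(SrcTy, Zero, V, uint64_t(0));
  }
  // Exact reinterpretation is now possible: 8 i1 lanes per i8 element.
  auto *ByteTy = llvm::ScalableVectorType::get(
      DstTy->getElementType(),
      SrcTy->getElementCount().getKnownMinValue() / 8);
  V = B.CreateBitCast(V, ByteTy);
  return B.CreateExtractVector(DstTy, V, uint64_t(0));
}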

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 17 additions & 4 deletions
@@ -2491,18 +2491,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
       // If we are casting a fixed i8 vector to a scalable i1 predicate
       // vector, use a vector insert and bitcast the result.
       if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
-          ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
           FixedSrcTy->getElementType()->isIntegerTy(8)) {
         ScalableDstTy = llvm::ScalableVectorType::get(
             FixedSrcTy->getElementType(),
-            ScalableDstTy->getElementCount().getKnownMinValue() / 8);
+            llvm::divideCeil(
+                ScalableDstTy->getElementCount().getKnownMinValue(), 8));
       }
       if (FixedSrcTy->getElementType() == ScalableDstTy->getElementType()) {
         llvm::Value *PoisonVec = llvm::PoisonValue::get(ScalableDstTy);
         llvm::Value *Result = Builder.CreateInsertVector(
             ScalableDstTy, PoisonVec, Src, uint64_t(0), "cast.scalable");
+        ScalableDstTy = cast<llvm::ScalableVectorType>(
+            llvm::VectorType::getWithSizeAndScalar(ScalableDstTy, DstTy));
+        if (Result->getType() != ScalableDstTy)
+          Result = Builder.CreateBitCast(Result, ScalableDstTy);
         if (Result->getType() != DstTy)
-          Result = Builder.CreateBitCast(Result, DstTy);
+          Result = Builder.CreateExtractVector(DstTy, Result, uint64_t(0));
         return Result;
       }
     }
@@ -2516,8 +2520,17 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     // If we are casting a scalable i1 predicate vector to a fixed i8
     // vector, bitcast the source and use a vector extract.
     if (ScalableSrcTy->getElementType()->isIntegerTy(1) &&
-        ScalableSrcTy->getElementCount().isKnownMultipleOf(8) &&
         FixedDstTy->getElementType()->isIntegerTy(8)) {
+      if (!ScalableSrcTy->getElementCount().isKnownMultipleOf(8)) {
+        ScalableSrcTy = llvm::ScalableVectorType::get(
+            ScalableSrcTy->getElementType(),
+            llvm::alignTo<8>(
+                ScalableSrcTy->getElementCount().getKnownMinValue()));
+        llvm::Value *ZeroVec = llvm::Constant::getNullValue(ScalableSrcTy);
+        Src = Builder.CreateInsertVector(ScalableSrcTy, ZeroVec, Src,
+                                         uint64_t(0));
+      }
+
       ScalableSrcTy = llvm::ScalableVectorType::get(
           FixedDstTy->getElementType(),
           ScalableSrcTy->getElementCount().getKnownMinValue() / 8);
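This mirrors the CGCall.cpp change in the scalar cast emitter. Worked through for the smallest case in the tests below (vbool32_t, i.e. <vscale x 2 x i1>): the predicate widens to <vscale x 8 x i1>, is viewed as <vscale x 1 x i8>, and is exchanged with the <1 x i8> storage type. A standalone snippet (assumes LLVM headers; it only builds and prints the types, emitting no IR):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::Type *I1 = llvm::Type::getInt1Ty(Ctx);
  llvm::Type *I8 = llvm::Type::getInt8Ty(Ctx);
  // vbool32_t: <vscale x 2 x i1>; 2 is not a multiple of 8.
  auto *Pred = llvm::ScalableVectorType::get(I1, 2);
  // Zero-widened so a bitcast to bytes is exact: <vscale x 8 x i1>.
  auto *Wide = llvm::ScalableVectorType::get(
      I1, llvm::alignTo<8>(Pred->getMinNumElements()));
  // The byte view: <vscale x 1 x i8>.
  auto *Bytes =
      llvm::ScalableVectorType::get(I8, Wide->getMinNumElements() / 8);
  // The fixed storage type at -mrvv-vector-bits=64: <1 x i8>.
  auto *Fixed = llvm::FixedVectorType::get(I8, 1);
  llvm::errs() << *Pred << " -> " << *Wide << " -> " << *Bytes << " <-> "
               << *Fixed << "\n";
}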

clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c

Lines changed: 16 additions & 88 deletions
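For orientation, the shape of source this test compiles looks like the following (condensed from the test itself; the function name here is invented, and the driver flags are assumed to match the test's RUN lines, e.g. -mrvv-vector-bits=64):

#include <riscv_vector.h>

typedef vbool32_t fixed_bool32_t
    __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 32)));

// Returning the fixed-length mask type forces the fixed<->scalable i1
// coercions that previously round-tripped through an undersized alloca.
fixed_bool32_t and_mask(fixed_bool32_t op1, vbool32_t op2) {
  return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
}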
@@ -15,24 +15,12 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_
 
 // CHECK-64-LABEL: @call_bool32_ff(
 // CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2_COERCE:%.*]], i64 2)
-// CHECK-64-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
-// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-64-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[TMP1:%.*]], i64 2)
 // CHECK-64-NEXT: ret <vscale x 2 x i1> [[TMP2]]
 //
 // CHECK-128-LABEL: @call_bool32_ff(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2_COERCE:%.*]], i64 4)
-// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
-// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-128-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[TMP1:%.*]], i64 4)
 // CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
 //
 fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
@@ -41,24 +29,12 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
 
 // CHECK-64-LABEL: @call_bool64_ff(
 // CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2_COERCE:%.*]], i64 1)
-// CHECK-64-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-64-NEXT: [[TMP2:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[TMP1:%.*]], i64 1)
 // CHECK-64-NEXT: ret <vscale x 1 x i1> [[TMP2]]
 //
 // CHECK-128-LABEL: @call_bool64_ff(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2_COERCE:%.*]], i64 2)
-// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
+// CHECK-128-NEXT: [[TMP2:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[TMP1:%.*]], i64 2)
 // CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]
 //
 fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) {
@@ -71,51 +47,27 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) {
 
 // CHECK-64-LABEL: @call_bool32_fs(
 // CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
-// CHECK-64-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
-// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT: ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-64-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
+// CHECK-64-NEXT: ret <vscale x 2 x i1> [[TMP1]]
 //
 // CHECK-128-LABEL: @call_bool32_fs(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
-// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-128-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
+// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP1]]
 //
 fixed_bool32_t call_bool32_fs(fixed_bool32_t op1, vbool32_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
 }
 
 // CHECK-64-LABEL: @call_bool64_fs(
 // CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
-// CHECK-64-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
-// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT: ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-64-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
+// CHECK-64-NEXT: ret <vscale x 1 x i1> [[TMP1]]
 //
 // CHECK-128-LABEL: @call_bool64_fs(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
-// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-128-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[TMP0:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
+// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP1]]
 //
 fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 64);
@@ -127,51 +79,27 @@ fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
 
 // CHECK-64-LABEL: @call_bool32_ss(
 // CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
 // CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 2)
-// CHECK-64-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
-// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT: ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-64-NEXT: ret <vscale x 2 x i1> [[TMP0]]
 //
 // CHECK-128-LABEL: @call_bool32_ss(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
 // CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
-// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
+// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP0]]
 //
 fixed_bool32_t call_bool32_ss(vbool32_t op1, vbool32_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32);
 }
 
 // CHECK-64-LABEL: @call_bool64_ss(
 // CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
 // CHECK-64-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 1)
-// CHECK-64-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
-// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-64-NEXT: ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-64-NEXT: ret <vscale x 1 x i1> [[TMP0]]
 //
 // CHECK-128-LABEL: @call_bool64_ss(
 // CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 1 x i1>, align 1
-// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
 // CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
-// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
-// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
-// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]
+// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP0]]
 //
 fixed_bool64_t call_bool64_ss(vbool64_t op1, vbool64_t op2) {
   return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 64);
