Skip to content

Commit 5032643

Browse files
committed
Add gather4_masked_scaled2 and gather_masked_scaled2 intrinsics
1 parent a08fe5b commit 5032643

File tree

5 files changed

+203
-29
lines changed

5 files changed

+203
-29
lines changed

GenXIntrinsics/include/llvm/GenXIntrinsics/GenXSimdCFLowering.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ class CMSimdCFLower {
7272
std::set<AssertingVH<Value>> AlreadyPredicated;
7373
// Mask for shufflevector to extract part of EM.
7474
SmallVector<Constant *, 32> ShuffleMask;
75+
// Original predicate of an instruction, recorded when that predicate was
76+
// replaced by (original AND EM)
77+
std::map<Instruction *, Value *> OriginalPred;
78+
// Replicate mask for provided number of channels
79+
Value *replicateMask(Value *EM, Instruction *InsertBefore, unsigned SimdWidth,
80+
unsigned NumChannels = 1);
7581

7682
public:
7783
static const unsigned MAX_SIMD_CF_WIDTH = 32;
@@ -106,7 +112,7 @@ class CMSimdCFLower {
106112
void lowerSimdCF();
107113
void lowerUnmaskOps();
108114
unsigned deduceNumChannels(Instruction *SI);
109-
Instruction *loadExecutionMask(Instruction *InsertBefore, unsigned SimdWidth, unsigned NumChannels = 1);
115+
Instruction *loadExecutionMask(Instruction *InsertBefore, unsigned SimdWidth);
110116
Value *getRMAddr(BasicBlock *JP, unsigned SimdWidth);
111117
};
112118

GenXIntrinsics/include/llvm/GenXIntrinsics/Intrinsic_definitions.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1804,6 +1804,23 @@
18041804
###
18051805
"gather_scaled2" : ["anyvector",["int","short","int","int","anyint"],"ReadMem"],
18061806

1807+
### ``llvm.genx.gather.masked.scaled2`` : vISA GATHER_SCALED instruction
1808+
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1809+
###
1810+
###
1811+
### * (Exec_size inferred from element offset type)
1812+
### * arg0: i32 log2 num blocks, constant (0/1/2 for num blocks 1/2/4)
1813+
### * arg1: i16 scale, constant
1814+
### * arg2: i32 surface index
1815+
### * arg3: i32 global offset in bytes
1816+
### * arg4: vXi32 element offset in bytes (overloaded)
1817+
### * arg5: vXi1 predicate (overloaded)
1818+
###
1819+
### * Return value: the data read
1820+
###
1821+
"gather_masked_scaled2" : ["anyvector",["int","short","int","int","anyint","anyvector"],"ReadMem"],
1822+
1823+
18071824
### ``llvm.genx.gather4.scaled.<return type>.<vector type>.<any int>`` : vISA GATHER4_SCALED instruction
18081825
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
18091826
###
@@ -1859,6 +1876,22 @@
18591876
###
18601877
"gather4_scaled2" : ["anyvector",["int","short","int","int","anyint"],"ReadMem"],
18611878

1879+
### ``llvm.genx.gather4.masked.scaled2`` : vISA GATHER4_SCALED instruction
1880+
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1881+
###
1882+
### * (Exec_size inferred from element offset type)
1883+
### * arg0: i32 channel mask, constant
1884+
### * arg1: i16 scale, constant
1885+
### * arg2: i32 surface index
1886+
### * arg3: i32 global offset in bytes
1887+
### * arg4: vXi32 element offset in bytes (overloaded)
1888+
### * arg5: vXi1 predicate (overloaded)
1889+
###
1890+
### * Return value: the data read
1891+
###
1892+
"gather4_masked_scaled2" : ["anyvector",["int","short","int","int","anyint","anyvector"],"ReadMem"],
1893+
1894+
18621895
### ``llvm.genx.gather4.typed.<return type>.<vector type>.<vector type>`` : vISA GATHER4_TYPED instruction
18631896
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
18641897
###

GenXIntrinsics/lib/GenXIntrinsics/GenXSimdCFLowering.cpp

Lines changed: 80 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,14 @@ void CMSimdCFLower::processFunction(Function *ArgF)
535535
unsigned CMWidth = PredicatedSubroutines[F];
536536
// Find the simd branches.
537537
bool FoundSIMD = findSimdBranches(CMWidth);
538+
539+
// Create shuffle mask for EM adjustment
540+
if (ShuffleMask.empty()) {
541+
auto I32Ty = Type::getInt32Ty(F->getContext());
542+
for (unsigned i = 0; i != 32; ++i)
543+
ShuffleMask.push_back(ConstantInt::get(I32Ty, i));
544+
}
545+
538546
if (CMWidth > 0 || FoundSIMD) {
539547
// Determine which basic blocks need to be predicated.
540548
determinePredicatedBlocks();
@@ -555,10 +563,13 @@ void CMSimdCFLower::processFunction(Function *ArgF)
555563
lowerSimdCF();
556564
lowerUnmaskOps();
557565
}
566+
567+
ShuffleMask.clear();
558568
SimdBranches.clear();
559569
PredicatedBlocks.clear();
560570
JoinPoints.clear();
561571
RMAddrs.clear();
572+
OriginalPred.clear();
562573
AlreadyPredicated.clear();
563574
}
564575

@@ -1214,6 +1225,7 @@ unsigned CMSimdCFLower::deduceNumChannels(Instruction *SI) {
12141225
// If it's not a function call then check for a specific instruction
12151226
unsigned IID = GenXIntrinsic::getGenXIntrinsicID(CI);
12161227
switch (IID) {
1228+
case GenXIntrinsic::genx_gather4_masked_scaled2:
12171229
case GenXIntrinsic::genx_gather4_scaled2: {
12181230
unsigned AddrElems = VCINTR::VectorType::getNumElements(
12191231
cast<VectorType>(CI->getOperand(4)->getType()));
@@ -1262,6 +1274,7 @@ void CMSimdCFLower::predicateStore(Instruction *SI, unsigned SimdWidth)
12621274
CallInst *WrRegionToPredicate = nullptr;
12631275
Use *U = &SI->getOperandUse(0);
12641276
Use *UseNeedsUpdate = nullptr;
1277+
Value *ExistingPred = nullptr;
12651278
for (;;) {
12661279
if (auto BC = dyn_cast<BitCastInst>(V)) {
12671280
U = &BC->getOperandUse(0);
@@ -1277,6 +1290,15 @@ void CMSimdCFLower::predicateStore(Instruction *SI, unsigned SimdWidth)
12771290
unsigned IID = GenXIntrinsic::getGenXIntrinsicID(WrRegion);
12781291
if (IID != GenXIntrinsic::genx_wrregioni
12791292
&& IID != GenXIntrinsic::genx_wrregionf) {
1293+
// genx_gather4_masked_scaled2 is slightly different: it has a predicate
1294+
// operand, and its users have to be predicated as well since it returns a
1295+
// value that is wider than the execution size
1296+
if (IID == GenXIntrinsic::genx_gather4_masked_scaled2) {
1297+
assert(AlreadyPredicated.find(WrRegion) != AlreadyPredicated.end());
1298+
if (OriginalPred.count(WrRegion))
1299+
ExistingPred = OriginalPred[WrRegion];
1300+
break;
1301+
}
12801302
// Not wrregion. See if it is an intrinsic that has already been
12811303
// predicated; if so do not attempt to predicate the store.
12821304
if (AlreadyPredicated.find(WrRegion) != AlreadyPredicated.end())
@@ -1361,7 +1383,19 @@ void CMSimdCFLower::predicateStore(Instruction *SI, unsigned SimdWidth)
13611383
Load = CallInst::Create(Fn, Addr, ".simdcfpred.vload", SI);
13621384
}
13631385
Load->setDebugLoc(SI->getDebugLoc());
1364-
auto EM = loadExecutionMask(SI, SimdWidth, NumChannels);
1386+
Value *EM = loadExecutionMask(SI, SimdWidth);
1387+
1388+
// If there was a predicate already then update it with current EM
1389+
if (ExistingPred) {
1390+
EM = BinaryOperator::Create(
1391+
Instruction::And, ExistingPred, EM,
1392+
ExistingPred->getName() + ".and." + EM->getName(), SI);
1393+
cast<Instruction>(EM)->setDebugLoc(SI->getDebugLoc());
1394+
}
1395+
1396+
// Replicate mask for each channel if needed
1397+
EM = replicateMask(EM, SI, SimdWidth, NumChannels);
1398+
13651399
auto Select = SelectInst::Create(EM, SI->getOperand(0), Load,
13661400
SI->getOperand(0)->getName() + ".simdcfpred", SI);
13671401
SI->setOperand(0, Select);
@@ -1450,16 +1484,26 @@ void CMSimdCFLower::predicateScatterGather(CallInst *CI, unsigned SimdWidth,
14501484
{
14511485
Value *OldPred = CI->getArgOperand(PredOperandNum);
14521486
assert(OldPred->getType()->getScalarType()->isIntegerTy(1));
1453-
if (SimdWidth != VCINTR::VectorType::getNumElements(
1454-
cast<VectorType>(OldPred->getType()))) {
1455-
DiagnosticInfoSimdCF::emit(CI, "mismatching SIMD width of scatter/gather inside SIMD control flow");
1456-
return;
1487+
switch (GenXIntrinsic::getGenXIntrinsicID(CI)) {
1488+
case GenXIntrinsic::genx_gather4_masked_scaled2:
1489+
break;
1490+
default: {
1491+
if (SimdWidth != VCINTR::VectorType::getNumElements(
1492+
cast<VectorType>(OldPred->getType()))) {
1493+
DiagnosticInfoSimdCF::emit(
1494+
CI,
1495+
"mismatching SIMD width of scatter/gather inside SIMD control flow");
1496+
return;
1497+
}
1498+
break;
1499+
}
14571500
}
14581501
Instruction *NewPred = loadExecutionMask(CI, SimdWidth);
14591502
if (auto C = dyn_cast<Constant>(OldPred))
14601503
if (C->isAllOnesValue())
14611504
OldPred = nullptr;
14621505
if (OldPred) {
1506+
OriginalPred[CI] = OldPred;
14631507
auto And = BinaryOperator::Create(Instruction::And, OldPred, NewPred,
14641508
OldPred->getName() + ".and." + NewPred->getName(), CI);
14651509
And->setDebugLoc(CI->getDebugLoc());
@@ -1496,6 +1540,7 @@ CallInst *CMSimdCFLower::predicateWrRegion(CallInst *WrR, unsigned SimdWidth)
14961540
if (!Pred)
14971541
Pred = EM;
14981542
else {
1543+
OriginalPred[WrR] = Pred;
14991544
auto And = BinaryOperator::Create(Instruction::And, EM, Pred,
15001545
Pred->getName() + ".and." + EM->getName(), WrR);
15011546
And->setDebugLoc(WrR->getDebugLoc());
@@ -1783,39 +1828,46 @@ CallInst *CMSimdCFLower::isSimdCFAny(Value *V)
17831828
return nullptr;
17841829
}
17851830

1831+
/***********************************************************************
1832+
* replicateMask : copy mask for provided number of channels using shufflevector
1833+
*/
1834+
Value *CMSimdCFLower::replicateMask(Value *EM, Instruction *InsertBefore,
1835+
unsigned SimdWidth, unsigned NumChannels) {
1836+
// No need to replicate the mask for one channel
1837+
if (NumChannels == 1)
1838+
return EM;
1839+
1840+
SmallVector<Constant *, 128> ChannelMask{SimdWidth * NumChannels};
1841+
for (unsigned i = 0; i < NumChannels; ++i)
1842+
std::copy(ShuffleMask.begin(), ShuffleMask.begin() + SimdWidth,
1843+
ChannelMask.begin() + SimdWidth * i);
1844+
EM = new ShuffleVectorInst(
1845+
EM, UndefValue::get(EM->getType()), ConstantVector::get(ChannelMask),
1846+
Twine("ChannelEM") + Twine(SimdWidth), InsertBefore);
1847+
1848+
return EM;
1849+
}
1850+
17861851
/***********************************************************************
17871852
* loadExecutionMask : create instruction to load EM
17881853
*/
17891854
Instruction *CMSimdCFLower::loadExecutionMask(Instruction *InsertBefore,
1790-
unsigned SimdWidth, unsigned NumChannels)
1791-
{
1855+
unsigned SimdWidth) {
17921856
Instruction *EM =
17931857
new LoadInst(EMVar->getType()->getPointerElementType(), EMVar,
17941858
EMVar->getName(), false /* isVolatile */, InsertBefore);
1795-
EM->setDebugLoc(InsertBefore->getDebugLoc());
1859+
17961860
// If the simd width is not MAX_SIMD_CF_WIDTH, extract the part of EM we want.
1797-
if (NumChannels == 1 && SimdWidth == MAX_SIMD_CF_WIDTH)
1861+
if (SimdWidth == MAX_SIMD_CF_WIDTH)
17981862
return EM;
1799-
if (ShuffleMask.empty()) {
1800-
auto I32Ty = Type::getInt32Ty(F->getContext());
1801-
for (unsigned i = 0; i != 32; ++i)
1802-
ShuffleMask.push_back(ConstantInt::get(I32Ty, i));
1803-
}
1804-
if (NumChannels == 1) {
1805-
ArrayRef<Constant *> Mask = ShuffleMask;
1806-
EM = new ShuffleVectorInst(EM, UndefValue::get(EM->getType()),
1807-
ConstantVector::get(Mask.take_front(SimdWidth)),
1808-
Twine("EM") + Twine(SimdWidth), InsertBefore);
1809-
} else {
1810-
SmallVector<Constant *, 128> ChannelMask{SimdWidth * NumChannels};
1811-
for (unsigned i = 0; i < NumChannels; ++i)
1812-
std::copy(ShuffleMask.begin(), ShuffleMask.begin() + SimdWidth,
1813-
ChannelMask.begin() + SimdWidth * i);
1814-
EM = new ShuffleVectorInst(
1815-
EM, UndefValue::get(EM->getType()), ConstantVector::get(ChannelMask),
1816-
Twine("ChannelEM") + Twine(SimdWidth), InsertBefore);
1817-
}
1863+
1864+
ArrayRef<Constant *> Mask = ShuffleMask;
1865+
EM = new ShuffleVectorInst(EM, UndefValue::get(EM->getType()),
1866+
ConstantVector::get(Mask.take_front(SimdWidth)),
1867+
Twine("EM") + Twine(SimdWidth), InsertBefore);
1868+
18181869
EM->setDebugLoc(InsertBefore->getDebugLoc());
1870+
18191871
return EM;
18201872
}
18211873

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; RUN: opt -S -cmsimdcflowering < %s | FileCheck %s
2+
3+
@Rcp_T2 = internal global <64 x i32> undef
4+
5+
; CHECK: @EM = internal global <32 x i1>
6+
7+
define dso_local dllexport void @test(<32 x i16> %mask, <32 x i32> %addrs) {
8+
entry:
9+
%Rcp_T = alloca <64 x i32>, align 512
10+
%0 = icmp ne <32 x i16> %mask, zeroinitializer
11+
%call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %0)
12+
br i1 %call, label %if.then, label %if.end
13+
if.then:
14+
; CHECK-LABEL: if.then:
15+
; CHECK: [[EM_LOAD1:%.*]] = load <32 x i1>, <32 x i1>* @EM
16+
; CHECK-NEXT: [[CALL1:%.*]] = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> [[EM_LOAD1]])
17+
%call1 = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>)
18+
19+
; CHECK: [[EM_LOAD2:%.*]] = load <32 x i1>, <32 x i1>* @EM
20+
; CHECK-NEXT: [[CHENNELEM:%.*]] = shufflevector <32 x i1> [[EM_LOAD2]], <32 x i1> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
21+
; CHECK-NEXT: [[CALL1_SIMDCFPREDL:%.*]] = select <64 x i1> [[CHENNELEM]], <64 x i32> [[CALL1]]
22+
store <64 x i32> %call1, <64 x i32>* %Rcp_T
23+
br label %if.end
24+
25+
if.end:
26+
%1 = load <64 x i32>, <64 x i32>* %Rcp_T
27+
store <64 x i32> %1, <64 x i32>* @Rcp_T2
28+
ret void
29+
}
30+
31+
declare <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32, i16, i32, i32, <32 x i32>, <32 x i1>)
32+
declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>)
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; RUN: opt -S -cmsimdcflowering < %s | FileCheck %s
2+
3+
@Rcp_T2 = internal global <64 x i32> undef
4+
5+
; CHECK: @EM = internal global <32 x i1>
6+
7+
define dso_local dllexport void @test(<32 x i16> %cond1, <32 x i16> %cond2, <32 x i32> %addrs, <32 x i1> %pred) {
8+
entry:
9+
%Rcp_T = alloca <64 x i32>, align 512
10+
%0 = icmp ne <32 x i16> %cond1, zeroinitializer
11+
%call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %0)
12+
br i1 %call, label %if.then, label %if.end
13+
if.then:
14+
; CHECK-LABEL: if.then:
15+
; CHECK: [[EM_LOAD1:%.*]] = load <32 x i1>, <32 x i1>* @EM
16+
; CHECK-NEXT: [[EM_UPDATE1:%.*]] = and <32 x i1> %pred, [[EM_LOAD1]]
17+
; CHECK-NEXT: [[CALL1:%.*]] = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> [[EM_UPDATE1]])
18+
%call1 = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> %pred)
19+
20+
; CHECK: [[EM_LOAD2:%.*]] = load <32 x i1>, <32 x i1>* @EM
21+
; CHECK-NEXT: [[EM_UPDATE2:%.*]] = and <32 x i1> %pred, [[EM_LOAD2]]
22+
; CHECK-NEXT: [[CHENNELEM:%.*]] = shufflevector <32 x i1> [[EM_UPDATE2]], <32 x i1> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
23+
; CHECK-NEXT: [[CALL1_SIMDCFPRED1:%.*]] = select <64 x i1> [[CHENNELEM]], <64 x i32> [[CALL1]]
24+
25+
store <64 x i32> %call1, <64 x i32>* %Rcp_T
26+
27+
%1 = icmp ne <32 x i16> %cond2, zeroinitializer
28+
%nest = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %1)
29+
br i1 %nest, label %if.then2, label %if.end2
30+
31+
if.then2:
32+
; CHECK-LABEL: if.then2:
33+
; CHECK: [[EM_LOAD3:%.*]] = load <32 x i1>, <32 x i1>* @EM
34+
; CHECK-NEXT: [[EM_UPDATE2:%.*]] = and <32 x i1> %pred, [[EM_LOAD3]]
35+
; CHECK-NEXT: [[CHENNELEM2:%.*]] = shufflevector <32 x i1> [[EM_UPDATE2]], <32 x i1> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
36+
; CHECK-NEXT: %call1.simdcfpred7 = select <64 x i1> [[CHENNELEM2]], <64 x i32> [[CALL1]]
37+
store <64 x i32> %call1, <64 x i32>* %Rcp_T
38+
br label %if.end2
39+
40+
if.end2:
41+
br label %if.end
42+
if.end:
43+
%2 = load <64 x i32>, <64 x i32>* %Rcp_T
44+
store <64 x i32> %2, <64 x i32>* @Rcp_T2
45+
ret void
46+
}
47+
48+
declare <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32, i16, i32, i32, <32 x i32>, <32 x i1>)
49+
declare <64 x i32> @llvm.genx.wrregioni.v64i32.v16i32.i16.i1(<64 x i32> %load, <64 x i32> %call, i32, i32, i32, i16, i32, i1)
50+
declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>)
51+

0 commit comments

Comments
 (0)