
Commit da179eb

[TLI] replace-with-veclib works with FRem Instruction.
Updated the SLEEF and ArmPL tests with fixed-width and scalable cases for frem, which is mapped to fmod (f64) and fmodf (f32).
1 parent 567941b commit da179eb
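
In effect, the pass now treats a vector frem like a vectorizable intrinsic call: it looks up the scalar libm name (fmod for double, fmodf for float) through the TLI and swaps in the matching vector-library routine. A minimal before/after sketch in LLVM IR, assuming the fixed-width SLEEF mappings are enabled (e.g. opt -vector-library=sleefgnuabi with the replace-with-veclib pass; the tests' RUN lines are not part of this excerpt):

; Before: a plain vector frem. @frem_example is a hypothetical
; function for illustration, not part of this commit.
define <4 x float> @frem_example(<4 x float> %a, <4 x float> %b) {
  %r = frem <4 x float> %a, %b
  ret <4 x float> %r
}

; After replace-with-veclib (vector callee name as in the SLEEF test below):
define <4 x float> @frem_example(<4 x float> %a, <4 x float> %b) {
  %r = call <4 x float> @_ZGVnN4vv_fmodf(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}

declare <4 x float> @_ZGVnN4vv_fmodf(<4 x float>, <4 x float>)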


4 files changed: +149 -55 lines


llvm/lib/CodeGen/ReplaceWithVeclib.cpp

Lines changed: 70 additions & 52 deletions
@@ -69,52 +69,57 @@ Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
   return TLIFunc;
 }

-/// Replace the call to the vector intrinsic ( \p CalltoReplace ) with a call to
-/// the corresponding function from the vector library ( \p TLIVecFunc ).
-static void replaceWithTLIFunction(CallInst &CalltoReplace, VFInfo &Info,
+/// Replace the Instruction \p I, that may be a vector intrinsic CallInst or
+/// the frem instruction, with a call to the corresponding function from the
+/// vector library ( \p TLIVecFunc ).
+static void replaceWithTLIFunction(Instruction &I, VFInfo &Info,
                                    Function *TLIVecFunc) {
-  IRBuilder<> IRBuilder(&CalltoReplace);
-  SmallVector<Value *> Args(CalltoReplace.args());
+  IRBuilder<> IRBuilder(&I);
+  auto *CI = dyn_cast<CallInst>(&I);
+  SmallVector<Value *> Args(CI ? CI->args() : I.operands());
   if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) {
-    auto *MaskTy = VectorType::get(Type::getInt1Ty(CalltoReplace.getContext()),
-                                   Info.Shape.VF);
+    auto *MaskTy =
+        VectorType::get(Type::getInt1Ty(I.getContext()), Info.Shape.VF);
     Args.insert(Args.begin() + OptMaskpos.value(),
                 Constant::getAllOnesValue(MaskTy));
   }

-  // Preserve the operand bundles.
+  // Preserve the operand bundles for CallInsts.
   SmallVector<OperandBundleDef, 1> OpBundles;
-  CalltoReplace.getOperandBundlesAsDefs(OpBundles);
+  if (CI)
+    CI->getOperandBundlesAsDefs(OpBundles);
+
   CallInst *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles);
-  CalltoReplace.replaceAllUsesWith(Replacement);
+  I.replaceAllUsesWith(Replacement);
   // Preserve fast math flags for FP math.
   if (isa<FPMathOperator>(Replacement))
-    Replacement->copyFastMathFlags(&CalltoReplace);
+    Replacement->copyFastMathFlags(&I);
 }

-/// Returns true when successfully replaced \p CallToReplace with a suitable
-/// function taking vector arguments, based on available mappings in the \p TLI.
-/// Currently only works when \p CallToReplace is a call to vectorized
-/// intrinsic.
+/// Returns true when successfully replaced \p I with a suitable function taking
+/// vector arguments, based on available mappings in the \p TLI. Currently only
+/// works when \p I is a call to vectorized intrinsic or the FRem Instruction.
 static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
-                                    CallInst &CallToReplace) {
-  if (!CallToReplace.getCalledFunction())
-    return false;
-
-  auto IntrinsicID = CallToReplace.getCalledFunction()->getIntrinsicID();
-  // Replacement is only performed for intrinsic functions.
-  if (IntrinsicID == Intrinsic::not_intrinsic)
-    return false;
-
+                                    Instruction &I) {
+  CallInst *CI = dyn_cast<CallInst>(&I);
+  Intrinsic::ID IID = Intrinsic::not_intrinsic;
+  if (CI)
+    IID = CI->getCalledFunction()->getIntrinsicID();
   // Compute arguments types of the corresponding scalar call. Additionally
   // checks if in the vector call, all vector operands have the same EC.
   ElementCount VF = ElementCount::getFixed(0);
-  SmallVector<Type *> ScalarArgTypes;
-  for (auto Arg : enumerate(CallToReplace.args())) {
+  SmallVector<Type *, 8> ScalarArgTypes;
+  for (auto Arg : enumerate(CI ? CI->args() : I.operands())) {
     auto *ArgTy = Arg.value()->getType();
-    if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
+    if (CI && isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
       ScalarArgTypes.push_back(ArgTy);
-    } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
+    } else {
+      auto *VectorArgTy = dyn_cast<VectorType>(ArgTy);
+      // We are expecting only VectorTypes, as:
+      //  - with a CallInst, scalar operands are handled earlier
+      //  - with the FRem Instruction, both operands must be vectors.
+      if (!VectorArgTy)
+        return false;
       ScalarArgTypes.push_back(ArgTy->getScalarType());
       // Disallow vector arguments with different VFs. When processing the first
       // vector argument, store it's VF, and for the rest ensure that they match
@@ -123,18 +128,22 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
         VF = VectorArgTy->getElementCount();
       else if (VF != VectorArgTy->getElementCount())
         return false;
-    } else
-      // Exit when it is supposed to be a vector argument but it isn't.
-      return false;
+    }
   }

-  // Try to reconstruct the name for the scalar version of this intrinsic using
-  // the intrinsic ID and the argument types converted to scalar above.
-  std::string ScalarName =
-      (Intrinsic::isOverloaded(IntrinsicID)
-           ? Intrinsic::getName(IntrinsicID, ScalarArgTypes,
-                                CallToReplace.getModule())
-           : Intrinsic::getName(IntrinsicID).str());
+  // Try to reconstruct the name for the scalar version of the instruction.
+  std::string ScalarName;
+  if (CI) {
+    // For intrinsics, use scalar argument types
+    ScalarName = Intrinsic::isOverloaded(IID)
+                     ? Intrinsic::getName(IID, ScalarArgTypes, I.getModule())
+                     : Intrinsic::getName(IID).str();
+  } else {
+    LibFunc Func;
+    if (!TLI.getLibFunc(I.getOpcode(), I.getType()->getScalarType(), Func))
+      return false;
+    ScalarName = TLI.getName(Func);
+  }

   // Try to find the mapping for the scalar version of this intrinsic and the
   // exact vector width of the call operands in the TargetLibraryInfo. First,
@@ -150,7 +159,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,

   // Replace the call to the intrinsic with a call to the vector library
   // function.
-  Type *ScalarRetTy = CallToReplace.getType()->getScalarType();
+  Type *ScalarRetTy = I.getType()->getScalarType();
   FunctionType *ScalarFTy =
       FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
   const std::string MangledName = VD->getVectorFunctionABIVariantString();
@@ -162,27 +171,36 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
   if (!VectorFTy)
     return false;

-  Function *FuncToReplace = CallToReplace.getCalledFunction();
-  Function *TLIFunc = getTLIFunction(CallToReplace.getModule(), VectorFTy,
+  Function *FuncToReplace = CI ? CI->getCalledFunction() : nullptr;
+  Function *TLIFunc = getTLIFunction(I.getModule(), VectorFTy,
                                      VD->getVectorFnName(), FuncToReplace);
-  replaceWithTLIFunction(CallToReplace, *OptInfo, TLIFunc);
-
-  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
-                    << FuncToReplace->getName() << "` with call to `"
-                    << TLIFunc->getName() << "`.\n");
+  replaceWithTLIFunction(I, *OptInfo, TLIFunc);
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << ScalarName
+                    << "` with call to `" << TLIFunc->getName() << "`.\n");
   ++NumCallsReplaced;
   return true;
 }

+/// Supported Instructions \p I are either FRem or CallInsts to Intrinsics.
+static bool isSupportedInstruction(Instruction *I) {
+  if (auto *CI = dyn_cast<CallInst>(I)) {
+    if (!CI->getCalledFunction())
+      return false;
+    if (CI->getCalledFunction()->getIntrinsicID() == Intrinsic::not_intrinsic)
+      return false;
+  } else if (I->getOpcode() != Instruction::FRem)
+    return false;
+
+  return true;
+}
+
 static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
   bool Changed = false;
-  SmallVector<CallInst *> ReplacedCalls;
+  SmallVector<Instruction *> ReplacedCalls;
   for (auto &I : instructions(F)) {
-    if (auto *CI = dyn_cast<CallInst>(&I)) {
-      if (replaceWithCallToVeclib(TLI, *CI)) {
-        ReplacedCalls.push_back(CI);
-        Changed = true;
-      }
+    if (isSupportedInstruction(&I) && replaceWithCallToVeclib(TLI, I)) {
+      ReplacedCalls.push_back(&I);
+      Changed = true;
     }
   }
   // Erase the calls to the intrinsics that have been replaced

llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll

Lines changed: 41 additions & 1 deletion
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
 declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)

 ;.
-; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [36 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vfmodq_f64, ptr @armpl_vfmodq_f32, ptr @armpl_svfmod_f64_x, ptr @armpl_svfmod_f32_x], section "llvm.metadata"
 ;.
 define <2 x double> @llvm_cos_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_cos_f64
@@ -424,6 +424,46 @@ define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vsca
   ret <vscale x 4 x float> %1
 }

+define <2 x double> @frem_f64(<2 x double> %in) {
+; CHECK-LABEL: define <2 x double> @frem_f64
+; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @armpl_vfmodq_f64(<2 x double> [[IN]], <2 x double> [[IN]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1= frem <2 x double> %in, %in
+  ret <2 x double> %1
+}
+
+define <4 x float> @frem_f32(<4 x float> %in) {
+; CHECK-LABEL: define <4 x float> @frem_f32
+; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @armpl_vfmodq_f32(<4 x float> [[IN]], <4 x float> [[IN]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1= frem <4 x float> %in, %in
+  ret <4 x float> %1
+}
+
+define <vscale x 2 x double> @frem_vscale_f64(<vscale x 2 x double> %in) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @frem_vscale_f64
+; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @armpl_svfmod_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1= frem <vscale x 2 x double> %in, %in
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @frem_vscale_f32(<vscale x 4 x float> %in) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @frem_vscale_f32
+; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @armpl_svfmod_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1= frem <vscale x 4 x float> %in, %in
+  ret <vscale x 4 x float> %1
+}
+
 attributes #0 = { "target-features"="+sve" }
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll

Lines changed: 19 additions & 1 deletion
@@ -4,7 +4,7 @@
 target triple = "aarch64-unknown-linux-gnu"

 ;.
-; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxvv_fmod, ptr @_ZGVsMxvv_fmodf], section "llvm.metadata"
 ;.
 define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_ceil_vscale_f64(
@@ -384,6 +384,24 @@ define <vscale x 4 x float> @llvm_trunc_vscale_f32(<vscale x 4 x float> %in) {
   ret <vscale x 4 x float> %1
 }

+define <vscale x 2 x double> @frem_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @frem_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1= frem <vscale x 2 x double> %in, %in
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @frem_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @frem_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1= frem <vscale x 4 x float> %in, %in
+  ret <vscale x 4 x float> %1
+}
+
 declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)
 declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
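
Note the extra <vscale x N x i1> operand in the scalable CHECK lines above: the scalable SLEEF (and ArmPL) mappings are masked, so the pass inserts an all-true predicate at the mask position (Constant::getAllOnesValue on the mask type), which prints as the insertelement/shufflevector splat constant. A standalone sketch of the resulting IR, with the callee name mirrored from the test above and a hypothetical function name:

define <vscale x 4 x float> @masked_frem_sketch(<vscale x 4 x float> %x, <vscale x 4 x float> %y) {
  ; Trailing operand: all-true mask splat (i1 true in lane 0, broadcast to every lane).
  %r = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> %x, <vscale x 4 x float> %y, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
  ret <vscale x 4 x float> %r
}

declare <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>)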

llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll

Lines changed: 19 additions & 1 deletion
@@ -4,7 +4,7 @@
 target triple = "aarch64-unknown-linux-gnu"

 ;.
-; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [18 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2vv_fmod, ptr @_ZGVnN4vv_fmodf], section "llvm.metadata"
 ;.
 define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_ceil_f64(
@@ -384,6 +384,24 @@ define <4 x float> @llvm_trunc_f32(<4 x float> %in) {
   ret <4 x float> %1
 }

+define <2 x double> @frem_f64(<2 x double> %in) {
+; CHECK-LABEL: @frem_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @_ZGVnN2vv_fmod(<2 x double> [[IN:%.*]], <2 x double> [[IN]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1= frem <2 x double> %in, %in
+  ret <2 x double> %1
+}
+
+define <4 x float> @frem_f32(<4 x float> %in) {
+; CHECK-LABEL: @frem_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @_ZGVnN4vv_fmodf(<4 x float> [[IN:%.*]], <4 x float> [[IN]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1= frem <4 x float> %in, %in
+  ret <4 x float> %1
+}
+
 declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
 declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
