Skip to content

Commit 7edddee

Browse files
[ExpandLargeFpConvert] Scalarize vector types. (#86954)
expand-large-fp-convert cannot handle vector types. If overly large vector element types survive into isel, they will likely be scalarized there, but since isel cannot handle scalar integer types of that size, it will assert. Handle vector types in expand-large-fp-convert by scalarizing them and then expanding the scalar type operation. For large vectors, this results in a *massive* code expansion, but it's better than asserting.
1 parent 4ef22fc commit 7edddee

File tree

5 files changed

+521
-8
lines changed

5 files changed

+521
-8
lines changed

llvm/lib/CodeGen/ExpandLargeFpConvert.cpp

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -568,8 +568,29 @@ static void expandIToFP(Instruction *IToFP) {
568568
IToFP->eraseFromParent();
569569
}
570570

571+
/// Split a fixed-width vector conversion into one scalar conversion per lane.
///
/// For every element of \p I's result type, the corresponding element of
/// operand 0 is extracted, converted with the same cast opcode as \p I to the
/// result's element type, and inserted into a vector that starts out as poison.
/// Each scalar cast that comes back from the builder as an Instruction is
/// appended to \p Replace. Finally all uses of \p I are redirected to the
/// rebuilt vector and \p I is erased from its parent.
///
/// \param I       Must be a CastInst whose result type is a FixedVectorType;
///                both are cast<>-ed unconditionally below.
/// \param Replace Receives the newly created scalar cast instructions.
static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
  auto *VTy = cast<FixedVectorType>(I->getType());

  // New instructions are inserted immediately before the vector cast.
  IRBuilder<> Builder(I);

  // Loop-invariant pieces of the original cast.
  Value *Src = I->getOperand(0);
  const Instruction::CastOps Opcode = cast<CastInst>(I)->getOpcode();
  Type *ScalarTy = VTy->getElementType();

  Value *Result = PoisonValue::get(VTy);
  for (unsigned Idx = 0, NumElements = VTy->getNumElements();
       Idx != NumElements; ++Idx) {
    Value *Ext = Builder.CreateExtractElement(Src, Idx);
    Value *Cast = Builder.CreateCast(Opcode, Ext, ScalarTy);
    Result = Builder.CreateInsertElement(Result, Cast, Idx);
    // The builder hands back a Value, which is not necessarily an Instruction
    // (presumably when the cast folds to a constant); only real instructions
    // are queued. A single dyn_cast<> replaces the isa<> + cast<> pair.
    if (auto *ScalarCast = dyn_cast<Instruction>(Cast))
      Replace.push_back(ScalarCast);
  }

  I->replaceAllUsesWith(Result);
  I->dropAllReferences();
  I->eraseFromParent();
}
590+
571591
static bool runImpl(Function &F, const TargetLowering &TLI) {
572592
SmallVector<Instruction *, 4> Replace;
593+
SmallVector<Instruction *, 4> ReplaceVector;
573594
bool Modified = false;
574595

575596
unsigned MaxLegalFpConvertBitWidth =
@@ -584,29 +605,36 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
584605
switch (I.getOpcode()) {
585606
case Instruction::FPToUI:
586607
case Instruction::FPToSI: {
587-
// TODO: This pass doesn't handle vectors.
588-
if (I.getOperand(0)->getType()->isVectorTy())
608+
// TODO: This pass doesn't handle scalable vectors.
609+
if (I.getOperand(0)->getType()->isScalableTy())
589610
continue;
590611

591-
auto *IntTy = dyn_cast<IntegerType>(I.getType());
612+
auto *IntTy = dyn_cast<IntegerType>(I.getType()->getScalarType());
592613
if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
593614
continue;
594615

595-
Replace.push_back(&I);
616+
if (I.getOperand(0)->getType()->isVectorTy())
617+
ReplaceVector.push_back(&I);
618+
else
619+
Replace.push_back(&I);
596620
Modified = true;
597621
break;
598622
}
599623
case Instruction::UIToFP:
600624
case Instruction::SIToFP: {
601-
// TODO: This pass doesn't handle vectors.
602-
if (I.getOperand(0)->getType()->isVectorTy())
625+
// TODO: This pass doesn't handle scalable vectors.
626+
if (I.getOperand(0)->getType()->isScalableTy())
603627
continue;
604628

605-
auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType());
629+
auto *IntTy =
630+
dyn_cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
606631
if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
607632
continue;
608633

609-
Replace.push_back(&I);
634+
if (I.getOperand(0)->getType()->isVectorTy())
635+
ReplaceVector.push_back(&I);
636+
else
637+
Replace.push_back(&I);
610638
Modified = true;
611639
break;
612640
}
@@ -615,6 +643,11 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
615643
}
616644
}
617645

646+
while (!ReplaceVector.empty()) {
647+
Instruction *I = ReplaceVector.pop_back_val();
648+
scalarize(I, Replace);
649+
}
650+
618651
if (Replace.empty())
619652
return false;
620653

llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,80 @@ define i129 @fp128tosi129(fp128 %a) {
176176
%conv = fptosi fp128 %a to i129
177177
ret i129 %conv
178178
}
179+
180+
define <2 x i129> @floattosi129v2(<2 x float> %a) {
181+
; CHECK-LABEL: @floattosi129v2(
182+
; CHECK-NEXT: fp-to-i-entryfp-to-i-entry:
183+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[A:%.*]], i64 0
184+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[TMP0]] to i32
185+
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i129
186+
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], -1
187+
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i129 1, i129 -1
188+
; CHECK-NEXT: [[TMP5:%.*]] = lshr i129 [[TMP2]], 23
189+
; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP5]], 255
190+
; CHECK-NEXT: [[TMP7:%.*]] = and i129 [[TMP2]], 8388607
191+
; CHECK-NEXT: [[TMP8:%.*]] = or i129 [[TMP7]], 8388608
192+
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i129 [[TMP6]], 127
193+
; CHECK-NEXT: br i1 [[TMP9]], label [[FP_TO_I_CLEANUP1:%.*]], label [[FP_TO_I_IF_END2:%.*]]
194+
; CHECK: fp-to-i-if-end2:
195+
; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[TMP6]], -256
196+
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129
197+
; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]]
198+
; CHECK: fp-to-i-if-then53:
199+
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
200+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
201+
; CHECK: fp-to-i-if-end94:
202+
; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i129 [[TMP6]], 150
203+
; CHECK-NEXT: br i1 [[TMP13]], label [[FP_TO_I_IF_THEN125:%.*]], label [[FP_TO_I_IF_ELSE6:%.*]]
204+
; CHECK: fp-to-i-if-then125:
205+
; CHECK-NEXT: [[TMP14:%.*]] = sub i129 150, [[TMP6]]
206+
; CHECK-NEXT: [[TMP15:%.*]] = lshr i129 [[TMP8]], [[TMP14]]
207+
; CHECK-NEXT: [[TMP16:%.*]] = mul i129 [[TMP15]], [[TMP4]]
208+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
209+
; CHECK: fp-to-i-if-else6:
210+
; CHECK-NEXT: [[TMP17:%.*]] = add i129 [[TMP6]], -150
211+
; CHECK-NEXT: [[TMP18:%.*]] = shl i129 [[TMP8]], [[TMP17]]
212+
; CHECK-NEXT: [[TMP19:%.*]] = mul i129 [[TMP18]], [[TMP4]]
213+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
214+
; CHECK: fp-to-i-cleanup1:
215+
; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP12]], [[FP_TO_I_IF_THEN53]] ], [ [[TMP16]], [[FP_TO_I_IF_THEN125]] ], [ [[TMP19]], [[FP_TO_I_IF_ELSE6]] ], [ 0, [[FP_TO_I_ENTRYFP_TO_I_ENTRY:%.*]] ]
216+
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i129> poison, i129 [[TMP20]], i64 0
217+
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[A]], i64 1
218+
; CHECK-NEXT: [[TMP23:%.*]] = bitcast float [[TMP22]] to i32
219+
; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i129
220+
; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP23]], -1
221+
; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i129 1, i129 -1
222+
; CHECK-NEXT: [[TMP27:%.*]] = lshr i129 [[TMP24]], 23
223+
; CHECK-NEXT: [[TMP28:%.*]] = and i129 [[TMP27]], 255
224+
; CHECK-NEXT: [[TMP29:%.*]] = and i129 [[TMP24]], 8388607
225+
; CHECK-NEXT: [[TMP30:%.*]] = or i129 [[TMP29]], 8388608
226+
; CHECK-NEXT: [[TMP31:%.*]] = icmp ult i129 [[TMP28]], 127
227+
; CHECK-NEXT: br i1 [[TMP31]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]]
228+
; CHECK: fp-to-i-if-end:
229+
; CHECK-NEXT: [[TMP32:%.*]] = add i129 [[TMP28]], -256
230+
; CHECK-NEXT: [[TMP33:%.*]] = icmp ult i129 [[TMP32]], -129
231+
; CHECK-NEXT: br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
232+
; CHECK: fp-to-i-if-then5:
233+
; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP25]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
234+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
235+
; CHECK: fp-to-i-if-end9:
236+
; CHECK-NEXT: [[TMP35:%.*]] = icmp ult i129 [[TMP28]], 150
237+
; CHECK-NEXT: br i1 [[TMP35]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]]
238+
; CHECK: fp-to-i-if-then12:
239+
; CHECK-NEXT: [[TMP36:%.*]] = sub i129 150, [[TMP28]]
240+
; CHECK-NEXT: [[TMP37:%.*]] = lshr i129 [[TMP30]], [[TMP36]]
241+
; CHECK-NEXT: [[TMP38:%.*]] = mul i129 [[TMP37]], [[TMP26]]
242+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
243+
; CHECK: fp-to-i-if-else:
244+
; CHECK-NEXT: [[TMP39:%.*]] = add i129 [[TMP28]], -150
245+
; CHECK-NEXT: [[TMP40:%.*]] = shl i129 [[TMP30]], [[TMP39]]
246+
; CHECK-NEXT: [[TMP41:%.*]] = mul i129 [[TMP40]], [[TMP26]]
247+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
248+
; CHECK: fp-to-i-cleanup:
249+
; CHECK-NEXT: [[TMP42:%.*]] = phi i129 [ [[TMP34]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP38]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP41]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_CLEANUP1]] ]
250+
; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i129> [[TMP21]], i129 [[TMP42]], i64 1
251+
; CHECK-NEXT: ret <2 x i129> [[TMP43]]
252+
;
253+
%conv = fptosi <2 x float> %a to <2 x i129>
254+
ret <2 x i129> %conv
255+
}

llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,80 @@ define i129 @fp128toui129(fp128 %a) {
176176
%conv = fptoui fp128 %a to i129
177177
ret i129 %conv
178178
}
179+
180+
define <2 x i129> @floattoui129v2(<2 x float> %a) {
181+
; CHECK-LABEL: @floattoui129v2(
182+
; CHECK-NEXT: fp-to-i-entryfp-to-i-entry:
183+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[A:%.*]], i64 0
184+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[TMP0]] to i32
185+
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i129
186+
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], -1
187+
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i129 1, i129 -1
188+
; CHECK-NEXT: [[TMP5:%.*]] = lshr i129 [[TMP2]], 23
189+
; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP5]], 255
190+
; CHECK-NEXT: [[TMP7:%.*]] = and i129 [[TMP2]], 8388607
191+
; CHECK-NEXT: [[TMP8:%.*]] = or i129 [[TMP7]], 8388608
192+
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i129 [[TMP6]], 127
193+
; CHECK-NEXT: br i1 [[TMP9]], label [[FP_TO_I_CLEANUP1:%.*]], label [[FP_TO_I_IF_END2:%.*]]
194+
; CHECK: fp-to-i-if-end2:
195+
; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[TMP6]], -256
196+
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129
197+
; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]]
198+
; CHECK: fp-to-i-if-then53:
199+
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
200+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
201+
; CHECK: fp-to-i-if-end94:
202+
; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i129 [[TMP6]], 150
203+
; CHECK-NEXT: br i1 [[TMP13]], label [[FP_TO_I_IF_THEN125:%.*]], label [[FP_TO_I_IF_ELSE6:%.*]]
204+
; CHECK: fp-to-i-if-then125:
205+
; CHECK-NEXT: [[TMP14:%.*]] = sub i129 150, [[TMP6]]
206+
; CHECK-NEXT: [[TMP15:%.*]] = lshr i129 [[TMP8]], [[TMP14]]
207+
; CHECK-NEXT: [[TMP16:%.*]] = mul i129 [[TMP15]], [[TMP4]]
208+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
209+
; CHECK: fp-to-i-if-else6:
210+
; CHECK-NEXT: [[TMP17:%.*]] = add i129 [[TMP6]], -150
211+
; CHECK-NEXT: [[TMP18:%.*]] = shl i129 [[TMP8]], [[TMP17]]
212+
; CHECK-NEXT: [[TMP19:%.*]] = mul i129 [[TMP18]], [[TMP4]]
213+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
214+
; CHECK: fp-to-i-cleanup1:
215+
; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP12]], [[FP_TO_I_IF_THEN53]] ], [ [[TMP16]], [[FP_TO_I_IF_THEN125]] ], [ [[TMP19]], [[FP_TO_I_IF_ELSE6]] ], [ 0, [[FP_TO_I_ENTRYFP_TO_I_ENTRY:%.*]] ]
216+
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i129> poison, i129 [[TMP20]], i64 0
217+
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[A]], i64 1
218+
; CHECK-NEXT: [[TMP23:%.*]] = bitcast float [[TMP22]] to i32
219+
; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i129
220+
; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP23]], -1
221+
; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i129 1, i129 -1
222+
; CHECK-NEXT: [[TMP27:%.*]] = lshr i129 [[TMP24]], 23
223+
; CHECK-NEXT: [[TMP28:%.*]] = and i129 [[TMP27]], 255
224+
; CHECK-NEXT: [[TMP29:%.*]] = and i129 [[TMP24]], 8388607
225+
; CHECK-NEXT: [[TMP30:%.*]] = or i129 [[TMP29]], 8388608
226+
; CHECK-NEXT: [[TMP31:%.*]] = icmp ult i129 [[TMP28]], 127
227+
; CHECK-NEXT: br i1 [[TMP31]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]]
228+
; CHECK: fp-to-i-if-end:
229+
; CHECK-NEXT: [[TMP32:%.*]] = add i129 [[TMP28]], -256
230+
; CHECK-NEXT: [[TMP33:%.*]] = icmp ult i129 [[TMP32]], -129
231+
; CHECK-NEXT: br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
232+
; CHECK: fp-to-i-if-then5:
233+
; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP25]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
234+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
235+
; CHECK: fp-to-i-if-end9:
236+
; CHECK-NEXT: [[TMP35:%.*]] = icmp ult i129 [[TMP28]], 150
237+
; CHECK-NEXT: br i1 [[TMP35]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]]
238+
; CHECK: fp-to-i-if-then12:
239+
; CHECK-NEXT: [[TMP36:%.*]] = sub i129 150, [[TMP28]]
240+
; CHECK-NEXT: [[TMP37:%.*]] = lshr i129 [[TMP30]], [[TMP36]]
241+
; CHECK-NEXT: [[TMP38:%.*]] = mul i129 [[TMP37]], [[TMP26]]
242+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
243+
; CHECK: fp-to-i-if-else:
244+
; CHECK-NEXT: [[TMP39:%.*]] = add i129 [[TMP28]], -150
245+
; CHECK-NEXT: [[TMP40:%.*]] = shl i129 [[TMP30]], [[TMP39]]
246+
; CHECK-NEXT: [[TMP41:%.*]] = mul i129 [[TMP40]], [[TMP26]]
247+
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
248+
; CHECK: fp-to-i-cleanup:
249+
; CHECK-NEXT: [[TMP42:%.*]] = phi i129 [ [[TMP34]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP38]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP41]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_CLEANUP1]] ]
250+
; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i129> [[TMP21]], i129 [[TMP42]], i64 1
251+
; CHECK-NEXT: ret <2 x i129> [[TMP43]]
252+
;
253+
%conv = fptoui <2 x float> %a to <2 x i129>
254+
ret <2 x i129> %conv
255+
}

0 commit comments

Comments
 (0)