Skip to content

Commit ba938f6

Browse files
AMDGPU/GlobalISel: Legalize s16->s64 G_FPTOSI/G_FPTOUI
Add the narrowScalarFor action. Add narrow-scalar support for type index 0 of G_FPTOSI/G_FPTOUI. Legalize s16->s64 G_FPTOSI/G_FPTOUI using narrowScalarFor, as an s16->s32 G_FPTOSI/G_FPTOUI followed by an s32->s64 G_SEXT/G_ZEXT. Differential Revision: https://reviews.llvm.org/D84010
1 parent 2a4df6a commit ba938f6

File tree

5 files changed

+147
-1
lines changed

5 files changed

+147
-1
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,13 @@ class LegalizeRuleSet {
696696
markAllIdxsAsCovered();
697697
return actionIf(LegalizeAction::NarrowScalar, Predicate, Mutation);
698698
}
699+
/// Narrow the scalar, as specified by the mutation, when type indexes 0 and 1
700+
/// match any type pair in the given list.
701+
LegalizeRuleSet &
702+
narrowScalarFor(std::initializer_list<std::pair<LLT, LLT>> Types,
703+
LegalizeMutation Mutation) {
704+
return actionFor(LegalizeAction::NarrowScalar, Types, Mutation);
705+
}
699706

700707
/// Add more elements to reach the type selected by the mutation if the
701708
/// predicate is true.

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1212,6 +1212,22 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
12121212
Observer.changedInstr(MI);
12131213
return Legalized;
12141214
}
1215+
case TargetOpcode::G_FPTOUI: {
1216+
if (TypeIdx != 0)
1217+
return UnableToLegalize;
1218+
Observer.changingInstr(MI);
1219+
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1220+
Observer.changedInstr(MI);
1221+
return Legalized;
1222+
}
1223+
case TargetOpcode::G_FPTOSI: {
1224+
if (TypeIdx != 0)
1225+
return UnableToLegalize;
1226+
Observer.changingInstr(MI);
1227+
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
1228+
Observer.changedInstr(MI);
1229+
return Legalized;
1230+
}
12151231
}
12161232
}
12171233

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
654654

655655
auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
656656
.legalFor({{S32, S32}, {S32, S64}, {S32, S16}})
657-
.customFor({{S64, S64}});
657+
.customFor({{S64, S64}})
658+
.narrowScalarFor({{S64, S16}}, changeTo(0, S32));
658659
if (ST.has16BitInsts())
659660
FPToI.legalFor({{S16, S16}});
660661
else

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,3 +587,64 @@ body: |
587587
%1:_(<2 x s64>) = G_FPTOSI %0
588588
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
589589
...
590+
591+
---
592+
name: test_fptosi_s16_to_s64
593+
body: |
594+
bb.0:
595+
liveins: $vgpr0
596+
597+
; SI-LABEL: name: test_fptosi_s16_to_s64
598+
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
599+
; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
600+
; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16)
601+
; SI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI]](s32)
602+
; SI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
603+
; VI-LABEL: name: test_fptosi_s16_to_s64
604+
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
605+
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
606+
; VI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16)
607+
; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI]](s32)
608+
; VI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
609+
%0:_(s32) = COPY $vgpr0
610+
%1:_(s16) = G_TRUNC %0
611+
%2:_(s64) = G_FPTOSI %1
612+
$vgpr0_vgpr1 = COPY %2
613+
...
614+
615+
---
616+
name: test_fptosi_v2s16_to_v2s64
617+
body: |
618+
bb.0:
619+
liveins: $vgpr0
620+
621+
; SI-LABEL: name: test_fptosi_v2s16_to_v2s64
622+
; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
623+
; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
624+
; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
625+
; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
626+
; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
627+
; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
628+
; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16)
629+
; SI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI]](s32)
630+
; SI: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC1]](s16)
631+
; SI: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI1]](s32)
632+
; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64)
633+
; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
634+
; VI-LABEL: name: test_fptosi_v2s16_to_v2s64
635+
; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
636+
; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
637+
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
638+
; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
639+
; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
640+
; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
641+
; VI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16)
642+
; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI]](s32)
643+
; VI: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC1]](s16)
644+
; VI: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[FPTOSI1]](s32)
645+
; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64)
646+
; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
647+
%0:_(<2 x s16>) = COPY $vgpr0
648+
%1:_(<2 x s64>) = G_FPTOSI %0
649+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
650+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,3 +757,64 @@ body: |
757757
%1:_(<2 x s64>) = G_FPTOUI %0
758758
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
759759
...
760+
761+
---
762+
name: test_fptoui_s16_to_s64
763+
body: |
764+
bb.0:
765+
liveins: $vgpr0
766+
767+
; SI-LABEL: name: test_fptoui_s16_to_s64
768+
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
769+
; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
770+
; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16)
771+
; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI]](s32)
772+
; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
773+
; VI-LABEL: name: test_fptoui_s16_to_s64
774+
; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
775+
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
776+
; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16)
777+
; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI]](s32)
778+
; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
779+
%0:_(s32) = COPY $vgpr0
780+
%1:_(s16) = G_TRUNC %0
781+
%2:_(s64) = G_FPTOUI %1
782+
$vgpr0_vgpr1 = COPY %2
783+
...
784+
785+
---
786+
name: test_fptoui_v2s16_to_v2s64
787+
body: |
788+
bb.0:
789+
liveins: $vgpr0
790+
791+
; SI-LABEL: name: test_fptoui_v2s16_to_v2s64
792+
; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
793+
; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
794+
; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
795+
; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
796+
; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
797+
; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
798+
; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16)
799+
; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI]](s32)
800+
; SI: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC1]](s16)
801+
; SI: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI1]](s32)
802+
; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
803+
; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
804+
; VI-LABEL: name: test_fptoui_v2s16_to_v2s64
805+
; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
806+
; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
807+
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
808+
; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
809+
; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
810+
; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
811+
; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16)
812+
; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI]](s32)
813+
; VI: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC1]](s16)
814+
; VI: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[FPTOUI1]](s32)
815+
; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
816+
; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
817+
%0:_(<2 x s16>) = COPY $vgpr0
818+
%1:_(<2 x s64>) = G_FPTOUI %0
819+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
820+
...

0 commit comments

Comments
 (0)