Skip to content

Commit c3f53d7

Browse files
committed
multiple use
1 parent 6b6453b commit c3f53d7

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6213,6 +6213,11 @@ PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
62136213
Op->getOperand(0).getValueType() == MVT::i32))
62146214
return SDValue();
62156215

6216+
// If the truncate has multiple uses, this optimization can increase
6217+
// register pressure.
6218+
if (!Op->hasOneUse())
6219+
return SDValue();
6220+
62166221
*Op = Op->getOperand(0);
62176222

62186223
// Optionally, fold in a shift-right of the original operand and let permute

llvm/test/CodeGen/NVPTX/i16x2-instructions.ll

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,71 @@ define <2 x i16> @test_trunc_2xi32(<2 x i32> %a) #0 {
818818
ret <2 x i16> %r
819819
}
820820

821+
define <2 x i16> @test_trunc_2xi32_muliple_use0(<2 x i32> %a, ptr %p) #0 {
822+
; I16x2-LABEL: test_trunc_2xi32_muliple_use0(
823+
; I16x2: {
824+
; I16x2-NEXT: .reg .b32 %r<7>;
825+
; I16x2-NEXT: .reg .b64 %rd<2>;
826+
; I16x2-EMPTY:
827+
; I16x2-NEXT: // %bb.0:
828+
; I16x2-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_trunc_2xi32_muliple_use0_param_0];
829+
; I16x2-NEXT: ld.param.u64 %rd1, [test_trunc_2xi32_muliple_use0_param_1];
830+
; I16x2-NEXT: prmt.b32 %r3, %r1, %r2, 0x5410U;
831+
; I16x2-NEXT: mov.b32 %r5, 65537;
832+
; I16x2-NEXT: add.s16x2 %r6, %r3, %r5;
833+
; I16x2-NEXT: st.u32 [%rd1], %r6;
834+
; I16x2-NEXT: st.param.b32 [func_retval0], %r3;
835+
; I16x2-NEXT: ret;
836+
;
837+
; NO-I16x2-LABEL: test_trunc_2xi32_muliple_use0(
838+
; NO-I16x2: {
839+
; NO-I16x2-NEXT: .reg .b16 %rs<5>;
840+
; NO-I16x2-NEXT: .reg .b32 %r<5>;
841+
; NO-I16x2-NEXT: .reg .b64 %rd<2>;
842+
; NO-I16x2-EMPTY:
843+
; NO-I16x2-NEXT: // %bb.0:
844+
; NO-I16x2-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_trunc_2xi32_muliple_use0_param_0];
845+
; NO-I16x2-NEXT: ld.param.u64 %rd1, [test_trunc_2xi32_muliple_use0_param_1];
846+
; NO-I16x2-NEXT: cvt.u16.u32 %rs1, %r2;
847+
; NO-I16x2-NEXT: cvt.u16.u32 %rs2, %r1;
848+
; NO-I16x2-NEXT: mov.b32 %r3, {%rs2, %rs1};
849+
; NO-I16x2-NEXT: add.s16 %rs3, %rs1, 1;
850+
; NO-I16x2-NEXT: add.s16 %rs4, %rs2, 1;
851+
; NO-I16x2-NEXT: mov.b32 %r4, {%rs4, %rs3};
852+
; NO-I16x2-NEXT: st.u32 [%rd1], %r4;
853+
; NO-I16x2-NEXT: st.param.b32 [func_retval0], %r3;
854+
; NO-I16x2-NEXT: ret;
855+
%r = trunc <2 x i32> %a to <2 x i16>
856+
; Reuse the truncated value - when we don't have i16x2 vectors, optimizing to
857+
; PRMT would increase register pressure.
858+
%s = add <2 x i16> %r, splat (i16 1)
859+
store <2 x i16> %s, ptr %p
860+
ret <2 x i16> %r
861+
}
862+
863+
define <2 x i16> @test_trunc_2xi32_muliple_use1(<2 x i32> %a, ptr %p) #0 {
864+
; COMMON-LABEL: test_trunc_2xi32_muliple_use1(
865+
; COMMON: {
866+
; COMMON-NEXT: .reg .b32 %r<7>;
867+
; COMMON-NEXT: .reg .b64 %rd<2>;
868+
; COMMON-EMPTY:
869+
; COMMON-NEXT: // %bb.0:
870+
; COMMON-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_trunc_2xi32_muliple_use1_param_0];
871+
; COMMON-NEXT: ld.param.u64 %rd1, [test_trunc_2xi32_muliple_use1_param_1];
872+
; COMMON-NEXT: prmt.b32 %r3, %r1, %r2, 0x5410U;
873+
; COMMON-NEXT: add.s32 %r5, %r2, 1;
874+
; COMMON-NEXT: add.s32 %r6, %r1, 1;
875+
; COMMON-NEXT: st.v2.u32 [%rd1], {%r6, %r5};
876+
; COMMON-NEXT: st.param.b32 [func_retval0], %r3;
877+
; COMMON-NEXT: ret;
878+
%r = trunc <2 x i32> %a to <2 x i16>
879+
; Reuse the original value - optimizing to PRMT does not increase register
880+
; pressure.
881+
%s = add <2 x i32> %a, splat (i32 1)
882+
store <2 x i32> %s, ptr %p
883+
ret <2 x i16> %r
884+
}
885+
821886
define <2 x i16> @test_trunc_2xi64(<2 x i64> %a) #0 {
822887
; COMMON-LABEL: test_trunc_2xi64(
823888
; COMMON: {

0 commit comments

Comments
 (0)