Skip to content

Commit fe7ece8

Browse files
author
git apple-llvm automerger
committed
Merge commit 'f88a9a32d995' from llvm.org/main into next
2 parents acd4381 + f88a9a3 commit fe7ece8

17 files changed, with 2512 additions (+2512) and 2259 deletions (-2259).

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 9 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -4162,22 +4162,17 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
41624162
SDLoc SL(N);
41634163
unsigned RHSVal = RHS->getZExtValue();
41644164

4165-
// (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31)
4166-
if (RHSVal == 32) {
4165+
// For C >= 32
4166+
// (sra i64:x, C) -> build_pair (sra hi_32(x), C - 32), (sra hi_32(x), 31)
4167+
if (RHSVal >= 32) {
41674168
SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
4168-
SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
4169-
DAG.getConstant(31, SL, MVT::i32));
4169+
Hi = DAG.getFreeze(Hi);
4170+
SDValue HiShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
4171+
DAG.getConstant(31, SL, MVT::i32));
4172+
SDValue LoShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
4173+
DAG.getConstant(RHSVal - 32, SL, MVT::i32));
41704174

4171-
SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift});
4172-
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
4173-
}
4174-
4175-
// (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31)
4176-
if (RHSVal == 63) {
4177-
SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
4178-
SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
4179-
DAG.getConstant(31, SL, MVT::i32));
4180-
SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift});
4175+
SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {LoShift, HiShift});
41814176
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
41824177
}
41834178

llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -685,16 +685,16 @@ define amdgpu_kernel void @ashr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace
685685
; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
686686
; CI-NEXT: s_mov_b64 s[2:3], s[6:7]
687687
; CI-NEXT: s_waitcnt vmcnt(0)
688-
; CI-NEXT: v_bfe_i32 v6, v3, 0, 16
689-
; CI-NEXT: v_ashr_i64 v[3:4], v[2:3], 56
690-
; CI-NEXT: v_bfe_i32 v5, v2, 0, 16
688+
; CI-NEXT: v_bfe_i32 v4, v2, 0, 16
689+
; CI-NEXT: v_bfe_i32 v5, v3, 0, 16
690+
; CI-NEXT: v_ashrrev_i32_e32 v3, 24, v3
691691
; CI-NEXT: v_ashrrev_i32_e32 v2, 24, v2
692-
; CI-NEXT: v_bfe_u32 v4, v6, 8, 16
693-
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
694-
; CI-NEXT: v_bfe_u32 v5, v5, 8, 16
695692
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
696-
; CI-NEXT: v_or_b32_e32 v3, v4, v3
697-
; CI-NEXT: v_or_b32_e32 v2, v5, v2
693+
; CI-NEXT: v_bfe_u32 v5, v5, 8, 16
694+
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
695+
; CI-NEXT: v_bfe_u32 v4, v4, 8, 16
696+
; CI-NEXT: v_or_b32_e32 v3, v5, v3
697+
; CI-NEXT: v_or_b32_e32 v2, v4, v2
698698
; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
699699
; CI-NEXT: s_endpgm
700700
;

llvm/test/CodeGen/AMDGPU/dagcomb-mullohi.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -150,9 +150,9 @@ define i32 @mul_one_bit_hi_hi_u32_lshr_ashr(i32 %arg, i32 %arg1, ptr %arg2) {
150150
; CHECK-LABEL: mul_one_bit_hi_hi_u32_lshr_ashr:
151151
; CHECK: ; %bb.0: ; %bb
152152
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153-
; CHECK-NEXT: v_mul_hi_u32 v4, v1, v0
154-
; CHECK-NEXT: v_ashrrev_i64 v[0:1], 33, v[3:4]
155-
; CHECK-NEXT: flat_store_dword v[2:3], v4
153+
; CHECK-NEXT: v_mul_hi_u32 v0, v1, v0
154+
; CHECK-NEXT: flat_store_dword v[2:3], v0
155+
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 1, v0
156156
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
157157
; CHECK-NEXT: s_setpc_b64 s[30:31]
158158
bb:

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4398,9 +4398,10 @@ define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
43984398
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
43994399
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
44004400
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[2:3]
4401-
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 1, v4
4402-
; GFX9-NEXT: v_ashrrev_i64 v[2:3], 33, v[2:3]
4403-
; GFX9-NEXT: v_or_b32_e32 v0, v4, v0
4401+
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v4
4402+
; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
4403+
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 1, v3
4404+
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v3
44044405
; GFX9-NEXT: s_setpc_b64 s[30:31]
44054406
;
44064407
; GFX9-O0-LABEL: v_sdiv_i128_v_pow2k:

llvm/test/CodeGen/AMDGPU/fptoi.i128.ll

Lines changed: 19 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1433,15 +1433,25 @@ define i128 @fptoui_f32_to_i128(float %x) {
14331433
}
14341434

14351435
define i128 @fptosi_f16_to_i128(half %x) {
1436-
; GCN-LABEL: fptosi_f16_to_i128:
1437-
; GCN: ; %bb.0:
1438-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1439-
; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
1440-
; GCN-NEXT: v_cvt_i32_f32_e32 v0, v0
1441-
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1442-
; GCN-NEXT: v_mov_b32_e32 v2, v1
1443-
; GCN-NEXT: v_mov_b32_e32 v3, v1
1444-
; GCN-NEXT: s_setpc_b64 s[30:31]
1436+
; SDAG-LABEL: fptosi_f16_to_i128:
1437+
; SDAG: ; %bb.0:
1438+
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1439+
; SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1440+
; SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0
1441+
; SDAG-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1442+
; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1443+
; SDAG-NEXT: v_mov_b32_e32 v3, v2
1444+
; SDAG-NEXT: s_setpc_b64 s[30:31]
1445+
;
1446+
; GISEL-LABEL: fptosi_f16_to_i128:
1447+
; GISEL: ; %bb.0:
1448+
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1449+
; GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1450+
; GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0
1451+
; GISEL-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1452+
; GISEL-NEXT: v_mov_b32_e32 v2, v1
1453+
; GISEL-NEXT: v_mov_b32_e32 v3, v1
1454+
; GISEL-NEXT: s_setpc_b64 s[30:31]
14451455
%cvt = fptosi half %x to i128
14461456
ret i128 %cvt
14471457
}

0 commit comments

Comments
 (0)