Skip to content

[TargetLowering][AMDGPU][ARM][RISCV][X86] Teach SimplifyDemandedBits to combine (srl (sra X, C1), ShAmt) -> sra(X, C1+ShAmt) #101751

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 7 commits on Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1958,6 +1958,22 @@ bool TargetLowering::SimplifyDemandedBits(
}
}

// If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
// single sra. We can do this if the top bits are never demanded.
if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
if (std::optional<uint64_t> InnerSA =
TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
unsigned C1 = *InnerSA;
// Clamp the combined shift amount if it exceeds the bit width.
unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
Op0.getOperand(0), NewSA));
}
}
}

APInt InDemandedMask = (DemandedBits << ShAmt);

// If the shift is exact, then it does demand the low bits (and knows that
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ define i1 @test_srem_even(i4 %X) nounwind {
define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; CHECK-LABEL: test_srem_pow2_setne:
; CHECK: // %bb.0:
; CHECK-NEXT: sbfx w8, w0, #0, #6
; CHECK-NEXT: ubfx w8, w8, #9, #2
; CHECK-NEXT: sbfx w8, w0, #5, #1
; CHECK-NEXT: and w8, w8, #0x3
; CHECK-NEXT: add w8, w0, w8
; CHECK-NEXT: and w8, w8, #0x3c
; CHECK-NEXT: sub w8, w0, w8
Expand Down
24 changes: 11 additions & 13 deletions llvm/test/CodeGen/AMDGPU/permute_i8.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1049,15 +1049,14 @@ define hidden void @ashr_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1,
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_bfe_i32 v1, v9, 0, 8
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 24, v9
; GFX10-NEXT: v_ashrrev_i32_sdwa v2, v2, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 25, v9
; GFX10-NEXT: v_lshlrev_b16 v1, 7, v1
; GFX10-NEXT: v_lshrrev_b16 v3, 1, v3
; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_ashrrev_i16 v4, 10, v0
; GFX10-NEXT: v_perm_b32 v0, v9, v0, 0x4010707
; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff00, v1
; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: global_store_dword v[5:6], v1, off
Expand All @@ -1075,23 +1074,22 @@ define hidden void @ashr_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1,
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: global_load_dword v4, v[0:1], off
; GFX9-NEXT: global_load_dword v9, v[2:3], off
; GFX9-NEXT: v_mov_b32_e32 v0, 26
; GFX9-NEXT: v_mov_b32_e32 v1, 1
; GFX9-NEXT: v_mov_b32_e32 v2, 7
; GFX9-NEXT: v_mov_b32_e32 v1, 7
; GFX9-NEXT: s_mov_b32 s4, 0x4010707
; GFX9-NEXT: v_mov_b32_e32 v0, 26
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_ashrrev_i32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
; GFX9-NEXT: v_lshlrev_b16_sdwa v2, v2, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_lshlrev_b16_sdwa v1, v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_perm_b32 v3, v4, v9, s4
; GFX9-NEXT: v_perm_b32 v2, v4, v9, s4
; GFX9-NEXT: v_ashrrev_i32_sdwa v0, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 25, v4
; GFX9-NEXT: v_ashrrev_i16_e32 v9, 10, v9
; GFX9-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff00, v2
; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff00, v1
; GFX9-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NEXT: v_or_b32_sdwa v1, v9, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX9-NEXT: global_store_dword v[5:6], v0, off
; GFX9-NEXT: global_store_dword v[7:8], v3, off
; GFX9-NEXT: global_store_dword v[7:8], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; CHECK-LABEL: test_srem_pow2_setne:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_bfe_i32 v1, v0, 0, 6
; CHECK-NEXT: v_bfe_u32 v1, v1, 9, 2
; CHECK-NEXT: v_bfe_i32 v1, v0, 5, 1
; CHECK-NEXT: v_and_b32_e32 v1, 3, v1
Comment on lines +46 to +47
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small encoding size improvement

; CHECK-NEXT: v_add_i32_e32 v1, vcc, v0, v1
; CHECK-NEXT: v_and_b32_e32 v1, 60, v1
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
Expand Down
26 changes: 14 additions & 12 deletions llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; ARM5: @ %bb.0:
; ARM5-NEXT: lsl r1, r0, #26
; ARM5-NEXT: mov r2, #3
; ARM5-NEXT: asr r1, r1, #26
; ARM5-NEXT: and r1, r2, r1, lsr #9
; ARM5-NEXT: and r1, r2, r1, asr #31
; ARM5-NEXT: add r1, r0, r1
; ARM5-NEXT: and r1, r1, #60
; ARM5-NEXT: sub r0, r0, r1
Expand All @@ -222,8 +221,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; ARM6: @ %bb.0:
; ARM6-NEXT: lsl r1, r0, #26
; ARM6-NEXT: mov r2, #3
; ARM6-NEXT: asr r1, r1, #26
; ARM6-NEXT: and r1, r2, r1, lsr #9
; ARM6-NEXT: and r1, r2, r1, asr #31
; ARM6-NEXT: add r1, r0, r1
; ARM6-NEXT: and r1, r1, #60
; ARM6-NEXT: sub r0, r0, r1
Expand All @@ -233,8 +231,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; ARM7-LABEL: test_srem_pow2_setne:
; ARM7: @ %bb.0:
; ARM7-NEXT: sbfx r1, r0, #0, #6
; ARM7-NEXT: ubfx r1, r1, #9, #2
; ARM7-NEXT: lsl r1, r0, #26
; ARM7-NEXT: mov r2, #3
; ARM7-NEXT: and r1, r2, r1, asr #31
; ARM7-NEXT: add r1, r0, r1
; ARM7-NEXT: and r1, r1, #60
; ARM7-NEXT: sub r0, r0, r1
Expand All @@ -244,8 +243,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; ARM8-LABEL: test_srem_pow2_setne:
; ARM8: @ %bb.0:
; ARM8-NEXT: sbfx r1, r0, #0, #6
; ARM8-NEXT: ubfx r1, r1, #9, #2
; ARM8-NEXT: lsl r1, r0, #26
Copy link
Collaborator Author

@topperc topperc Aug 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like ARM prefers to fold asr #31 over folding an immediate. AArch64 on the other hand has a let AddComplexity = 6 on the ANDWri pattern to prioritize the immediate.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Considering these get optimized very differently by opt, and I don't see this coming up elsewhere I think it is OK. (I may look into it in the future to see if there is something we can do more generally to improve it).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @davemgreen

; ARM8-NEXT: mov r2, #3
; ARM8-NEXT: and r1, r2, r1, asr #31
; ARM8-NEXT: add r1, r0, r1
; ARM8-NEXT: and r1, r1, #60
; ARM8-NEXT: sub r0, r0, r1
Expand All @@ -255,8 +255,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; NEON7-LABEL: test_srem_pow2_setne:
; NEON7: @ %bb.0:
; NEON7-NEXT: sbfx r1, r0, #0, #6
; NEON7-NEXT: ubfx r1, r1, #9, #2
; NEON7-NEXT: lsl r1, r0, #26
; NEON7-NEXT: mov r2, #3
; NEON7-NEXT: and r1, r2, r1, asr #31
; NEON7-NEXT: add r1, r0, r1
; NEON7-NEXT: and r1, r1, #60
; NEON7-NEXT: sub r0, r0, r1
Expand All @@ -266,8 +267,9 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
;
; NEON8-LABEL: test_srem_pow2_setne:
; NEON8: @ %bb.0:
; NEON8-NEXT: sbfx r1, r0, #0, #6
; NEON8-NEXT: ubfx r1, r1, #9, #2
; NEON8-NEXT: lsl r1, r0, #26
; NEON8-NEXT: mov r2, #3
; NEON8-NEXT: and r1, r2, r1, asr #31
; NEON8-NEXT: add r1, r0, r1
; NEON8-NEXT: and r1, r1, #60
; NEON8-NEXT: sub r0, r0, r1
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; MIPSEL-LABEL: test_srem_pow2_setne:
; MIPSEL: # %bb.0:
; MIPSEL-NEXT: sll $1, $4, 26
; MIPSEL-NEXT: sra $1, $1, 26
; MIPSEL-NEXT: srl $1, $1, 9
; MIPSEL-NEXT: sra $1, $1, 31
; MIPSEL-NEXT: andi $1, $1, 3
; MIPSEL-NEXT: addu $1, $4, $1
; MIPSEL-NEXT: andi $1, $1, 60
Expand All @@ -104,8 +103,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; MIPS64EL: # %bb.0:
; MIPS64EL-NEXT: sll $1, $4, 0
; MIPS64EL-NEXT: sll $2, $1, 26
; MIPS64EL-NEXT: sra $2, $2, 26
; MIPS64EL-NEXT: srl $2, $2, 9
; MIPS64EL-NEXT: sra $2, $2, 31
; MIPS64EL-NEXT: andi $2, $2, 3
; MIPS64EL-NEXT: addu $2, $1, $2
; MIPS64EL-NEXT: andi $2, $2, 60
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; PPC-LABEL: test_srem_pow2_setne:
; PPC: # %bb.0:
; PPC-NEXT: slwi 4, 3, 26
; PPC-NEXT: srawi 4, 4, 26
; PPC-NEXT: rlwinm 4, 4, 23, 30, 31
; PPC-NEXT: srawi 4, 4, 31
; PPC-NEXT: clrlwi 4, 4, 30
; PPC-NEXT: add 4, 3, 4
; PPC-NEXT: rlwinm 4, 4, 0, 26, 29
; PPC-NEXT: sub 3, 3, 4
Expand All @@ -99,8 +99,8 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; PPC64LE-LABEL: test_srem_pow2_setne:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: slwi 4, 3, 26
; PPC64LE-NEXT: srawi 4, 4, 26
; PPC64LE-NEXT: rlwinm 4, 4, 23, 30, 31
; PPC64LE-NEXT: srawi 4, 4, 31
; PPC64LE-NEXT: clrlwi 4, 4, 30
; PPC64LE-NEXT: add 4, 3, 4
; PPC64LE-NEXT: rlwinm 4, 4, 0, 26, 29
; PPC64LE-NEXT: sub 3, 3, 4
Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/RISCV/div.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1017,8 +1017,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV32I-LABEL: sdiv8_pow2:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 24
; RV32I-NEXT: srai a1, a1, 24
; RV32I-NEXT: slli a1, a1, 17
; RV32I-NEXT: srai a1, a1, 2
; RV32I-NEXT: srli a1, a1, 29
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a0, a0, 24
Expand All @@ -1028,8 +1027,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV32IM-LABEL: sdiv8_pow2:
; RV32IM: # %bb.0:
; RV32IM-NEXT: slli a1, a0, 24
; RV32IM-NEXT: srai a1, a1, 24
; RV32IM-NEXT: slli a1, a1, 17
; RV32IM-NEXT: srai a1, a1, 2
; RV32IM-NEXT: srli a1, a1, 29
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: slli a0, a0, 24
Expand All @@ -1039,8 +1037,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV64I-LABEL: sdiv8_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 56
; RV64I-NEXT: srai a1, a1, 56
; RV64I-NEXT: slli a1, a1, 49
; RV64I-NEXT: srai a1, a1, 2
; RV64I-NEXT: srli a1, a1, 61
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 56
Expand All @@ -1050,8 +1047,7 @@ define i8 @sdiv8_pow2(i8 %a) nounwind {
; RV64IM-LABEL: sdiv8_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 56
; RV64IM-NEXT: srai a1, a1, 56
; RV64IM-NEXT: slli a1, a1, 49
; RV64IM-NEXT: srai a1, a1, 2
; RV64IM-NEXT: srli a1, a1, 61
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 56
Expand Down Expand Up @@ -1209,8 +1205,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV32I-LABEL: sdiv16_pow2:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 16
; RV32I-NEXT: srai a1, a1, 16
; RV32I-NEXT: slli a1, a1, 1
; RV32I-NEXT: srai a1, a1, 2
; RV32I-NEXT: srli a1, a1, 29
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a0, a0, 16
Expand All @@ -1220,8 +1215,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV32IM-LABEL: sdiv16_pow2:
; RV32IM: # %bb.0:
; RV32IM-NEXT: slli a1, a0, 16
; RV32IM-NEXT: srai a1, a1, 16
; RV32IM-NEXT: slli a1, a1, 1
; RV32IM-NEXT: srai a1, a1, 2
; RV32IM-NEXT: srli a1, a1, 29
; RV32IM-NEXT: add a0, a0, a1
; RV32IM-NEXT: slli a0, a0, 16
Expand All @@ -1231,8 +1225,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV64I-LABEL: sdiv16_pow2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 48
; RV64I-NEXT: srai a1, a1, 48
; RV64I-NEXT: slli a1, a1, 33
; RV64I-NEXT: srai a1, a1, 2
; RV64I-NEXT: srli a1, a1, 61
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: slli a0, a0, 48
Expand All @@ -1242,8 +1235,7 @@ define i16 @sdiv16_pow2(i16 %a) nounwind {
; RV64IM-LABEL: sdiv16_pow2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 48
; RV64IM-NEXT: srai a1, a1, 48
; RV64IM-NEXT: slli a1, a1, 33
; RV64IM-NEXT: srai a1, a1, 2
; RV64IM-NEXT: srli a1, a1, 61
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 48
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/RISCV/rv64zba.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1555,16 +1555,14 @@ define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind {
; RV64I-LABEL: sext_ashr_zext_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: slli a0, a0, 23
; RV64I-NEXT: srai a0, a0, 31
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBANOZBB-LABEL: sext_ashr_zext_i8:
; RV64ZBANOZBB: # %bb.0:
; RV64ZBANOZBB-NEXT: slli a0, a0, 56
; RV64ZBANOZBB-NEXT: srai a0, a0, 56
; RV64ZBANOZBB-NEXT: slli a0, a0, 23
; RV64ZBANOZBB-NEXT: srai a0, a0, 31
; RV64ZBANOZBB-NEXT: srli a0, a0, 32
; RV64ZBANOZBB-NEXT: ret
;
Expand Down Expand Up @@ -1674,16 +1672,14 @@ define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind {
; RV64I-LABEL: sext_ashr_zext_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: slli a0, a0, 23
; RV64I-NEXT: srai a0, a0, 25
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBANOZBB-LABEL: sext_ashr_zext_i16:
; RV64ZBANOZBB: # %bb.0:
; RV64ZBANOZBB-NEXT: slli a0, a0, 48
; RV64ZBANOZBB-NEXT: srai a0, a0, 48
; RV64ZBANOZBB-NEXT: slli a0, a0, 23
; RV64ZBANOZBB-NEXT: srai a0, a0, 25
; RV64ZBANOZBB-NEXT: srli a0, a0, 32
; RV64ZBANOZBB-NEXT: ret
;
Expand Down
18 changes: 6 additions & 12 deletions llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32-LABEL: test_srem_pow2_setne:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a0, 26
; RV32-NEXT: srai a1, a1, 26
; RV32-NEXT: slli a1, a1, 21
; RV32-NEXT: srai a1, a1, 1
; RV32-NEXT: srli a1, a1, 30
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: andi a1, a1, 60
Expand All @@ -235,8 +234,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64-LABEL: test_srem_pow2_setne:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 58
; RV64-NEXT: srai a1, a1, 58
; RV64-NEXT: slli a1, a1, 53
; RV64-NEXT: srai a1, a1, 1
; RV64-NEXT: srli a1, a1, 62
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: andi a1, a1, 60
Expand All @@ -248,8 +246,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32M-LABEL: test_srem_pow2_setne:
; RV32M: # %bb.0:
; RV32M-NEXT: slli a1, a0, 26
; RV32M-NEXT: srai a1, a1, 26
; RV32M-NEXT: slli a1, a1, 21
; RV32M-NEXT: srai a1, a1, 1
; RV32M-NEXT: srli a1, a1, 30
; RV32M-NEXT: add a1, a0, a1
; RV32M-NEXT: andi a1, a1, 60
Expand All @@ -261,8 +258,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64M-LABEL: test_srem_pow2_setne:
; RV64M: # %bb.0:
; RV64M-NEXT: slli a1, a0, 58
; RV64M-NEXT: srai a1, a1, 58
; RV64M-NEXT: slli a1, a1, 53
; RV64M-NEXT: srai a1, a1, 1
; RV64M-NEXT: srli a1, a1, 62
; RV64M-NEXT: add a1, a0, a1
; RV64M-NEXT: andi a1, a1, 60
Expand All @@ -274,8 +270,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV32MV-LABEL: test_srem_pow2_setne:
; RV32MV: # %bb.0:
; RV32MV-NEXT: slli a1, a0, 26
; RV32MV-NEXT: srai a1, a1, 26
; RV32MV-NEXT: slli a1, a1, 21
; RV32MV-NEXT: srai a1, a1, 1
; RV32MV-NEXT: srli a1, a1, 30
; RV32MV-NEXT: add a1, a0, a1
; RV32MV-NEXT: andi a1, a1, 60
Expand All @@ -287,8 +282,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64MV-LABEL: test_srem_pow2_setne:
; RV64MV: # %bb.0:
; RV64MV-NEXT: slli a1, a0, 58
; RV64MV-NEXT: srai a1, a1, 58
; RV64MV-NEXT: slli a1, a1, 53
; RV64MV-NEXT: srai a1, a1, 1
; RV64MV-NEXT: srli a1, a1, 62
; RV64MV-NEXT: add a1, a0, a1
; RV64MV-NEXT: andi a1, a1, 60
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ define i1 @test_srem_even(i4 %X) nounwind {
define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; CHECK-LABEL: test_srem_pow2_setne:
; CHECK: @ %bb.0:
; CHECK-NEXT: sbfx r1, r0, #0, #6
; CHECK-NEXT: ubfx r1, r1, #9, #2
; CHECK-NEXT: lsls r1, r0, #26
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: and.w r1, r2, r1, asr #31
; CHECK-NEXT: add r1, r0
; CHECK-NEXT: and r1, r1, #60
; CHECK-NEXT: subs r0, r0, r1
Expand Down
Loading
Loading