Skip to content

Commit a111f91

Browse files
authored
[LoongArch][ISel] Check the number of sign bits in PatGprGpr_32 (#107432)
After #92205, LoongArch ISel selects `div.w` for `trunc i64 (sdiv i64 3202030857, (sext i32 X to i64)) to i32`. This is incorrect because `3202030857` does not fit in a signed 32-bit integer, so the 32-bit division produces a wrong result when `X == 2`: https://alive2.llvm.org/ce/z/pzfGZZ

This patch adds additional `sexti32` checks (the `assertsexti32` pattern fragment) to both operands of `PatGprGpr_32`, so the 32-bit instruction is selected only when each operand is asserted to be sign-extended from at most 32 bits. Alive2 proof: https://alive2.llvm.org/ce/z/AkH5Mp

Fixes #107414.
1 parent f3b4e47 commit a111f91

File tree

2 files changed

+69
-3
lines changed

2 files changed

+69
-3
lines changed

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x00006800>;
10651065

10661066
/// Generic pattern classes
10671067

1068+
// Pattern fragment that matches an `assertsext` node only when the value type
// carried in its operand 1 (a VTSDNode) is at most 32 bits wide.
def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
1069+
return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
1070+
}]>;
10681071
// Selects `Inst` for a two-operand `OpNode` whose operands are both GPRs,
// passing $rj and $rk through to the instruction in the same order.
class PatGprGpr<SDPatternOperator OpNode, LAInst Inst>
10691072
: Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
10701073
// Selects `Inst` for `sext_inreg (OpNode $rj, $rk), i32`.
// The removed line (1071-) matched any pair of GPR operands; the added line
// (1074+) additionally requires each operand to match `assertsexti32`.
class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst>
1071-
: Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>;
1074+
: Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>;
10721075
// Selects `Inst` for a single-operand `OpNode` applied to a GPR.
class PatGpr<SDPatternOperator OpNode, LAInst Inst>
10731076
: Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
10741077

llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,8 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
191191
; LA64: # %bb.0: # %entry
192192
; LA64-NEXT: addi.w $a1, $a1, 0
193193
; LA64-NEXT: addi.w $a0, $a0, 0
194-
; LA64-NEXT: div.w $a0, $a0, $a1
194+
; LA64-NEXT: div.d $a0, $a0, $a1
195+
; LA64-NEXT: addi.w $a0, $a0, 0
195196
; LA64-NEXT: ret
196197
;
197198
; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -207,11 +208,12 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
207208
; LA64-TRAP: # %bb.0: # %entry
208209
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
209210
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
210-
; LA64-TRAP-NEXT: div.w $a0, $a0, $a1
211+
; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
211212
; LA64-TRAP-NEXT: bnez $a1, .LBB5_2
212213
; LA64-TRAP-NEXT: # %bb.1: # %entry
213214
; LA64-TRAP-NEXT: break 7
214215
; LA64-TRAP-NEXT: .LBB5_2: # %entry
216+
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
215217
; LA64-TRAP-NEXT: ret
216218
entry:
217219
%r = sdiv i32 %a, %b
@@ -1151,3 +1153,64 @@ entry:
11511153
%r = urem i64 %a, %b
11521154
ret i64 %r
11531155
}
1156+
1157+
; Regression test for issue #107414: the dividend 3202030857 does not fit in a
; signed 32-bit integer, so the i64 sdiv must not be narrowed to div.w. The
; LA64 checks expect div.d followed by addi.w; the LA32 checks expect a call
; to __divdi3.
define signext i32 @pr107414(i32 signext %x) {
1158+
; LA32-LABEL: pr107414:
1159+
; LA32: # %bb.0: # %entry
1160+
; LA32-NEXT: addi.w $sp, $sp, -16
1161+
; LA32-NEXT: .cfi_def_cfa_offset 16
1162+
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
1163+
; LA32-NEXT: .cfi_offset 1, -4
1164+
; LA32-NEXT: move $a2, $a0
1165+
; LA32-NEXT: srai.w $a3, $a0, 31
1166+
; LA32-NEXT: lu12i.w $a0, -266831
1167+
; LA32-NEXT: ori $a0, $a0, 3337
1168+
; LA32-NEXT: move $a1, $zero
1169+
; LA32-NEXT: bl %plt(__divdi3)
1170+
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
1171+
; LA32-NEXT: addi.w $sp, $sp, 16
1172+
; LA32-NEXT: ret
1173+
;
1174+
; LA64-LABEL: pr107414:
1175+
; LA64: # %bb.0: # %entry
1176+
; LA64-NEXT: lu12i.w $a1, -266831
1177+
; LA64-NEXT: ori $a1, $a1, 3337
1178+
; LA64-NEXT: lu32i.d $a1, 0
1179+
; LA64-NEXT: div.d $a0, $a1, $a0
1180+
; LA64-NEXT: addi.w $a0, $a0, 0
1181+
; LA64-NEXT: ret
1182+
;
1183+
; LA32-TRAP-LABEL: pr107414:
1184+
; LA32-TRAP: # %bb.0: # %entry
1185+
; LA32-TRAP-NEXT: addi.w $sp, $sp, -16
1186+
; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16
1187+
; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
1188+
; LA32-TRAP-NEXT: .cfi_offset 1, -4
1189+
; LA32-TRAP-NEXT: move $a2, $a0
1190+
; LA32-TRAP-NEXT: srai.w $a3, $a0, 31
1191+
; LA32-TRAP-NEXT: lu12i.w $a0, -266831
1192+
; LA32-TRAP-NEXT: ori $a0, $a0, 3337
1193+
; LA32-TRAP-NEXT: move $a1, $zero
1194+
; LA32-TRAP-NEXT: bl %plt(__divdi3)
1195+
; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
1196+
; LA32-TRAP-NEXT: addi.w $sp, $sp, 16
1197+
; LA32-TRAP-NEXT: ret
1198+
;
1199+
; LA64-TRAP-LABEL: pr107414:
1200+
; LA64-TRAP: # %bb.0: # %entry
1201+
; LA64-TRAP-NEXT: lu12i.w $a1, -266831
1202+
; LA64-TRAP-NEXT: ori $a1, $a1, 3337
1203+
; LA64-TRAP-NEXT: lu32i.d $a1, 0
1204+
; LA64-TRAP-NEXT: div.d $a1, $a1, $a0
1205+
; LA64-TRAP-NEXT: bnez $a0, .LBB32_2
1206+
; LA64-TRAP-NEXT: # %bb.1: # %entry
1207+
; LA64-TRAP-NEXT: break 7
1208+
; LA64-TRAP-NEXT: .LBB32_2: # %entry
1209+
; LA64-TRAP-NEXT: addi.w $a0, $a1, 0
1210+
; LA64-TRAP-NEXT: ret
1211+
entry:
1212+
%conv = sext i32 %x to i64
1213+
%div = sdiv i64 3202030857, %conv
1214+
%conv1 = trunc i64 %div to i32
1215+
ret i32 %conv1
1216+
}

0 commit comments

Comments
 (0)