Skip to content

Commit 2c1a567

Browse files
committed
[LoongArch] Select {DIV,MOD}.{W,WU} instruction to eliminate explicit sign extension
1 parent 71fbbb6 commit 2c1a567

File tree

4 files changed

+57
-62
lines changed

4 files changed

+57
-62
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
139139

140140
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
141141
setOperationAction(ISD::BSWAP, MVT::i32, Custom);
142+
setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
142143
}
143144

144145
// Set operations for LA32 only.
@@ -1665,6 +1666,10 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
16651666
switch (Opcode) {
16661667
default:
16671668
llvm_unreachable("Unexpected opcode");
1669+
case ISD::UDIV:
1670+
return LoongArchISD::DIV_WU;
1671+
case ISD::UREM:
1672+
return LoongArchISD::MOD_WU;
16681673
case ISD::SHL:
16691674
return LoongArchISD::SLL_W;
16701675
case ISD::SRA:
@@ -1841,6 +1846,12 @@ void LoongArchTargetLowering::ReplaceNodeResults(
18411846
switch (N->getOpcode()) {
18421847
default:
18431848
llvm_unreachable("Don't know how to legalize this operation");
1849+
case ISD::UDIV:
1850+
case ISD::UREM:
1851+
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1852+
"Unexpected custom legalisation");
1853+
Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND));
1854+
break;
18441855
case ISD::SHL:
18451856
case ISD::SRA:
18461857
case ISD::SRL:
@@ -3445,6 +3456,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
34453456
NODE_NAME_CASE(BITREV_W)
34463457
NODE_NAME_CASE(ROTR_W)
34473458
NODE_NAME_CASE(ROTL_W)
3459+
NODE_NAME_CASE(DIV_WU)
3460+
NODE_NAME_CASE(MOD_WU)
34483461
NODE_NAME_CASE(CLZ_W)
34493462
NODE_NAME_CASE(CTZ_W)
34503463
NODE_NAME_CASE(DBAR)

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ enum NodeType : unsigned {
4343
ROTL_W,
4444
ROTR_W,
4545

46+
// Unsigned 32-bit integer division and remainder
47+
DIV_WU,
48+
MOD_WU,
49+
4650
// FPR<->GPR transfer operations
4751
MOVGR2FR_W_LA64,
4852
MOVFR2GR_S_LA64,

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
8585
def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
8686
def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
8787
def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>;
88+
def loongarch_div_wu : SDNode<"LoongArchISD::DIV_WU", SDT_LoongArchIntBinOpW>;
89+
def loongarch_mod_wu : SDNode<"LoongArchISD::MOD_WU", SDT_LoongArchIntBinOpW>;
8890
def loongarch_crc_w_b_w
8991
: SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
9092
def loongarch_crc_w_h_w
@@ -1110,9 +1112,13 @@ def : PatGprImm_32<add, ADDI_W, simm12>;
11101112
def : PatGprGpr<sub, SUB_D>;
11111113
def : PatGprGpr_32<sub, SUB_W>;
11121114
def : PatGprGpr<sdiv, DIV_D>;
1115+
def : PatGprGpr_32<sdiv, DIV_W>;
11131116
def : PatGprGpr<udiv, DIV_DU>;
1117+
def : PatGprGpr<loongarch_div_wu, DIV_WU>;
11141118
def : PatGprGpr<srem, MOD_D>;
1119+
def : PatGprGpr_32<srem, MOD_W>;
11151120
def : PatGprGpr<urem, MOD_DU>;
1121+
def : PatGprGpr<loongarch_mod_wu, MOD_WU>;
11161122
def : PatGprGpr<rotr, ROTR_D>;
11171123
def : PatGprGpr<loongarch_rotr_w, ROTR_W>;
11181124
def : PatGprGpr_32<rotr, ROTR_W>;

llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll

Lines changed: 34 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,7 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
191191
; LA64: # %bb.0: # %entry
192192
; LA64-NEXT: addi.w $a1, $a1, 0
193193
; LA64-NEXT: addi.w $a0, $a0, 0
194-
; LA64-NEXT: div.d $a0, $a0, $a1
195-
; LA64-NEXT: addi.w $a0, $a0, 0
194+
; LA64-NEXT: div.w $a0, $a0, $a1
196195
; LA64-NEXT: ret
197196
;
198197
; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -208,12 +207,11 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
208207
; LA64-TRAP: # %bb.0: # %entry
209208
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
210209
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
211-
; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
210+
; LA64-TRAP-NEXT: div.w $a0, $a0, $a1
212211
; LA64-TRAP-NEXT: bnez $a1, .LBB5_2
213212
; LA64-TRAP-NEXT: # %bb.1: # %entry
214213
; LA64-TRAP-NEXT: break 7
215214
; LA64-TRAP-NEXT: .LBB5_2: # %entry
216-
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
217215
; LA64-TRAP-NEXT: ret
218216
entry:
219217
%r = sdiv i32 %a, %b
@@ -228,8 +226,7 @@ define signext i32 @sdiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
228226
;
229227
; LA64-LABEL: sdiv_si32_si32_si32:
230228
; LA64: # %bb.0: # %entry
231-
; LA64-NEXT: div.d $a0, $a0, $a1
232-
; LA64-NEXT: addi.w $a0, $a0, 0
229+
; LA64-NEXT: div.w $a0, $a0, $a1
233230
; LA64-NEXT: ret
234231
;
235232
; LA32-TRAP-LABEL: sdiv_si32_si32_si32:
@@ -243,12 +240,11 @@ define signext i32 @sdiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
243240
;
244241
; LA64-TRAP-LABEL: sdiv_si32_si32_si32:
245242
; LA64-TRAP: # %bb.0: # %entry
246-
; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
243+
; LA64-TRAP-NEXT: div.w $a0, $a0, $a1
247244
; LA64-TRAP-NEXT: bnez $a1, .LBB6_2
248245
; LA64-TRAP-NEXT: # %bb.1: # %entry
249246
; LA64-TRAP-NEXT: break 7
250247
; LA64-TRAP-NEXT: .LBB6_2: # %entry
251-
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
252248
; LA64-TRAP-NEXT: ret
253249
entry:
254250
%r = sdiv i32 %a, %b
@@ -407,9 +403,9 @@ define i32 @udiv_i32(i32 %a, i32 %b) {
407403
;
408404
; LA64-LABEL: udiv_i32:
409405
; LA64: # %bb.0: # %entry
410-
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
411-
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
412-
; LA64-NEXT: div.du $a0, $a0, $a1
406+
; LA64-NEXT: addi.w $a1, $a1, 0
407+
; LA64-NEXT: addi.w $a0, $a0, 0
408+
; LA64-NEXT: div.wu $a0, $a0, $a1
413409
; LA64-NEXT: ret
414410
;
415411
; LA32-TRAP-LABEL: udiv_i32:
@@ -423,9 +419,9 @@ define i32 @udiv_i32(i32 %a, i32 %b) {
423419
;
424420
; LA64-TRAP-LABEL: udiv_i32:
425421
; LA64-TRAP: # %bb.0: # %entry
426-
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
427-
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
428-
; LA64-TRAP-NEXT: div.du $a0, $a0, $a1
422+
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
423+
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
424+
; LA64-TRAP-NEXT: div.wu $a0, $a0, $a1
429425
; LA64-TRAP-NEXT: bnez $a1, .LBB11_2
430426
; LA64-TRAP-NEXT: # %bb.1: # %entry
431427
; LA64-TRAP-NEXT: break 7
@@ -444,9 +440,7 @@ define i32 @udiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
444440
;
445441
; LA64-LABEL: udiv_ui32_si32_si32:
446442
; LA64: # %bb.0: # %entry
447-
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
448-
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
449-
; LA64-NEXT: div.du $a0, $a0, $a1
443+
; LA64-NEXT: div.wu $a0, $a0, $a1
450444
; LA64-NEXT: ret
451445
;
452446
; LA32-TRAP-LABEL: udiv_ui32_si32_si32:
@@ -460,9 +454,7 @@ define i32 @udiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
460454
;
461455
; LA64-TRAP-LABEL: udiv_ui32_si32_si32:
462456
; LA64-TRAP: # %bb.0: # %entry
463-
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
464-
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
465-
; LA64-TRAP-NEXT: div.du $a0, $a0, $a1
457+
; LA64-TRAP-NEXT: div.wu $a0, $a0, $a1
466458
; LA64-TRAP-NEXT: bnez $a1, .LBB12_2
467459
; LA64-TRAP-NEXT: # %bb.1: # %entry
468460
; LA64-TRAP-NEXT: break 7
@@ -481,10 +473,9 @@ define signext i32 @udiv_si32_ui32_ui32(i32 %a, i32 %b) {
481473
;
482474
; LA64-LABEL: udiv_si32_ui32_ui32:
483475
; LA64: # %bb.0: # %entry
484-
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
485-
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
486-
; LA64-NEXT: div.du $a0, $a0, $a1
476+
; LA64-NEXT: addi.w $a1, $a1, 0
487477
; LA64-NEXT: addi.w $a0, $a0, 0
478+
; LA64-NEXT: div.wu $a0, $a0, $a1
488479
; LA64-NEXT: ret
489480
;
490481
; LA32-TRAP-LABEL: udiv_si32_ui32_ui32:
@@ -498,14 +489,13 @@ define signext i32 @udiv_si32_ui32_ui32(i32 %a, i32 %b) {
498489
;
499490
; LA64-TRAP-LABEL: udiv_si32_ui32_ui32:
500491
; LA64-TRAP: # %bb.0: # %entry
501-
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
502-
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
503-
; LA64-TRAP-NEXT: div.du $a0, $a0, $a1
492+
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
493+
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
494+
; LA64-TRAP-NEXT: div.wu $a0, $a0, $a1
504495
; LA64-TRAP-NEXT: bnez $a1, .LBB13_2
505496
; LA64-TRAP-NEXT: # %bb.1: # %entry
506497
; LA64-TRAP-NEXT: break 7
507498
; LA64-TRAP-NEXT: .LBB13_2: # %entry
508-
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
509499
; LA64-TRAP-NEXT: ret
510500
entry:
511501
%r = udiv i32 %a, %b
@@ -520,10 +510,7 @@ define signext i32 @udiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
520510
;
521511
; LA64-LABEL: udiv_si32_si32_si32:
522512
; LA64: # %bb.0: # %entry
523-
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
524-
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
525-
; LA64-NEXT: div.du $a0, $a0, $a1
526-
; LA64-NEXT: addi.w $a0, $a0, 0
513+
; LA64-NEXT: div.wu $a0, $a0, $a1
527514
; LA64-NEXT: ret
528515
;
529516
; LA32-TRAP-LABEL: udiv_si32_si32_si32:
@@ -537,14 +524,11 @@ define signext i32 @udiv_si32_si32_si32(i32 signext %a, i32 signext %b) {
537524
;
538525
; LA64-TRAP-LABEL: udiv_si32_si32_si32:
539526
; LA64-TRAP: # %bb.0: # %entry
540-
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
541-
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
542-
; LA64-TRAP-NEXT: div.du $a0, $a0, $a1
527+
; LA64-TRAP-NEXT: div.wu $a0, $a0, $a1
543528
; LA64-TRAP-NEXT: bnez $a1, .LBB14_2
544529
; LA64-TRAP-NEXT: # %bb.1: # %entry
545530
; LA64-TRAP-NEXT: break 7
546531
; LA64-TRAP-NEXT: .LBB14_2: # %entry
547-
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
548532
; LA64-TRAP-NEXT: ret
549533
entry:
550534
%r = udiv i32 %a, %b
@@ -995,9 +979,9 @@ define i32 @urem_i32(i32 %a, i32 %b) {
995979
;
996980
; LA64-LABEL: urem_i32:
997981
; LA64: # %bb.0: # %entry
998-
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
999-
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
1000-
; LA64-NEXT: mod.du $a0, $a0, $a1
982+
; LA64-NEXT: addi.w $a1, $a1, 0
983+
; LA64-NEXT: addi.w $a0, $a0, 0
984+
; LA64-NEXT: mod.wu $a0, $a0, $a1
1001985
; LA64-NEXT: ret
1002986
;
1003987
; LA32-TRAP-LABEL: urem_i32:
@@ -1011,9 +995,9 @@ define i32 @urem_i32(i32 %a, i32 %b) {
1011995
;
1012996
; LA64-TRAP-LABEL: urem_i32:
1013997
; LA64-TRAP: # %bb.0: # %entry
1014-
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
1015-
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
1016-
; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1
998+
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
999+
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
1000+
; LA64-TRAP-NEXT: mod.wu $a0, $a0, $a1
10171001
; LA64-TRAP-NEXT: bnez $a1, .LBB27_2
10181002
; LA64-TRAP-NEXT: # %bb.1: # %entry
10191003
; LA64-TRAP-NEXT: break 7
@@ -1032,9 +1016,7 @@ define i32 @urem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
10321016
;
10331017
; LA64-LABEL: urem_ui32_si32_si32:
10341018
; LA64: # %bb.0: # %entry
1035-
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
1036-
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
1037-
; LA64-NEXT: mod.du $a0, $a0, $a1
1019+
; LA64-NEXT: mod.wu $a0, $a0, $a1
10381020
; LA64-NEXT: ret
10391021
;
10401022
; LA32-TRAP-LABEL: urem_ui32_si32_si32:
@@ -1048,9 +1030,7 @@ define i32 @urem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
10481030
;
10491031
; LA64-TRAP-LABEL: urem_ui32_si32_si32:
10501032
; LA64-TRAP: # %bb.0: # %entry
1051-
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
1052-
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
1053-
; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1
1033+
; LA64-TRAP-NEXT: mod.wu $a0, $a0, $a1
10541034
; LA64-TRAP-NEXT: bnez $a1, .LBB28_2
10551035
; LA64-TRAP-NEXT: # %bb.1: # %entry
10561036
; LA64-TRAP-NEXT: break 7
@@ -1069,10 +1049,9 @@ define signext i32 @urem_si32_ui32_ui32(i32 %a, i32 %b) {
10691049
;
10701050
; LA64-LABEL: urem_si32_ui32_ui32:
10711051
; LA64: # %bb.0: # %entry
1072-
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
1073-
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
1074-
; LA64-NEXT: mod.du $a0, $a0, $a1
1052+
; LA64-NEXT: addi.w $a1, $a1, 0
10751053
; LA64-NEXT: addi.w $a0, $a0, 0
1054+
; LA64-NEXT: mod.wu $a0, $a0, $a1
10761055
; LA64-NEXT: ret
10771056
;
10781057
; LA32-TRAP-LABEL: urem_si32_ui32_ui32:
@@ -1086,14 +1065,13 @@ define signext i32 @urem_si32_ui32_ui32(i32 %a, i32 %b) {
10861065
;
10871066
; LA64-TRAP-LABEL: urem_si32_ui32_ui32:
10881067
; LA64-TRAP: # %bb.0: # %entry
1089-
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
1090-
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
1091-
; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1
1068+
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
1069+
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
1070+
; LA64-TRAP-NEXT: mod.wu $a0, $a0, $a1
10921071
; LA64-TRAP-NEXT: bnez $a1, .LBB29_2
10931072
; LA64-TRAP-NEXT: # %bb.1: # %entry
10941073
; LA64-TRAP-NEXT: break 7
10951074
; LA64-TRAP-NEXT: .LBB29_2: # %entry
1096-
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
10971075
; LA64-TRAP-NEXT: ret
10981076
entry:
10991077
%r = urem i32 %a, %b
@@ -1108,10 +1086,7 @@ define signext i32 @urem_si32_si32_si32(i32 signext %a, i32 signext %b) {
11081086
;
11091087
; LA64-LABEL: urem_si32_si32_si32:
11101088
; LA64: # %bb.0: # %entry
1111-
; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
1112-
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
1113-
; LA64-NEXT: mod.du $a0, $a0, $a1
1114-
; LA64-NEXT: addi.w $a0, $a0, 0
1089+
; LA64-NEXT: mod.wu $a0, $a0, $a1
11151090
; LA64-NEXT: ret
11161091
;
11171092
; LA32-TRAP-LABEL: urem_si32_si32_si32:
@@ -1125,14 +1100,11 @@ define signext i32 @urem_si32_si32_si32(i32 signext %a, i32 signext %b) {
11251100
;
11261101
; LA64-TRAP-LABEL: urem_si32_si32_si32:
11271102
; LA64-TRAP: # %bb.0: # %entry
1128-
; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
1129-
; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
1130-
; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1
1103+
; LA64-TRAP-NEXT: mod.wu $a0, $a0, $a1
11311104
; LA64-TRAP-NEXT: bnez $a1, .LBB30_2
11321105
; LA64-TRAP-NEXT: # %bb.1: # %entry
11331106
; LA64-TRAP-NEXT: break 7
11341107
; LA64-TRAP-NEXT: .LBB30_2: # %entry
1135-
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
11361108
; LA64-TRAP-NEXT: ret
11371109
entry:
11381110
%r = urem i32 %a, %b

0 commit comments

Comments
 (0)