Skip to content

Commit c035cb7

Browse files
svs-quic authored and Anthony Tran committed
[RISCV] Add ISel pattern for generating QC_BREV32 (llvm#145288)
The `QC_BREV32` instruction reverses the bit order of `rs1` and writes the result to `rd`.
1 parent 4c92dd6 commit c035cb7

File tree

3 files changed

+307
-1
lines changed

3 files changed

+307
-1
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -384,7 +384,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
384384
? Legal
385385
: Expand);
386386

387-
if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
387+
if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
388+
!Subtarget.is64Bit()) {
388389
setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
389390
} else {
390391
// Zbkb can use rev8+brev8 to implement bitreverse.

llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1448,6 +1448,8 @@ def : Pat<(i32 (and GPRNoX0:$rs, 255)), (QC_EXTU GPRNoX0:$rs, 8, 0)>;
14481448
def : Pat<(i32 (and GPRNoX0:$rs, 511)), (QC_EXTU GPRNoX0:$rs, 9, 0)>;
14491449
def : Pat<(i32 (and GPRNoX0:$rs, 1023)), (QC_EXTU GPRNoX0:$rs, 10, 0)>;
14501450
def : Pat<(i32 (and GPRNoX0:$rs, 2047)), (QC_EXTU GPRNoX0:$rs, 11, 0)>;
1451+
1452+
def : Pat<(i32 (bitreverse GPRNoX0:$rs1)), (QC_BREV32 GPRNoX0:$rs1)>;
14511453
} // Predicates = [HasVendorXqcibm, IsRV32]
14521454

14531455
// If Zbb is enabled sext.b/h is preferred since they are compressible

llvm/test/CodeGen/RISCV/xqcibm-cto-clo.ll renamed to llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll

Lines changed: 303 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,11 +10,17 @@ declare i8 @llvm.cttz.i8(i8, i1)
1010
declare i16 @llvm.cttz.i16(i16, i1)
1111
declare i32 @llvm.cttz.i32(i32, i1)
1212
declare i64 @llvm.cttz.i64(i64, i1)
13+
1314
declare i8 @llvm.ctlz.i8(i8, i1)
1415
declare i16 @llvm.ctlz.i16(i16, i1)
1516
declare i32 @llvm.ctlz.i32(i32, i1)
1617
declare i64 @llvm.ctlz.i64(i64, i1)
1718

19+
declare i8 @llvm.bitreverse.i8(i8)
20+
declare i16 @llvm.bitreverse.i16(i16)
21+
declare i32 @llvm.bitreverse.i32(i32)
22+
declare i64 @llvm.bitreverse.i64(i64)
23+
1824
define i8 @test_cttz_i8(i8 %a) nounwind {
1925
; RV32I-LABEL: test_cttz_i8:
2026
; RV32I: # %bb.0:
@@ -956,3 +962,300 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
956962
%tmp = call i64 @llvm.ctlz.i64(i64 %1, i1 true)
957963
ret i64 %tmp
958964
}
965+
966+
define i8 @brev_i8(i8 %a0) {
967+
; RV32I-LABEL: brev_i8:
968+
; RV32I: # %bb.0:
969+
; RV32I-NEXT: andi a1, a0, 15
970+
; RV32I-NEXT: slli a0, a0, 24
971+
; RV32I-NEXT: slli a1, a1, 4
972+
; RV32I-NEXT: srli a0, a0, 28
973+
; RV32I-NEXT: or a0, a0, a1
974+
; RV32I-NEXT: andi a1, a0, 51
975+
; RV32I-NEXT: srli a0, a0, 2
976+
; RV32I-NEXT: slli a1, a1, 2
977+
; RV32I-NEXT: andi a0, a0, 51
978+
; RV32I-NEXT: or a0, a0, a1
979+
; RV32I-NEXT: andi a1, a0, 85
980+
; RV32I-NEXT: srli a0, a0, 1
981+
; RV32I-NEXT: slli a1, a1, 1
982+
; RV32I-NEXT: andi a0, a0, 85
983+
; RV32I-NEXT: or a0, a0, a1
984+
; RV32I-NEXT: ret
985+
;
986+
; RV32ZBB-LABEL: brev_i8:
987+
; RV32ZBB: # %bb.0:
988+
; RV32ZBB-NEXT: andi a1, a0, 15
989+
; RV32ZBB-NEXT: slli a0, a0, 24
990+
; RV32ZBB-NEXT: slli a1, a1, 4
991+
; RV32ZBB-NEXT: srli a0, a0, 28
992+
; RV32ZBB-NEXT: or a0, a0, a1
993+
; RV32ZBB-NEXT: andi a1, a0, 51
994+
; RV32ZBB-NEXT: srli a0, a0, 2
995+
; RV32ZBB-NEXT: slli a1, a1, 2
996+
; RV32ZBB-NEXT: andi a0, a0, 51
997+
; RV32ZBB-NEXT: or a0, a0, a1
998+
; RV32ZBB-NEXT: andi a1, a0, 85
999+
; RV32ZBB-NEXT: srli a0, a0, 1
1000+
; RV32ZBB-NEXT: slli a1, a1, 1
1001+
; RV32ZBB-NEXT: andi a0, a0, 85
1002+
; RV32ZBB-NEXT: or a0, a0, a1
1003+
; RV32ZBB-NEXT: ret
1004+
;
1005+
; RV32ZBBXQCIBM-LABEL: brev_i8:
1006+
; RV32ZBBXQCIBM: # %bb.0:
1007+
; RV32ZBBXQCIBM-NEXT: qc.brev32 a0, a0
1008+
; RV32ZBBXQCIBM-NEXT: srli a0, a0, 24
1009+
; RV32ZBBXQCIBM-NEXT: ret
1010+
%v0 = tail call i8 @llvm.bitreverse.i8(i8 %a0)
1011+
ret i8 %v0
1012+
}
1013+
1014+
define i16 @brev_i16(i16 %a0) {
1015+
; RV32I-LABEL: brev_i16:
1016+
; RV32I: # %bb.0:
1017+
; RV32I-NEXT: slli a1, a0, 8
1018+
; RV32I-NEXT: slli a0, a0, 16
1019+
; RV32I-NEXT: lui a2, 1
1020+
; RV32I-NEXT: srli a0, a0, 24
1021+
; RV32I-NEXT: addi a2, a2, -241
1022+
; RV32I-NEXT: or a0, a1, a0
1023+
; RV32I-NEXT: srli a1, a0, 4
1024+
; RV32I-NEXT: and a0, a0, a2
1025+
; RV32I-NEXT: and a1, a1, a2
1026+
; RV32I-NEXT: lui a2, 3
1027+
; RV32I-NEXT: addi a2, a2, 819
1028+
; RV32I-NEXT: slli a0, a0, 4
1029+
; RV32I-NEXT: or a0, a1, a0
1030+
; RV32I-NEXT: srli a1, a0, 2
1031+
; RV32I-NEXT: and a0, a0, a2
1032+
; RV32I-NEXT: and a1, a1, a2
1033+
; RV32I-NEXT: lui a2, 5
1034+
; RV32I-NEXT: addi a2, a2, 1365
1035+
; RV32I-NEXT: slli a0, a0, 2
1036+
; RV32I-NEXT: or a0, a1, a0
1037+
; RV32I-NEXT: srli a1, a0, 1
1038+
; RV32I-NEXT: and a0, a0, a2
1039+
; RV32I-NEXT: and a1, a1, a2
1040+
; RV32I-NEXT: slli a0, a0, 1
1041+
; RV32I-NEXT: or a0, a1, a0
1042+
; RV32I-NEXT: ret
1043+
;
1044+
; RV32ZBB-LABEL: brev_i16:
1045+
; RV32ZBB: # %bb.0:
1046+
; RV32ZBB-NEXT: rev8 a0, a0
1047+
; RV32ZBB-NEXT: lui a1, 15
1048+
; RV32ZBB-NEXT: srli a2, a0, 12
1049+
; RV32ZBB-NEXT: addi a1, a1, 240
1050+
; RV32ZBB-NEXT: and a1, a2, a1
1051+
; RV32ZBB-NEXT: lui a2, 3
1052+
; RV32ZBB-NEXT: srli a0, a0, 20
1053+
; RV32ZBB-NEXT: addi a2, a2, 819
1054+
; RV32ZBB-NEXT: andi a0, a0, -241
1055+
; RV32ZBB-NEXT: or a0, a0, a1
1056+
; RV32ZBB-NEXT: srli a1, a0, 2
1057+
; RV32ZBB-NEXT: and a0, a0, a2
1058+
; RV32ZBB-NEXT: and a1, a1, a2
1059+
; RV32ZBB-NEXT: lui a2, 5
1060+
; RV32ZBB-NEXT: addi a2, a2, 1365
1061+
; RV32ZBB-NEXT: slli a0, a0, 2
1062+
; RV32ZBB-NEXT: or a0, a1, a0
1063+
; RV32ZBB-NEXT: srli a1, a0, 1
1064+
; RV32ZBB-NEXT: and a0, a0, a2
1065+
; RV32ZBB-NEXT: and a1, a1, a2
1066+
; RV32ZBB-NEXT: slli a0, a0, 1
1067+
; RV32ZBB-NEXT: or a0, a1, a0
1068+
; RV32ZBB-NEXT: ret
1069+
;
1070+
; RV32ZBBXQCIBM-LABEL: brev_i16:
1071+
; RV32ZBBXQCIBM: # %bb.0:
1072+
; RV32ZBBXQCIBM-NEXT: qc.brev32 a0, a0
1073+
; RV32ZBBXQCIBM-NEXT: srli a0, a0, 16
1074+
; RV32ZBBXQCIBM-NEXT: ret
1075+
%v0 = tail call i16 @llvm.bitreverse.i16(i16 %a0)
1076+
ret i16 %v0
1077+
}
1078+
1079+
define i32 @brev_i32(i32 %a0) {
1080+
; RV32I-LABEL: brev_i32:
1081+
; RV32I: # %bb.0:
1082+
; RV32I-NEXT: srli a1, a0, 8
1083+
; RV32I-NEXT: lui a2, 16
1084+
; RV32I-NEXT: srli a3, a0, 24
1085+
; RV32I-NEXT: addi a2, a2, -256
1086+
; RV32I-NEXT: and a1, a1, a2
1087+
; RV32I-NEXT: and a2, a0, a2
1088+
; RV32I-NEXT: slli a0, a0, 24
1089+
; RV32I-NEXT: or a1, a1, a3
1090+
; RV32I-NEXT: lui a3, 61681
1091+
; RV32I-NEXT: slli a2, a2, 8
1092+
; RV32I-NEXT: or a0, a0, a2
1093+
; RV32I-NEXT: lui a2, 209715
1094+
; RV32I-NEXT: addi a3, a3, -241
1095+
; RV32I-NEXT: or a0, a0, a1
1096+
; RV32I-NEXT: srli a1, a0, 4
1097+
; RV32I-NEXT: and a0, a0, a3
1098+
; RV32I-NEXT: and a1, a1, a3
1099+
; RV32I-NEXT: lui a3, 349525
1100+
; RV32I-NEXT: addi a2, a2, 819
1101+
; RV32I-NEXT: addi a3, a3, 1365
1102+
; RV32I-NEXT: slli a0, a0, 4
1103+
; RV32I-NEXT: or a0, a1, a0
1104+
; RV32I-NEXT: srli a1, a0, 2
1105+
; RV32I-NEXT: and a0, a0, a2
1106+
; RV32I-NEXT: and a1, a1, a2
1107+
; RV32I-NEXT: slli a0, a0, 2
1108+
; RV32I-NEXT: or a0, a1, a0
1109+
; RV32I-NEXT: srli a1, a0, 1
1110+
; RV32I-NEXT: and a0, a0, a3
1111+
; RV32I-NEXT: and a1, a1, a3
1112+
; RV32I-NEXT: slli a0, a0, 1
1113+
; RV32I-NEXT: or a0, a1, a0
1114+
; RV32I-NEXT: ret
1115+
;
1116+
; RV32ZBB-LABEL: brev_i32:
1117+
; RV32ZBB: # %bb.0:
1118+
; RV32ZBB-NEXT: rev8 a0, a0
1119+
; RV32ZBB-NEXT: lui a1, 61681
1120+
; RV32ZBB-NEXT: srli a2, a0, 4
1121+
; RV32ZBB-NEXT: addi a1, a1, -241
1122+
; RV32ZBB-NEXT: and a2, a2, a1
1123+
; RV32ZBB-NEXT: and a0, a0, a1
1124+
; RV32ZBB-NEXT: lui a1, 209715
1125+
; RV32ZBB-NEXT: addi a1, a1, 819
1126+
; RV32ZBB-NEXT: slli a0, a0, 4
1127+
; RV32ZBB-NEXT: or a0, a2, a0
1128+
; RV32ZBB-NEXT: srli a2, a0, 2
1129+
; RV32ZBB-NEXT: and a0, a0, a1
1130+
; RV32ZBB-NEXT: and a1, a2, a1
1131+
; RV32ZBB-NEXT: lui a2, 349525
1132+
; RV32ZBB-NEXT: addi a2, a2, 1365
1133+
; RV32ZBB-NEXT: slli a0, a0, 2
1134+
; RV32ZBB-NEXT: or a0, a1, a0
1135+
; RV32ZBB-NEXT: srli a1, a0, 1
1136+
; RV32ZBB-NEXT: and a0, a0, a2
1137+
; RV32ZBB-NEXT: and a1, a1, a2
1138+
; RV32ZBB-NEXT: slli a0, a0, 1
1139+
; RV32ZBB-NEXT: or a0, a1, a0
1140+
; RV32ZBB-NEXT: ret
1141+
;
1142+
; RV32ZBBXQCIBM-LABEL: brev_i32:
1143+
; RV32ZBBXQCIBM: # %bb.0:
1144+
; RV32ZBBXQCIBM-NEXT: qc.brev32 a0, a0
1145+
; RV32ZBBXQCIBM-NEXT: ret
1146+
%v0 = tail call i32 @llvm.bitreverse.i32(i32 %a0)
1147+
ret i32 %v0
1148+
}
1149+
1150+
define i64 @brev_i64(i64 %a0) {
1151+
; RV32I-LABEL: brev_i64:
1152+
; RV32I: # %bb.0:
1153+
; RV32I-NEXT: srli a2, a1, 8
1154+
; RV32I-NEXT: lui a3, 16
1155+
; RV32I-NEXT: srli a4, a1, 24
1156+
; RV32I-NEXT: slli a5, a1, 24
1157+
; RV32I-NEXT: lui a6, 61681
1158+
; RV32I-NEXT: srli a7, a0, 8
1159+
; RV32I-NEXT: addi a3, a3, -256
1160+
; RV32I-NEXT: and a2, a2, a3
1161+
; RV32I-NEXT: or a2, a2, a4
1162+
; RV32I-NEXT: srli a4, a0, 24
1163+
; RV32I-NEXT: and a7, a7, a3
1164+
; RV32I-NEXT: or a4, a7, a4
1165+
; RV32I-NEXT: lui a7, 209715
1166+
; RV32I-NEXT: and a1, a1, a3
1167+
; RV32I-NEXT: slli a1, a1, 8
1168+
; RV32I-NEXT: or a1, a5, a1
1169+
; RV32I-NEXT: lui a5, 349525
1170+
; RV32I-NEXT: and a3, a0, a3
1171+
; RV32I-NEXT: slli a0, a0, 24
1172+
; RV32I-NEXT: addi a6, a6, -241
1173+
; RV32I-NEXT: addi a7, a7, 819
1174+
; RV32I-NEXT: addi a5, a5, 1365
1175+
; RV32I-NEXT: slli a3, a3, 8
1176+
; RV32I-NEXT: or a0, a0, a3
1177+
; RV32I-NEXT: or a1, a1, a2
1178+
; RV32I-NEXT: or a0, a0, a4
1179+
; RV32I-NEXT: srli a2, a1, 4
1180+
; RV32I-NEXT: and a1, a1, a6
1181+
; RV32I-NEXT: srli a3, a0, 4
1182+
; RV32I-NEXT: and a0, a0, a6
1183+
; RV32I-NEXT: and a2, a2, a6
1184+
; RV32I-NEXT: slli a1, a1, 4
1185+
; RV32I-NEXT: and a3, a3, a6
1186+
; RV32I-NEXT: slli a0, a0, 4
1187+
; RV32I-NEXT: or a1, a2, a1
1188+
; RV32I-NEXT: or a0, a3, a0
1189+
; RV32I-NEXT: srli a2, a1, 2
1190+
; RV32I-NEXT: and a1, a1, a7
1191+
; RV32I-NEXT: srli a3, a0, 2
1192+
; RV32I-NEXT: and a0, a0, a7
1193+
; RV32I-NEXT: and a2, a2, a7
1194+
; RV32I-NEXT: slli a1, a1, 2
1195+
; RV32I-NEXT: and a3, a3, a7
1196+
; RV32I-NEXT: slli a0, a0, 2
1197+
; RV32I-NEXT: or a1, a2, a1
1198+
; RV32I-NEXT: or a0, a3, a0
1199+
; RV32I-NEXT: srli a2, a1, 1
1200+
; RV32I-NEXT: and a1, a1, a5
1201+
; RV32I-NEXT: srli a3, a0, 1
1202+
; RV32I-NEXT: and a0, a0, a5
1203+
; RV32I-NEXT: and a2, a2, a5
1204+
; RV32I-NEXT: slli a1, a1, 1
1205+
; RV32I-NEXT: and a3, a3, a5
1206+
; RV32I-NEXT: slli a4, a0, 1
1207+
; RV32I-NEXT: or a0, a2, a1
1208+
; RV32I-NEXT: or a1, a3, a4
1209+
; RV32I-NEXT: ret
1210+
;
1211+
; RV32ZBB-LABEL: brev_i64:
1212+
; RV32ZBB: # %bb.0:
1213+
; RV32ZBB-NEXT: rev8 a1, a1
1214+
; RV32ZBB-NEXT: lui a2, 61681
1215+
; RV32ZBB-NEXT: lui a3, 209715
1216+
; RV32ZBB-NEXT: rev8 a0, a0
1217+
; RV32ZBB-NEXT: srli a4, a1, 4
1218+
; RV32ZBB-NEXT: addi a2, a2, -241
1219+
; RV32ZBB-NEXT: srli a5, a0, 4
1220+
; RV32ZBB-NEXT: and a4, a4, a2
1221+
; RV32ZBB-NEXT: and a1, a1, a2
1222+
; RV32ZBB-NEXT: and a5, a5, a2
1223+
; RV32ZBB-NEXT: and a0, a0, a2
1224+
; RV32ZBB-NEXT: lui a2, 349525
1225+
; RV32ZBB-NEXT: addi a3, a3, 819
1226+
; RV32ZBB-NEXT: addi a2, a2, 1365
1227+
; RV32ZBB-NEXT: slli a1, a1, 4
1228+
; RV32ZBB-NEXT: slli a0, a0, 4
1229+
; RV32ZBB-NEXT: or a1, a4, a1
1230+
; RV32ZBB-NEXT: or a0, a5, a0
1231+
; RV32ZBB-NEXT: srli a4, a1, 2
1232+
; RV32ZBB-NEXT: and a1, a1, a3
1233+
; RV32ZBB-NEXT: srli a5, a0, 2
1234+
; RV32ZBB-NEXT: and a0, a0, a3
1235+
; RV32ZBB-NEXT: and a4, a4, a3
1236+
; RV32ZBB-NEXT: slli a1, a1, 2
1237+
; RV32ZBB-NEXT: and a3, a5, a3
1238+
; RV32ZBB-NEXT: slli a0, a0, 2
1239+
; RV32ZBB-NEXT: or a1, a4, a1
1240+
; RV32ZBB-NEXT: or a0, a3, a0
1241+
; RV32ZBB-NEXT: srli a3, a1, 1
1242+
; RV32ZBB-NEXT: and a1, a1, a2
1243+
; RV32ZBB-NEXT: srli a4, a0, 1
1244+
; RV32ZBB-NEXT: and a0, a0, a2
1245+
; RV32ZBB-NEXT: and a3, a3, a2
1246+
; RV32ZBB-NEXT: slli a1, a1, 1
1247+
; RV32ZBB-NEXT: and a2, a4, a2
1248+
; RV32ZBB-NEXT: slli a4, a0, 1
1249+
; RV32ZBB-NEXT: or a0, a3, a1
1250+
; RV32ZBB-NEXT: or a1, a2, a4
1251+
; RV32ZBB-NEXT: ret
1252+
;
1253+
; RV32ZBBXQCIBM-LABEL: brev_i64:
1254+
; RV32ZBBXQCIBM: # %bb.0:
1255+
; RV32ZBBXQCIBM-NEXT: qc.brev32 a2, a1
1256+
; RV32ZBBXQCIBM-NEXT: qc.brev32 a1, a0
1257+
; RV32ZBBXQCIBM-NEXT: mv a0, a2
1258+
; RV32ZBBXQCIBM-NEXT: ret
1259+
%v0 = tail call i64 @llvm.bitreverse.i64(i64 %a0)
1260+
ret i64 %v0
1261+
}

0 commit comments

Comments
 (0)