Skip to content

Commit 267c526

Browse files
wangpc-pp and github-actions[bot]
authored and committed
Automerge: [RISCV] Support non-power-of-2 types when expanding memcmp
We can convert non-power-of-2 types into extended value types, and they will then be widened. Reviewers: lukel97 Reviewed By: lukel97 Pull Request: llvm/llvm-project#114971
2 parents 1105405 + ca29c63 commit 267c526

File tree

5 files changed

+709
-976
lines changed

5 files changed

+709
-976
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16190,10 +16190,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
1619016190
return SDValue();
1619116191

1619216192
unsigned OpSize = OpVT.getSizeInBits();
16193-
// TODO: Support non-power-of-2 types.
16194-
if (!isPowerOf2_32(OpSize))
16195-
return SDValue();
16196-
1619716193
// The size should be larger than XLen and smaller than the maximum vector
1619816194
// size.
1619916195
if (OpSize <= Subtarget.getXLen() ||
@@ -16214,14 +16210,25 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
1621416210
Attribute::NoImplicitFloat))
1621516211
return SDValue();
1621616212

16213+
// Bail out for non-byte-sized types.
16214+
if (!OpVT.isByteSized())
16215+
return SDValue();
16216+
1621716217
unsigned VecSize = OpSize / 8;
16218-
EVT VecVT = MVT::getVectorVT(MVT::i8, VecSize);
16219-
EVT CmpVT = MVT::getVectorVT(MVT::i1, VecSize);
16218+
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16219+
EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
1622016220

1622116221
SDValue VecX = DAG.getBitcast(VecVT, X);
1622216222
SDValue VecY = DAG.getBitcast(VecVT, Y);
16223-
SDValue Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
16224-
return DAG.getSetCC(DL, VT, DAG.getNode(ISD::VECREDUCE_OR, DL, XLenVT, Cmp),
16223+
SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16224+
SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16225+
16226+
SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16227+
DAG.getCondCode(ISD::SETNE), Mask, VL);
16228+
return DAG.getSetCC(DL, VT,
16229+
DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16230+
DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16231+
VL),
1622516232
DAG.getConstant(0, DL, XLenVT), CC);
1622616233
}
1622716234

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2985,20 +2985,13 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
29852985
}
29862986

29872987
if (IsZeroCmp && ST->hasVInstructions()) {
2988-
unsigned RealMinVLen = ST->getRealMinVLen();
2989-
// Support Fractional LMULs if the lengths are larger than XLen.
2990-
// TODO: Support non-power-of-2 types.
2991-
for (unsigned FLMUL = 8; FLMUL >= 2; FLMUL /= 2) {
2992-
unsigned Len = RealMinVLen / FLMUL;
2993-
if (Len > ST->getXLen())
2994-
Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
2995-
}
2996-
for (unsigned LMUL = 1; LMUL <= ST->getMaxLMULForFixedLengthVectors();
2997-
LMUL *= 2) {
2998-
unsigned Len = RealMinVLen * LMUL;
2999-
if (Len > ST->getXLen())
3000-
Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
3001-
}
2988+
unsigned VLenB = ST->getRealMinVLen() / 8;
2989+
// The minimum size should be `XLen / 8 + 1`, and the maximum size should be
2990+
// `VLenB * MaxLMUL` so that it fits in a single register group.
2991+
unsigned MinSize = ST->getXLen() / 8 + 1;
2992+
unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
2993+
for (unsigned Size = MinSize; Size <= MaxSize; Size++)
2994+
Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);
30022995
}
30032996
return Options;
30042997
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -O2 < %s | FileCheck %s --check-prefix=CHECK-RV32
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -O2 < %s | FileCheck %s --check-prefix=CHECK-RV64
4+
5+
define i1 @icmp_non_byte_type(ptr %p1, ptr %p2) nounwind {
6+
; CHECK-RV32-LABEL: icmp_non_byte_type:
7+
; CHECK-RV32: # %bb.0:
8+
; CHECK-RV32-NEXT: lw a2, 0(a0)
9+
; CHECK-RV32-NEXT: lw a3, 4(a0)
10+
; CHECK-RV32-NEXT: lw a4, 8(a0)
11+
; CHECK-RV32-NEXT: lw a0, 12(a0)
12+
; CHECK-RV32-NEXT: lw a5, 12(a1)
13+
; CHECK-RV32-NEXT: lw a6, 4(a1)
14+
; CHECK-RV32-NEXT: lw a7, 8(a1)
15+
; CHECK-RV32-NEXT: lw a1, 0(a1)
16+
; CHECK-RV32-NEXT: xor a0, a0, a5
17+
; CHECK-RV32-NEXT: xor a3, a3, a6
18+
; CHECK-RV32-NEXT: xor a4, a4, a7
19+
; CHECK-RV32-NEXT: xor a1, a2, a1
20+
; CHECK-RV32-NEXT: or a0, a3, a0
21+
; CHECK-RV32-NEXT: or a1, a1, a4
22+
; CHECK-RV32-NEXT: or a0, a1, a0
23+
; CHECK-RV32-NEXT: seqz a0, a0
24+
; CHECK-RV32-NEXT: ret
25+
;
26+
; CHECK-RV64-LABEL: icmp_non_byte_type:
27+
; CHECK-RV64: # %bb.0:
28+
; CHECK-RV64-NEXT: ld a2, 0(a0)
29+
; CHECK-RV64-NEXT: ld a0, 8(a0)
30+
; CHECK-RV64-NEXT: ld a3, 8(a1)
31+
; CHECK-RV64-NEXT: ld a1, 0(a1)
32+
; CHECK-RV64-NEXT: xor a0, a0, a3
33+
; CHECK-RV64-NEXT: xor a1, a2, a1
34+
; CHECK-RV64-NEXT: or a0, a1, a0
35+
; CHECK-RV64-NEXT: seqz a0, a0
36+
; CHECK-RV64-NEXT: ret
37+
%v1 = load i127, ptr %p1
38+
%v2 = load i127, ptr %p2
39+
%ret = icmp eq i127 %v1, %v2
40+
ret i1 %ret
41+
}

0 commit comments

Comments
 (0)