Skip to content

Commit 035f7a4

Browse files
committed
Add an all-in-one histogram intrinsic, along with lowering for AArch64
The current interface is: llvm.experimental.histogram(<vecty> ptrs, <intty> inc_amount, <vecty> mask). The integer type used by 'inc_amount' needs to match the type of the buckets in memory. The intrinsic covers the following operations: (1) gather load; (2) histogram on the elements of 'ptrs'; (3) multiply the histogram results by 'inc_amount'; (4) add the result of the multiply to the values loaded by the gather; (5) scatter store the results of the add. These operations can obviously be scalarized on platforms without the relevant instructions.
1 parent 24e8c6a commit 035f7a4

File tree

9 files changed

+215
-0
lines changed

9 files changed

+215
-0
lines changed

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,6 +1395,11 @@ enum NodeType {
13951395
// which is later translated to an implicit use in the MIR.
13961396
CONVERGENCECTRL_GLUE,
13971397

1398+
// Experimental vector histogram intrinsic
1399+
// Operands: input chain, inc, mask, base pointer, index, scale
1400+
// Output: output chain
1401+
EXPERIMENTAL_HISTOGRAM,
1402+
13981403
/// BUILTIN_OP_END - This must be the last enum value in this list.
13991404
/// The target-specific pre-isel opcode values start here.
14001405
BUILTIN_OP_END

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,6 +1525,9 @@ class SelectionDAG {
15251525
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
15261526
ISD::MemIndexType IndexType,
15271527
bool IsTruncating = false);
1528+
SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl,
1529+
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
1530+
ISD::MemIndexType IndexType);
15281531

15291532
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT,
15301533
MachineMemOperand *MMO);

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,7 @@ BEGIN_TWO_BYTE_PACK()
542542
friend class MaskedLoadStoreSDNode;
543543
friend class MaskedGatherScatterSDNode;
544544
friend class VPGatherScatterSDNode;
545+
friend class MaskedHistogramSDNode;
545546

546547
uint16_t : NumMemSDNodeBits;
547548

@@ -564,6 +565,7 @@ BEGIN_TWO_BYTE_PACK()
564565
friend class MaskedLoadSDNode;
565566
friend class MaskedGatherSDNode;
566567
friend class VPGatherSDNode;
568+
friend class MaskedHistogramSDNode;
567569

568570
uint16_t : NumLSBaseSDNodeBits;
569571

@@ -1411,6 +1413,7 @@ class MemSDNode : public SDNode {
14111413
return getOperand(2);
14121414
case ISD::MGATHER:
14131415
case ISD::MSCATTER:
1416+
case ISD::EXPERIMENTAL_HISTOGRAM:
14141417
return getOperand(3);
14151418
default:
14161419
return getOperand(1);
@@ -1459,6 +1462,7 @@ class MemSDNode : public SDNode {
14591462
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
14601463
case ISD::GET_FPENV_MEM:
14611464
case ISD::SET_FPENV_MEM:
1465+
case ISD::EXPERIMENTAL_HISTOGRAM:
14621466
return true;
14631467
default:
14641468
return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
@@ -2939,6 +2943,26 @@ class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
29392943
}
29402944
};
29412945

2946+
class MaskedHistogramSDNode : public MemSDNode {
2947+
public:
2948+
friend class SelectionDAG;
2949+
2950+
MaskedHistogramSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
2951+
EVT MemVT, MachineMemOperand *MMO,
2952+
ISD::MemIndexType IndexType)
2953+
: MemSDNode(ISD::EXPERIMENTAL_HISTOGRAM, Order, DL, VTs, MemVT, MMO) {
2954+
LSBaseSDNodeBits.AddressingMode = IndexType;
2955+
}
2956+
2957+
ISD::MemIndexType getIndexType() const {
2958+
return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2959+
}
2960+
2961+
static bool classof(const SDNode *N) {
2962+
return N->getOpcode() == ISD::EXPERIMENTAL_HISTOGRAM;
2963+
}
2964+
};
2965+
29422966
class FPStateAccessSDNode : public MemSDNode {
29432967
public:
29442968
friend class SelectionDAG;

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,6 +1848,13 @@ def int_experimental_vp_strided_load : DefaultAttrsIntrinsic<[llvm_anyvector_ty
18481848
llvm_i32_ty],
18491849
[ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
18501850

1851+
// Experimental histogram
1852+
def int_experimental_histogram : DefaultAttrsIntrinsic<[],
1853+
[ llvm_anyvector_ty, // Vector of pointers
1854+
llvm_anyint_ty, // Increment
1855+
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
1856+
[ IntrNoSync, IntrWillReturn ]>;
1857+
18511858
// Operators
18521859
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
18531860
// Integer arithmetic

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9530,6 +9530,39 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl,
95309530
return V;
95319531
}
95329532

9533+
/// Create an ISD::EXPERIMENTAL_HISTOGRAM memory node, or return an existing
/// identical one found through the CSE map.
///
/// \param VTs       Result value types of the node.
/// \param MemVT     Memory value type, folded into the CSE profile.
/// \param dl        Source location; supplies the IR order and debug loc.
/// \param Ops       Exactly six operands. The SelectionDAGBuilder call site
///                  passes them as {Chain, Inc, Mask, Base, Index, Scale}.
/// \param MMO       Memory operand; its address space and flags are part of
///                  the CSE profile.
/// \param IndexType Memory index type stored on the new node.
/// \returns Result 0 of the (new or pre-existing) node.
SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT,
                                         const SDLoc &dl,
                                         ArrayRef<SDValue> Ops,
                                         MachineMemOperand *MMO,
                                         ISD::MemIndexType IndexType) {
  assert(Ops.size() == 6 && "Incompatible number of operands");

  // Build the profile used to look the node up in the CSE map.
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, ISD::EXPERIMENTAL_HISTOGRAM, VTs, Ops);
  ID.AddInteger(MemVT.getRawBits());
  ID.AddInteger(getSyntheticNodeSubclassData<MaskedHistogramSDNode>(
      dl.getIROrder(), VTs, MemVT, MMO, IndexType));
  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
  ID.AddInteger(MMO->getFlags());
  void *IP = nullptr;
  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
    // The node was profiled with opcode EXPERIMENTAL_HISTOGRAM, so it is a
    // MaskedHistogramSDNode. That class derives directly from MemSDNode, so
    // casting the hit to MaskedGatherSDNode (as was done previously) is an
    // invalid cast to an unrelated sibling class.
    cast<MaskedHistogramSDNode>(E)->refineAlignment(MMO);
    return SDValue(E, 0);
  }

  auto *N = newSDNode<MaskedHistogramSDNode>(dl.getIROrder(), dl.getDebugLoc(),
                                             VTs, MemVT, MMO, IndexType);
  createOperands(N, Ops);

  // FIXME: assert conditions on operands.

  CSEMap.InsertNode(N, IP);
  InsertNode(N);
  SDValue V(N, 0);
  NewSDValueDbgMsg(V, "Creating new node: ", this);
  return V;
}
9565+
95339566
SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
95349567
EVT MemVT, MachineMemOperand *MMO) {
95359568
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7940,6 +7940,56 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
79407940
case Intrinsic::experimental_convergence_entry:
79417941
case Intrinsic::experimental_convergence_loop:
79427942
visitConvergenceControl(I, Intrinsic);
7943+
return;
7944+
case Intrinsic::experimental_histogram: {
7945+
// FIXME: Move this to a separate function.
7946+
Value *Ptr = I.getOperand(0);
7947+
SDValue Inc = getValue(I.getOperand(1));
7948+
SDValue Mask = getValue(I.getOperand(2));
7949+
7950+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7951+
DataLayout TargetDL = DAG.getDataLayout();
7952+
EVT VT = Inc.getValueType();
7953+
Align Alignment = DAG.getEVTAlign(VT);
7954+
7955+
const MDNode *Ranges = getRangeMetadata(I);
7956+
7957+
SDValue Root = DAG.getRoot();
7958+
SDValue Base;
7959+
SDValue Index;
7960+
ISD::MemIndexType IndexType;
7961+
SDValue Scale;
7962+
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
7963+
I.getParent(), VT.getScalarStoreSize());
7964+
7965+
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
7966+
7967+
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
7968+
MachinePointerInfo(AS),
7969+
MachineMemOperand::MOLoad | MachineMemOperand::MOStore,
7970+
MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
7971+
7972+
if (!UniformBase) {
7973+
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
7974+
Index = getValue(Ptr);
7975+
IndexType = ISD::SIGNED_SCALED;
7976+
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
7977+
}
7978+
7979+
EVT IdxVT = Index.getValueType();
7980+
EVT EltTy = IdxVT.getVectorElementType();
7981+
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
7982+
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
7983+
Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
7984+
}
7985+
7986+
SDValue Ops[] = { Root, Inc, Mask, Base, Index, Scale };
7987+
SDValue Histogram = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), VT,
7988+
sdl, Ops, MMO, IndexType);
7989+
7990+
setValue(&I, Histogram);
7991+
DAG.setRoot(Histogram);
7992+
}
79437993
}
79447994
}
79457995

llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
528528
case ISD::PATCHPOINT:
529529
return "patchpoint";
530530

531+
case ISD::EXPERIMENTAL_HISTOGRAM: return "histogram";
532+
531533
// Vector Predication
532534
#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
533535
case ISD::SDID: \

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,6 +1603,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
16031603
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
16041604
}
16051605

1606+
// Histcnt is SVE2 only
1607+
if (Subtarget->hasSVE2() && Subtarget->isSVEAvailable())
1608+
setOperationAction(ISD::EXPERIMENTAL_HISTOGRAM, MVT::Other, Custom);
1609+
16061610
if (!Subtarget->isNeonAvailable()) {
16071611
setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Custom);
16081612
setTruncStoreAction(MVT::v4f32, MVT::v4bf16, Custom);
@@ -6673,6 +6677,56 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
66736677
return LowerFunnelShift(Op, DAG);
66746678
case ISD::FLDEXP:
66756679
return LowerFLDEXP(Op, DAG);
6680+
case ISD::EXPERIMENTAL_HISTOGRAM: {
6681+
// FIXME: Move to another function.
6682+
// FIXME: Maybe share some code with LowerMGather/Scatter?
6683+
MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(Op);
6684+
SDLoc DL(HG);
6685+
SDValue Chain = HG->getOperand(0);
6686+
SDValue Inc = HG->getOperand(1);
6687+
SDValue Mask = HG->getOperand(2);
6688+
SDValue Ptr = HG->getOperand(3);
6689+
SDValue Index = HG->getOperand(4);
6690+
SDValue Scale = HG->getOperand(5);
6691+
6692+
EVT IncVT = Inc.getValueType();
6693+
EVT IndexVT = Index.getValueType();
6694+
EVT MemVT = EVT::getVectorVT(*DAG.getContext(), IncVT,
6695+
IndexVT.getVectorElementCount());
6696+
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
6697+
SDValue PassThru = DAG.getSplatVector(MemVT, DL, Zero);
6698+
SDValue IncSplat = DAG.getSplatVector(MemVT, DL, Inc);
6699+
SDValue Ops[] = {Chain, PassThru, Mask, Ptr, Index, Scale};
6700+
6701+
// Set the MMO to load only, rather than load|store.
6702+
MachineMemOperand *GMMO = HG->getMemOperand();
6703+
GMMO->setFlags(MachineMemOperand::MOLoad);
6704+
ISD::MemIndexType IndexType = HG->getIndexType();
6705+
SDValue Gather =
6706+
DAG.getMaskedGather(DAG.getVTList(MemVT, MVT::Other), MemVT, DL,
6707+
Ops, HG->getMemOperand(),
6708+
IndexType, ISD::NON_EXTLOAD);
6709+
6710+
SDValue GChain = Gather.getValue(1);
6711+
6712+
// Perform the histcnt, multiply by inc, add to bucket data.
6713+
SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_histcnt, DL, IncVT);
6714+
SDValue HistCnt = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, ID, Mask,
6715+
Index, Index);
6716+
SDValue Mul = DAG.getNode(ISD::MUL, DL, MemVT, HistCnt, IncSplat);
6717+
SDValue Add = DAG.getNode(ISD::ADD, DL, MemVT, Gather, Mul);
6718+
6719+
6720+
// Create a new MMO for the scatter.
6721+
MachineMemOperand *SMMO = DAG.getMachineFunction().getMachineMemOperand(
6722+
GMMO->getPointerInfo(), MachineMemOperand::MOStore,
6723+
GMMO->getSize(), GMMO->getAlign(), GMMO->getAAInfo());
6724+
6725+
SDValue ScatterOps[] = { GChain, Add, Mask, Ptr, Index, Scale };
6726+
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MemVT, DL,
6727+
ScatterOps, SMMO, IndexType, false);
6728+
return Scatter;
6729+
}
66766730
}
66776731
}
66786732

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s
3+
4+
define void @histogram_i64(<vscale x 2 x ptr> %buckets, i64 %inc, <vscale x 2 x i1> %mask) #0 {
5+
; CHECK-LABEL: histogram_i64:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: histcnt z1.d, p0/z, z0.d, z0.d
8+
; CHECK-NEXT: mov z3.d, x0
9+
; CHECK-NEXT: ld1d { z2.d }, p0/z, [z0.d]
10+
; CHECK-NEXT: ptrue p1.d
11+
; CHECK-NEXT: mad z1.d, p1/m, z3.d, z2.d
12+
; CHECK-NEXT: st1d { z1.d }, p0, [z0.d]
13+
; CHECK-NEXT: ret
14+
call void @llvm.experimental.histogram.nxv2p0.i64(<vscale x 2 x ptr> %buckets, i64 %inc, <vscale x 2 x i1> %mask)
15+
ret void
16+
}
17+
18+
;; FIXME: We maybe need some dagcombines here? We're multiplying the output of the histcnt
19+
;; by 1, so we should be able to remove that and directly add the histcnt to the
20+
;; current bucket data.
21+
define void @histogram_i32_literal(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask) #0 {
22+
; CHECK-LABEL: histogram_i32_literal:
23+
; CHECK: // %bb.0:
24+
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, z0.s, sxtw #2]
25+
; CHECK-NEXT: ptrue p1.s
26+
; CHECK-NEXT: mov z3.s, #1 // =0x1
27+
; CHECK-NEXT: histcnt z2.s, p0/z, z0.s, z0.s
28+
; CHECK-NEXT: mla z1.s, p1/m, z2.s, z3.s
29+
; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, sxtw #2]
30+
; CHECK-NEXT: ret
31+
32+
%buckets = getelementptr i32, ptr %base, <vscale x 4 x i32> %indices
33+
call void @llvm.experimental.histogram.nxv4p0.i32(<vscale x 4 x ptr> %buckets, i32 1, <vscale x 4 x i1> %mask)
34+
ret void
35+
}
36+
37+
attributes #0 = { "target-features"="+sve2" vscale_range(1, 16) }

0 commit comments

Comments
 (0)