Skip to content

Commit f1de9f5

Browse files
vmustyaigcbot
authored and committed
Replace zero dpas accumulator argument with null register in VC
The DPAS instruction interprets a null-register accumulator input (src0) as a zero value, so the compiler doesn't need to emit extra MOV instructions to initialize the DPAS accumulator registers.
1 parent e3333c9 commit f1de9f5

File tree

2 files changed

+81
-4
lines changed

2 files changed

+81
-4
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2023 Intel Corporation
3+
Copyright (C) 2017-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -198,6 +198,7 @@ class GenXPatternMatch : public FunctionPass,
198198
bool simplifyCmp(CmpInst *Cmp);
199199
CmpInst *reduceCmpWidth(CmpInst *Cmp);
200200
bool simplifyNullDst(CallInst *Inst);
201+
bool simplifyDpasNullSrc(CallInst *Inst);
201202
// Transform logic operation with a mask from <N x iM> to <N/(32/M) x i32>
202203
bool extendMask(BinaryOperator *BO);
203204
bool mergeApply(CallInst *CI);
@@ -523,11 +524,20 @@ void GenXPatternMatch::visitBinaryOperator(BinaryOperator &I) {
523524
}
524525

525526
void GenXPatternMatch::visitCallInst(CallInst &I) {
526-
if (Kind == PatternMatchKind::PostLegalization) {
527-
return;
527+
auto IID = vc::getAnyIntrinsicID(&I);
528+
529+
switch (IID) {
530+
default:
531+
break;
532+
case GenXIntrinsic::genx_dpas:
533+
case GenXIntrinsic::genx_dpas2:
534+
Changed |= simplifyDpasNullSrc(&I);
535+
break;
528536
}
529537

530-
auto IID = vc::getAnyIntrinsicID(&I);
538+
if (Kind == PatternMatchKind::PostLegalization)
539+
return;
540+
531541
switch (IID) {
532542
default:
533543
break;
@@ -3719,6 +3729,47 @@ bool GenXPatternMatch::simplifyNullDst(CallInst *Inst) {
37193729
return false;
37203730
}
37213731

3732+
// Replaces a genx_dpas / genx_dpas2 call whose accumulator operand (operand 0)
// is a constant zero with a call to genx_dpas_nosrc0, which takes no
// accumulator operand.
//
// Inst must be a genx_dpas or genx_dpas2 call; this is asserted.
//
// Returns true when the call was rewritten (Inst is erased in that case),
// and false when the accumulator is not a constant zero and nothing changed.
bool GenXPatternMatch::simplifyDpasNullSrc(CallInst *Inst) {
  const auto IntrinsicID = vc::getAnyIntrinsicID(Inst);
  const bool IsDpas2 = IntrinsicID == GenXIntrinsic::genx_dpas2;
  IGC_ASSERT_EXIT(IntrinsicID == GenXIntrinsic::genx_dpas || IsDpas2);

  // Only a constant accumulator that is known to be zero can be dropped.
  auto *ZeroAcc = dyn_cast<Constant>(Inst->getArgOperand(0));
  if (ZeroAcc == nullptr)
    return false;
  if (!ZeroAcc->isZeroValue())
    return false;

  IRBuilder<> IRB(Inst);

  Value *FirstSrc = Inst->getArgOperand(1);
  Value *SecondSrc = Inst->getArgOperand(2);

  // genx_dpas already carries a single descriptor in operand 3, which is
  // forwarded unchanged to the replacement call.
  Value *Descriptor = Inst->getArgOperand(3);

  if (IsDpas2) {
    // genx_dpas2 passes the parameters as separate constant operands 3..6.
    // Take the low byte of each and pack them into one i32 descriptor:
    // bits 0-7 src1 precision, bits 8-15 src2 precision,
    // bits 16-23 systolic depth, bits 24-31 repeat count.
    auto LowByteOfOperand = [Inst](unsigned OpIdx) {
      return cast<ConstantInt>(Inst->getArgOperand(OpIdx))->getZExtValue() &
             0xff;
    };
    const auto PrecisionOfSrc1 = LowByteOfOperand(3);
    const auto PrecisionOfSrc2 = LowByteOfOperand(4);
    const auto Depth = LowByteOfOperand(5);
    const auto Repeat = LowByteOfOperand(6);

    Descriptor = IRB.getInt32(PrecisionOfSrc1 | (PrecisionOfSrc2 << 8) |
                              (Depth << 16) | (Repeat << 24));
  }

  // The replacement intrinsic is overloaded on the result type and on the
  // types of the two remaining sources.
  auto *NoSrc0Decl = vc::getAnyDeclaration(
      Inst->getModule(), GenXIntrinsic::genx_dpas_nosrc0,
      {Inst->getType(), FirstSrc->getType(), SecondSrc->getType()});
  auto *Replacement =
      IRB.CreateCall(NoSrc0Decl, {FirstSrc, SecondSrc, Descriptor});

  // Hand the name and all uses over to the new call, then drop the old one.
  Replacement->takeName(Inst);
  Inst->replaceAllUsesWith(Replacement);
  Inst->eraseFromParent();

  return true;
}
3772+
37223773
bool canExtendMask(BinaryOperator *BO) {
37233774
Type *InstTy = BO->getType();
37243775
auto Op0 = dyn_cast<ConstantDataVector>(BO->getOperand(0));
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: %opt %use_old_pass_manager% -GenXPatternMatch -march=genx64 -mcpu=XeHPC -mtriple=spir64-unknown-unknown -S < %s | FileCheck %s
10+
11+
declare <16 x i32> @llvm.genx.dpas.v16i32.v16i32.v128i32.v8i32(<16 x i32>, <128 x i32>, <8 x i32>, i32)
12+
declare <16 x i32> @llvm.genx.dpas2.v16i32.v16i32.v128i32.v8i32(<16 x i32>, <128 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32)
13+
14+
; Case 1: a genx.dpas call with a zeroinitializer accumulator. The expected
; output is a genx.dpas.nosrc0 call that keeps %src1, %src2 and the same i32
; descriptor operand, with the accumulator operand removed.
; CHECK-LABEL: @dpas(
15+
define <16 x i32> @dpas(<128 x i32> %src1, <8 x i32> %src2) {
16+
; CHECK: %res = call <16 x i32> @llvm.genx.dpas.nosrc0.v16i32.v128i32.v8i32(<128 x i32> %src1, <8 x i32> %src2, i32 17303560)
17+
%res = call <16 x i32> @llvm.genx.dpas.v16i32.v16i32.v128i32.v8i32(<16 x i32> zeroinitializer, <128 x i32> %src1, <8 x i32> %src2, i32 17303560)
18+
ret <16 x i32> %res
19+
}
20+
21+
; Case 2: a genx.dpas2 call with a zeroinitializer accumulator. The first four
; i32 operands after %src2 (8, 8, 8, 1) are expected to be packed one byte
; each into a single descriptor: 8 | 8 << 8 | 8 << 16 | 1 << 24 = 17303560.
; The last two i32 operands do not appear in the expected genx.dpas.nosrc0
; call.
; CHECK-LABEL: @dpas2(
22+
define <16 x i32> @dpas2(<128 x i32> %src1, <8 x i32> %src2) {
23+
; CHECK: %res = call <16 x i32> @llvm.genx.dpas.nosrc0.v16i32.v128i32.v8i32(<128 x i32> %src1, <8 x i32> %src2, i32 17303560)
24+
%res = call <16 x i32> @llvm.genx.dpas2.v16i32.v16i32.v128i32.v8i32(<16 x i32> zeroinitializer, <128 x i32> %src1, <8 x i32> %src2, i32 8, i32 8, i32 8, i32 1, i32 1, i32 1)
25+
ret <16 x i32> %res
26+
}

0 commit comments

Comments
 (0)