Skip to content

[HLSL][SPIRV][DXIL] Implement WaveActiveMax intrinsic #123428

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4795,6 +4795,12 @@ def HLSLWaveActiveCountBits : LangBuiltin<"HLSL_LANG"> {
let Prototype = "unsigned int(bool)";
}

// Clang builtin spelled __builtin_hlsl_wave_active_max, available only for
// HLSL. The prototype is declared variadic ("void (...)"), so neither the
// operand count nor the operand/result types are expressed here.
// NOTE(review): presumably Sema validates the single operand and sets the
// call's result type -- confirm in SemaHLSL::CheckBuiltinFunctionCall.
def HLSLWaveActiveMax : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_wave_active_max"];
let Attributes = [NoThrow, Const];
let Prototype = "void (...)";
}

def HLSLWaveActiveSum : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_wave_active_sum"];
let Attributes = [NoThrow, Const];
Expand Down
36 changes: 36 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19295,6 +19295,25 @@ static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
}
}

// Return the wave active max intrinsic that corresponds to the target
// architecture and to the signedness of QT.
//
// QT may be a scalar or a vector type. Unsigned integer element types select
// the "umax" intrinsic; every other type (signed integers and floating point)
// selects the "max" intrinsic. RT is accepted for signature parity with the
// sibling getWaveActive*Intrinsic helpers and is not consulted here.
static Intrinsic::ID getWaveActiveMaxIntrinsic(llvm::Triple::ArchType Arch,
                                               CGHLSLRuntime &RT, QualType QT) {
  // isUnsignedIntegerType() is false for vector types, which would silently
  // pick the signed intrinsic for operands such as uint4. Querying the integer
  // representation looks through the vector to its element type.
  const bool IsUnsigned = QT->hasUnsignedIntegerRepresentation();

  switch (Arch) {
  case llvm::Triple::spirv:
    return IsUnsigned ? llvm::Intrinsic::spv_wave_reduce_umax
                      : llvm::Intrinsic::spv_wave_reduce_max;
  case llvm::Triple::dxil:
    return IsUnsigned ? llvm::Intrinsic::dx_wave_reduce_umax
                      : llvm::Intrinsic::dx_wave_reduce_max;
  default:
    llvm_unreachable("Intrinsic WaveActiveMax"
                     " not supported by target architecture");
  }
}

Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
Expand Down Expand Up @@ -19624,6 +19643,23 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
/*AssumeConvergent=*/true),
ArrayRef{OpExpr}, "hlsl.wave.active.sum");
}
// Lower __builtin_hlsl_wave_active_max to a call to the target's overloaded
// wave max reduction intrinsic, emitted as a convergent runtime function.
case Builtin::BI__builtin_hlsl_wave_active_max: {
// Due to the use of variadic arguments, explicitly retrieve the argument
Value *OpExpr = EmitScalarExpr(E->getArg(0));
// The callee takes one operand and returns a value of that same LLVM type.
llvm::FunctionType *FT = llvm::FunctionType::get(
OpExpr->getType(), ArrayRef{OpExpr->getType()}, false);
// The intrinsic is chosen from the target architecture and the operand's
// source-level (Clang) type.
Intrinsic::ID IID = getWaveActiveMaxIntrinsic(
getTarget().getTriple().getArch(), CGM.getHLSLRuntime(),
E->getArg(0)->getType());

// Get overloaded name
std::string Name =
Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule());
return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
/*Local=*/false,
/*AssumeConvergent=*/true),
ArrayRef{OpExpr}, "hlsl.wave.active.max");
}
case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
// We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
// defined in SPIRVBuiltins.td. So instead we manually get the matching name
Expand Down
99 changes: 99 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -2468,6 +2468,105 @@ __attribute__((convergent)) double3 WaveReadLaneAt(double3, int32_t);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_read_lane_at)
__attribute__((convergent)) double4 WaveReadLaneAt(double4, int32_t);

//===----------------------------------------------------------------------===//
// WaveActiveMax builtins
//===----------------------------------------------------------------------===//

// Overloads of WaveActiveMax. Every overload is an alias of
// __builtin_hlsl_wave_active_max, is marked convergent, and returns the same
// type it takes (scalar or 2/3/4-component vector).
// NOTE(review): presumably yields the maximum of the argument across the
// active lanes of the wave, per component for vectors -- confirm against the
// HLSL WaveActiveMax documentation.

// half overloads; availability is expressed through _HLSL_16BIT_AVAILABILITY.
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) half WaveActiveMax(half);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) half2 WaveActiveMax(half2);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) half3 WaveActiveMax(half3);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) half4 WaveActiveMax(half4);

// 16-bit integer overloads; only declared when __HLSL_ENABLE_16_BIT is defined.
#ifdef __HLSL_ENABLE_16_BIT
_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int16_t WaveActiveMax(int16_t);
_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int16_t2 WaveActiveMax(int16_t2);
_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int16_t3 WaveActiveMax(int16_t3);
_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int16_t4 WaveActiveMax(int16_t4);

_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint16_t WaveActiveMax(uint16_t);
_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint16_t2 WaveActiveMax(uint16_t2);
_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint16_t3 WaveActiveMax(uint16_t3);
_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint16_t4 WaveActiveMax(uint16_t4);
#endif

// 32-bit signed integer overloads.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int WaveActiveMax(int);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int2 WaveActiveMax(int2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int3 WaveActiveMax(int3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int4 WaveActiveMax(int4);

// 32-bit unsigned integer overloads.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint WaveActiveMax(uint);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint2 WaveActiveMax(uint2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint3 WaveActiveMax(uint3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint4 WaveActiveMax(uint4);

// 64-bit signed integer overloads.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int64_t WaveActiveMax(int64_t);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int64_t2 WaveActiveMax(int64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int64_t3 WaveActiveMax(int64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) int64_t4 WaveActiveMax(int64_t4);

// 64-bit unsigned integer overloads.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint64_t WaveActiveMax(uint64_t);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint64_t2 WaveActiveMax(uint64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint64_t3 WaveActiveMax(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) uint64_t4 WaveActiveMax(uint64_t4);

// float overloads.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) float WaveActiveMax(float);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) float2 WaveActiveMax(float2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) float3 WaveActiveMax(float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) float4 WaveActiveMax(float4);

// double overloads.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) double WaveActiveMax(double);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) double2 WaveActiveMax(double2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) double3 WaveActiveMax(double3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_max)
__attribute__((convergent)) double4 WaveActiveMax(double4);

//===----------------------------------------------------------------------===//
// WaveActiveSum builtins
//===----------------------------------------------------------------------===//
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2430,6 +2430,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
TheCall->setType(ArgTyA);
break;
}
case Builtin::BI__builtin_hlsl_wave_active_max:
case Builtin::BI__builtin_hlsl_wave_active_sum: {
if (SemaRef.checkArgCount(TheCall, 1))
return true;
Expand Down
46 changes: 46 additions & 0 deletions clang/test/CodeGenHLSL/builtins/WaveActiveMax.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV

// Test basic lowering to runtime function call.

// CHECK-LABEL: test_int
// Signed 32-bit scalar operand: both targets should call the signed "max"
// reduction overloaded on i32 and return its result directly.
int test_int(int expr) {
// CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.max.i32([[TY]] %[[#]])
// CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.max.i32([[TY]] %[[#]])
// CHECK: ret [[TY]] %[[RET]]
return WaveActiveMax(expr);
}

// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.max.i32([[TY]]) #[[#attr:]]
// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.max.i32([[TY]]) #[[#attr:]]

// CHECK-LABEL: test_uint64_t
// Unsigned 64-bit scalar operand: both targets should call the unsigned "umax"
// reduction overloaded on i64 and return its result directly.
uint64_t test_uint64_t(uint64_t expr) {
// CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.umax.i64([[TY]] %[[#]])
// CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.umax.i64([[TY]] %[[#]])
// CHECK: ret [[TY]] %[[RET]]
return WaveActiveMax(expr);
}

// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.umax.i64([[TY]]) #[[#attr:]]
// CHECK-SPIRV: declare spir_func [[TY]] @llvm.spv.wave.reduce.umax.i64([[TY]]) #[[#attr:]]

// Test basic lowering to runtime function call with a float vector value.

// CHECK-LABEL: test_floatv4
// float4 operand: both targets should call the "max" reduction overloaded on
// v4f32, carrying the fast-math flags, and return its result directly.
float4 test_floatv4(float4 expr) {
  // The SPIR-V pattern is closed with ')' like every other call pattern in this
  // file, so the whole argument list is matched rather than just a prefix.
  // CHECK-SPIRV: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.reduce.max.v4f32([[TY1]] %[[#]])
  // CHECK-DXIL: %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.reduce.max.v4f32([[TY1]] %[[#]])
  // CHECK: ret [[TY1]] %[[RET1]]
  return WaveActiveMax(expr);
}

// CHECK-DXIL: declare [[TY1]] @llvm.dx.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]
// CHECK-SPIRV: declare spir_func [[TY1]] @llvm.spv.wave.reduce.max.v4f32([[TY1]]) #[[#attr]]

// CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}

29 changes: 29 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/WaveActiveMax-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify

// Calling the builtin with no operand must be diagnosed as too few arguments.
int test_too_few_arg() {
return __builtin_hlsl_wave_active_max();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
}

// Calling the builtin with two operands must be diagnosed as too many arguments.
float2 test_too_many_arg(float2 p0) {
return __builtin_hlsl_wave_active_max(p0, p0);
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
}

// A scalar bool operand must be rejected as an invalid operand type.
bool test_expr_bool_type_check(bool p0) {
return __builtin_hlsl_wave_active_max(p0);
// expected-error@-1 {{invalid operand of type 'bool'}}
}

// A bool vector operand must be rejected as an invalid operand type.
bool2 test_expr_bool_vec_type_check(bool2 p0) {
return __builtin_hlsl_wave_active_max(p0);
// expected-error@-1 {{invalid operand of type 'bool2' (aka 'vector<bool, 2>')}}
}

// Aggregate type used below as an operand that is neither a scalar nor a vector.
struct S { float f; };

// A struct operand must be rejected: a scalar or vector is required.
S test_expr_struct_type_check(S p0) {
return __builtin_hlsl_wave_active_max(p0);
// expected-error@-1 {{invalid operand of type 'S' where a scalar or vector is required}}
}

2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ def int_dx_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1
def int_dx_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_getlaneindex : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent, IntrNoMem]>;
// Wave max reductions for DirectX. The result type matches the single operand
// type; both are convergent and access no memory. The signed/float variant is
// overloaded on any type, the unsigned variant only on integer types.
def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_sum : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_usum : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsSPIRV.td
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ let TargetPrefix = "spv" in {
def int_spv_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
// Wave max reductions for SPIR-V. The result type matches the single operand
// type; both are convergent and access no memory. Both variants are declared
// overloaded on any type.
// NOTE(review): the DirectX umax counterpart is restricted to llvm_anyint_ty;
// confirm whether the unsigned variant here should be restricted as well.
def int_spv_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_reduce_sum : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
def int_spv_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Target/DirectX/DXIL.td
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,16 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> {
IntrinArgIndex<0>, IntrinArgI8<WaveOpKind_Sum>,
IntrinArgI8<SignedOpKind_Unsigned>
]>,
IntrinSelect<int_dx_wave_reduce_max,
[
IntrinArgIndex<0>, IntrinArgI8<WaveOpKind_Max>,
IntrinArgI8<SignedOpKind_Signed>
]>,
IntrinSelect<int_dx_wave_reduce_umax,
[
IntrinArgIndex<0>, IntrinArgI8<WaveOpKind_Max>,
IntrinArgI8<SignedOpKind_Unsigned>
]>,
];

let arguments = [OverloadTy, Int8Ty, Int8Ty];
Expand All @@ -1008,7 +1018,7 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> {
Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int16Ty, Int32Ty, Int64Ty]>
];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
let attributes = [Attributes<DXIL1_0, []>];
}

def WaveAllBitCount : DXILOp<135, waveAllOp> {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
switch (ID) {
case Intrinsic::dx_frac:
case Intrinsic::dx_rsqrt:
case Intrinsic::dx_wave_reduce_max:
case Intrinsic::dx_wave_reduce_umax:
case Intrinsic::dx_wave_reduce_sum:
case Intrinsic::dx_wave_reduce_usum:
case Intrinsic::dx_wave_readlane:
Expand Down
35 changes: 35 additions & 0 deletions llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectDot4AddPackedExpansion(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;

// Selects a wave max reduction for intrinsic instruction I, defining ResVReg
// with type ResType. IsUnsigned distinguishes the unsigned intrinsic
// (spv_wave_reduce_umax) from the signed/float one (spv_wave_reduce_max).
bool selectWaveReduceMax(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I, bool IsUnsigned) const;

bool selectWaveReduceSum(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;

Expand Down Expand Up @@ -2132,6 +2135,34 @@ bool SPIRVInstructionSelector::selectWaveActiveCountBits(
return Result;
}

/// Select a wave max reduction for the spv_wave_reduce_max and
/// spv_wave_reduce_umax intrinsics.
///
/// Builds a single OpGroupNonUniform{F,U,S}Max instruction with Subgroup scope
/// and the Reduce group operation. A floating-point input (scalar or vector)
/// always uses FMax; otherwise \p IsUnsigned chooses UMax over SMax.
///
/// \param ResVReg    Register defined by the emitted instruction.
/// \param ResType    SPIR-V type of the result.
/// \param I          Intrinsic instruction being selected; operand 2 holds the
///                   value to reduce.
/// \param IsUnsigned Whether integer inputs are compared as unsigned.
/// \returns the outcome of constraining the new instruction's register uses.
bool SPIRVInstructionSelector::selectWaveReduceMax(Register ResVReg,
                                                   const SPIRVType *ResType,
                                                   MachineInstr &I,
                                                   bool IsUnsigned) const {
  assert(I.getNumOperands() == 3);
  assert(I.getOperand(2).isReg());
  MachineBasicBlock &ParentBB = *I.getParent();
  const Register SrcReg = I.getOperand(2).getReg();

  // The source register must already carry a SPIR-V type.
  if (GR.getSPIRVTypeForVReg(SrcReg) == nullptr)
    report_fatal_error("Input Type could not be determined.");

  // 32-bit integer type for the scope constant operand.
  SPIRVType *ScopeTy = GR.getOrCreateSPIRVIntegerType(32, I, TII);

  // Pick the reduction opcode from the input's element kind and signedness.
  unsigned ReduceOpcode;
  if (GR.isScalarOrVectorOfType(SrcReg, SPIRV::OpTypeFloat))
    ReduceOpcode = SPIRV::OpGroupNonUniformFMax;
  else if (IsUnsigned)
    ReduceOpcode = SPIRV::OpGroupNonUniformUMax;
  else
    ReduceOpcode = SPIRV::OpGroupNonUniformSMax;

  return BuildMI(ParentBB, I, I.getDebugLoc(), TII.get(ReduceOpcode))
      .addDef(ResVReg)
      .addUse(GR.getSPIRVTypeID(ResType))
      .addUse(GR.getOrCreateConstInt(SPIRV::Scope::Subgroup, I, ScopeTy, TII))
      .addImm(SPIRV::GroupOperation::Reduce)
      .addUse(SrcReg)
      .constrainAllUses(TII, TRI, RBI);
}

bool SPIRVInstructionSelector::selectWaveReduceSum(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
Expand Down Expand Up @@ -3086,6 +3117,10 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectWaveOpInst(ResVReg, ResType, I, SPIRV::OpGroupNonUniformAny);
case Intrinsic::spv_wave_is_first_lane:
return selectWaveOpInst(ResVReg, ResType, I, SPIRV::OpGroupNonUniformElect);
case Intrinsic::spv_wave_reduce_umax:
return selectWaveReduceMax(ResVReg, ResType, I, /*IsUnsigned*/ true);
case Intrinsic::spv_wave_reduce_max:
return selectWaveReduceMax(ResVReg, ResType, I, /*IsUnsigned*/ false);
case Intrinsic::spv_wave_reduce_sum:
return selectWaveReduceSum(ResVReg, ResType, I);
case Intrinsic::spv_wave_readlane:
Expand Down
Loading
Loading