Skip to content

Recommit "[Clang][RISCV] Introduce tuple types for RVV bfloat16 #72216" #72370

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions clang/include/clang/Basic/RISCVVTypes.def
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,62 @@ RVV_VECTOR_TYPE_FLOAT("__rvv_float64m2x4_t", RvvFloat64m2x4, RvvFloat64m2x4Ty, 2

RVV_VECTOR_TYPE_FLOAT("__rvv_float64m4x2_t", RvvFloat64m4x2, RvvFloat64m4x2Ty, 4, 64, 2)

//===- BFloat16 tuple types -------------------------------------------------===//
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x2_t", RvvBFloat16mf4x2, RvvBFloat16mf4x2Ty,
1, 16, 2)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x3_t", RvvBFloat16mf4x3, RvvBFloat16mf4x3Ty,
1, 16, 3)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x4_t", RvvBFloat16mf4x4, RvvBFloat16mf4x4Ty,
1, 16, 4)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x5_t", RvvBFloat16mf4x5, RvvBFloat16mf4x5Ty,
1, 16, 5)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x6_t", RvvBFloat16mf4x6, RvvBFloat16mf4x6Ty,
1, 16, 6)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x7_t", RvvBFloat16mf4x7, RvvBFloat16mf4x7Ty,
1, 16, 7)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x8_t", RvvBFloat16mf4x8, RvvBFloat16mf4x8Ty,
1, 16, 8)

RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x2_t", RvvBFloat16mf2x2, RvvBFloat16mf2x2Ty,
2, 16, 2)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x3_t", RvvBFloat16mf2x3, RvvBFloat16mf2x3Ty,
2, 16, 3)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x4_t", RvvBFloat16mf2x4, RvvBFloat16mf2x4Ty,
2, 16, 4)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x5_t", RvvBFloat16mf2x5, RvvBFloat16mf2x5Ty,
2, 16, 5)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x6_t", RvvBFloat16mf2x6, RvvBFloat16mf2x6Ty,
2, 16, 6)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x7_t", RvvBFloat16mf2x7, RvvBFloat16mf2x7Ty,
2, 16, 7)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x8_t", RvvBFloat16mf2x8, RvvBFloat16mf2x8Ty,
2, 16, 8)

RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x2_t", RvvBFloat16m1x2, RvvBFloat16m1x2Ty,
4, 16, 2)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x3_t", RvvBFloat16m1x3, RvvBFloat16m1x3Ty,
4, 16, 3)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x4_t", RvvBFloat16m1x4, RvvBFloat16m1x4Ty,
4, 16, 4)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x5_t", RvvBFloat16m1x5, RvvBFloat16m1x5Ty,
4, 16, 5)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x6_t", RvvBFloat16m1x6, RvvBFloat16m1x6Ty,
4, 16, 6)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x7_t", RvvBFloat16m1x7, RvvBFloat16m1x7Ty,
4, 16, 7)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x8_t", RvvBFloat16m1x8, RvvBFloat16m1x8Ty,
4, 16, 8)

RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m2x2_t", RvvBFloat16m2x2, RvvBFloat16m2x2Ty,
8, 16, 2)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m2x3_t", RvvBFloat16m2x3, RvvBFloat16m2x3Ty,
8, 16, 3)
RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m2x4_t", RvvBFloat16m2x4, RvvBFloat16m2x4Ty,
8, 16, 4)

RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m4x2_t", RvvBFloat16m4x2, RvvBFloat16m4x2Ty,
16, 16, 2)

#undef RVV_VECTOR_TYPE_BFLOAT
#undef RVV_VECTOR_TYPE_FLOAT
#undef RVV_VECTOR_TYPE_INT
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1101,7 +1101,7 @@ enum PredefinedTypeIDs {
///
/// Type IDs for non-predefined types will start at
/// NUM_PREDEF_TYPE_IDs.
const unsigned NUM_PREDEF_TYPE_IDS = 500;
const unsigned NUM_PREDEF_TYPE_IDS = 502;
Copy link
Collaborator

@topperc topperc Nov 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is 502 ok, but 600 is bad?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not the problem of 502 or 600. The test case failure in clang/test/Modules/decl-params-determinisim.m is because it is expecting the original TypeID before change, and updating the capacity NUM_PREDEF_TYPE_IDS here changes the result and so the test case needs to be updated.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I extended NUM_PREDEF_TYPE_IDS to the least required, and left a note inside the test case to let future developers be aware that the test case it intertwine with the constant.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then your commit message should have explained this better and not said "indeterminate results"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes you are right. My mistake here. Update the PR description.


// Ensure we do not overrun the predefined types we reserved
// in the enum PredefinedTypeIDs above.
Expand Down
5 changes: 3 additions & 2 deletions clang/include/clang/Support/RISCVVIntrinsicUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,14 @@ enum class TypeModifier : uint8_t {
UnsignedInteger = 1 << 3,
SignedInteger = 1 << 4,
Float = 1 << 5,
BFloat = 1 << 6,
// LMUL1 should be kind of VectorTypeModifier, but that might come with
// Widening2XVector for widening reduction.
// However that might require VectorTypeModifier become bitmask rather than
// simple enum, so we decide keek LMUL1 in TypeModifier for code size
// optimization of clang binary size.
LMUL1 = 1 << 6,
MaxOffset = 6,
LMUL1 = 1 << 7,
MaxOffset = 7,
LLVM_MARK_AS_BITMASK_ENUM(LMUL1),
};

Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Support/RISCVVIntrinsicUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,9 @@ void RVVType::applyModifier(const PrototypeDescriptor &Transformer) {
case TypeModifier::Float:
ScalarType = ScalarTypeKind::Float;
break;
case TypeModifier::BFloat:
ScalarType = ScalarTypeKind::BFloat;
break;
case TypeModifier::LMUL1:
LMUL = LMULType(0);
// Update ElementBitwidth need to update Scale too.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,31 @@
// CHECK-NEXT: [[F64M2X3:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 8
// CHECK-NEXT: [[F64M2X4:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 8
// CHECK-NEXT: [[F64M4X2:%.*]] = alloca { <vscale x 4 x double>, <vscale x 4 x double> }, align 8
// CHECK-NEXT: [[BF16MF4X2:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF4X3:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF4X4:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF4X5:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF4X6:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF4X7:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF4X8:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF2X2:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF2X3:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF2X4:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF2X5:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF2X6:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF2X7:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
// CHECK-NEXT: [[BF16MF2X8:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M1X2:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M1X3:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M1X4:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M1X5:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M1X6:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M1X7:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M1X8:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M2X2:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M2X3:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M2X4:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 2
// CHECK-NEXT: [[BF16M4X2:%.*]] = alloca { <vscale x 16 x bfloat>, <vscale x 16 x bfloat> }, align 2
// CHECK-NEXT: ret void
//
void foo () {
Expand Down Expand Up @@ -664,4 +689,34 @@ void foo () {
vfloat64m2x4_t f64m2x4;

vfloat64m4x2_t f64m4x2;
// bf16
vbfloat16mf4x2_t bf16mf4x2;
vbfloat16mf4x3_t bf16mf4x3;
vbfloat16mf4x4_t bf16mf4x4;
vbfloat16mf4x5_t bf16mf4x5;
vbfloat16mf4x6_t bf16mf4x6;
vbfloat16mf4x7_t bf16mf4x7;
vbfloat16mf4x8_t bf16mf4x8;

vbfloat16mf2x2_t bf16mf2x2;
vbfloat16mf2x3_t bf16mf2x3;
vbfloat16mf2x4_t bf16mf2x4;
vbfloat16mf2x5_t bf16mf2x5;
vbfloat16mf2x6_t bf16mf2x6;
vbfloat16mf2x7_t bf16mf2x7;
vbfloat16mf2x8_t bf16mf2x8;

vbfloat16m1x2_t bf16m1x2;
vbfloat16m1x3_t bf16m1x3;
vbfloat16m1x4_t bf16m1x4;
vbfloat16m1x5_t bf16m1x5;
vbfloat16m1x6_t bf16m1x6;
vbfloat16m1x7_t bf16m1x7;
vbfloat16m1x8_t bf16m1x8;

vbfloat16m2x2_t bf16m2x2;
vbfloat16m2x3_t bf16m2x3;
vbfloat16m2x4_t bf16m2x4;

vbfloat16m4x2_t bf16m4x2;
}
21 changes: 13 additions & 8 deletions clang/test/Modules/decl-params-determinisim.m
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,30 @@

/// Spot check entries to make sure they are in current ordering.
/// op13 encodes the anonymous decl number which should be in order.

/// NOTE: This test case is on determinism of TypeID for function declaration.
/// Change related to TypeID (or PredefinedTypeIDs) will affect the result and
/// will require update for this test case.

// CHECK: <TYPE_FUNCTION_PROTO
// CHECK-NEXT: <DECL_PARM_VAR
// CHECK-SAME: op11=4024
// CHECK-NEXT: <DECL_PARM_VAR
// CHECK-SAME: op11=4032
// CHECK-NEXT: <DECL_PARM_VAR
// CHECK-SAME: op11=4040
// CHECK-NEXT: <DECL_PARM_VAR
// CHECK-SAME: op11=4048
// CHECK-NEXT: <DECL_PARM_VAR
// CHECK-SAME: op11=4056
// CHECK-NEXT: <DECL_PARM_VAR
// CHECK-SAME: op11=4064

/// Decl records start at 43
// CHECK: <DECL_RECORD
// CHECK-SAME: op9=4352
// CHECK-NEXT: <DECL_RECORD
// CHECK-SAME: op9=4360
// CHECK-NEXT: <DECL_RECORD
// CHECK-SAME: op9=4368
// CHECK-NEXT: <DECL_RECORD
// CHECK-SAME: op9=4376
// CHECK-NEXT: <DECL_RECORD
// CHECK-SAME: op9=4384
// CHECK-NEXT: <DECL_RECORD
// CHECK-SAME: op9=4392

//--- headers/a.h
void f(struct A0 *a0,
Expand Down
4 changes: 3 additions & 1 deletion clang/utils/TableGen/RISCVVEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,9 @@ void RVVEmitter::createHeader(raw_ostream &OS) {
auto TupleT = TypeCache.computeType(
BT, Log2LMUL,
PrototypeDescriptor(BaseTypeModifier::Vector, getTupleVTM(NF),
TypeModifier::Float));
(BT == BasicType::BFloat16
? TypeModifier::BFloat
: TypeModifier::Float)));
if (TupleT)
printType(*TupleT);
}
Expand Down