Skip to content

Commit 7f409cd

Browse files
authored
[Object][Wasm] Allow parsing of GC types in type and table sections (#79235)
This change allows a WasmObjectFile to be created from a wasm file even if it uses typed funcrefs and GC types. It does not significantly change how lib/Object models its various internal types (e.g. WasmSignature, WasmElemSegment), so LLVM does not really "support" or understand such files, but it is sufficient to parse the type, global and element sections, discarding types that are not understood. This is useful for low-level binary tools such as nm and objcopy, which use only limited aspects of the binary (such as function definitions) or deal with sections as opaque blobs. This is done by allowing `WasmValType` to have a value of `OTHERREF` (representing any unmodeled reference type), and adding a field to `WasmSignature` indicating it's a placeholder for an unmodeled reference type (since there is a 1:1 correspondence between WasmSignature objects and types in the type section). Then the object file parsers for the type and element sections are expanded to parse encoded reference types and discard any unmodeled fields.
1 parent 7fdb932 commit 7f409cd

File tree

12 files changed

+333
-36
lines changed

12 files changed

+333
-36
lines changed

lld/wasm/InputChunks.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,10 +259,13 @@ class InputFunction : public InputChunk {
259259
file->codeSection->Content.slice(inputSectionOffset, function->Size);
260260
debugName = function->DebugName;
261261
comdat = function->Comdat;
262+
assert(s.Kind != WasmSignature::Placeholder);
262263
}
263264

264265
InputFunction(StringRef name, const WasmSignature &s)
265-
: InputChunk(nullptr, InputChunk::Function, name), signature(s) {}
266+
: InputChunk(nullptr, InputChunk::Function, name), signature(s) {
267+
assert(s.Kind == WasmSignature::Function);
268+
}
266269

267270
static bool classof(const InputChunk *c) {
268271
return c->kind() == InputChunk::Function ||

lld/wasm/InputElement.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ class InputGlobal : public InputElement {
7676
class InputTag : public InputElement {
7777
public:
7878
InputTag(const WasmSignature &s, const WasmTag &t, ObjFile *f)
79-
: InputElement(t.SymbolName, f), signature(s) {}
79+
: InputElement(t.SymbolName, f), signature(s) {
80+
assert(s.Kind == WasmSignature::Tag);
81+
}
8082

8183
const WasmSignature &signature;
8284
};

lld/wasm/InputFiles.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
8181
std::unique_ptr<Binary> bin =
8282
CHECK(createBinary(mb), mb.getBufferIdentifier());
8383
auto *obj = cast<WasmObjectFile>(bin.get());
84+
if (obj->hasUnmodeledTypes())
85+
fatal(toString(mb.getBufferIdentifier()) +
86+
"file has unmodeled reference or GC types");
8487
if (obj->isSharedObject())
8588
return make<SharedFile>(mb);
8689
return make<ObjFile>(mb, archiveName, lazy);

lld/wasm/WriterUtils.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ std::string toString(ValType type) {
3535
return "funcref";
3636
case ValType::EXTERNREF:
3737
return "externref";
38+
case ValType::OTHERREF:
39+
return "otherref";
3840
}
3941
llvm_unreachable("Invalid wasm::ValType");
4042
}

llvm/include/llvm/BinaryFormat/Wasm.h

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,25 @@ enum : unsigned {
5656
WASM_TYPE_F32 = 0x7D,
5757
WASM_TYPE_F64 = 0x7C,
5858
WASM_TYPE_V128 = 0x7B,
59+
WASM_TYPE_NULLFUNCREF = 0x73,
60+
WASM_TYPE_NULLEXTERNREF = 0x72,
61+
WASM_TYPE_NULLREF = 0x71,
5962
WASM_TYPE_FUNCREF = 0x70,
6063
WASM_TYPE_EXTERNREF = 0x6F,
64+
WASM_TYPE_ANYREF = 0x6E,
65+
WASM_TYPE_EQREF = 0x6D,
66+
WASM_TYPE_I31REF = 0x6C,
67+
WASM_TYPE_STRUCTREF = 0x6B,
68+
WASM_TYPE_ARRAYREF = 0x6A,
69+
WASM_TYPE_EXNREF = 0x69,
70+
WASM_TYPE_NONNULLABLE = 0x64,
71+
WASM_TYPE_NULLABLE = 0x63,
6172
WASM_TYPE_FUNC = 0x60,
73+
WASM_TYPE_ARRAY = 0x5E,
74+
WASM_TYPE_STRUCT = 0x5F,
75+
WASM_TYPE_SUB = 0x50,
76+
WASM_TYPE_SUB_FINAL = 0x4F,
77+
WASM_TYPE_REC = 0x4E,
6278
WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
6379
};
6480

@@ -93,6 +109,20 @@ enum : unsigned {
93109
WASM_OPCODE_I64_SUB = 0x7d,
94110
WASM_OPCODE_I64_MUL = 0x7e,
95111
WASM_OPCODE_REF_NULL = 0xd0,
112+
WASM_OPCODE_REF_FUNC = 0xd2,
113+
WASM_OPCODE_GC_PREFIX = 0xfb,
114+
};
115+
116+
// Opcodes in the GC-prefixed space (0xfb)
117+
enum : unsigned {
118+
WASM_OPCODE_STRUCT_NEW = 0x00,
119+
WASM_OPCODE_STRUCT_NEW_DEFAULT = 0x01,
120+
WASM_OPCODE_ARRAY_NEW = 0x06,
121+
WASM_OPCODE_ARRAY_NEW_DEFAULT = 0x07,
122+
WASM_OPCODE_ARRAY_NEW_FIXED = 0x08,
123+
WASM_OPCODE_REF_I31 = 0x1c,
124+
// any.convert_extern and extern.convert_any don't seem to be supported by
125+
// Binaryen.
96126
};
97127

98128
// Opcodes used in synthetic functions.
@@ -127,7 +157,8 @@ enum : unsigned {
127157

128158
enum : unsigned {
129159
WASM_ELEM_SEGMENT_IS_PASSIVE = 0x01,
130-
WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02,
160+
WASM_ELEM_SEGMENT_IS_DECLARATIVE = 0x02, // if passive == 1
161+
WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02, // if passive == 0
131162
WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 0x04,
132163
};
133164
const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND = 0x3;
@@ -229,6 +260,9 @@ enum class ValType {
229260
V128 = WASM_TYPE_V128,
230261
FUNCREF = WASM_TYPE_FUNCREF,
231262
EXTERNREF = WASM_TYPE_EXTERNREF,
263+
// Unmodeled value types include ref types with heap types other than
264+
// func or extern, and type-specialized funcrefs
265+
OTHERREF = 0xff,
232266
};
233267

234268
struct WasmDylinkImportInfo {
@@ -297,6 +331,8 @@ struct WasmInitExprMVP {
297331
} Value;
298332
};
299333

334+
// Extended-const init exprs and exprs with GC types are not explicitly
335+
// modeled, but the raw body of the expr is attached.
300336
struct WasmInitExpr {
301337
uint8_t Extended; // Set to non-zero if extended const is used (i.e. more than
302338
// one instruction)
@@ -367,6 +403,11 @@ struct WasmDataSegment {
367403
uint32_t Comdat; // from the "comdat info" section
368404
};
369405

406+
// Represents a Wasm element segment, with some limitations compared to the spec:
407+
// 1) Does not model passive or declarative segments (Segment will end up with
408+
// an Offset field of i32.const 0)
409+
// 2) Does not model init exprs (Segment will get an empty Functions list)
410+
// 3) Does not model types other than basic funcref/externref (see ValType)
370411
struct WasmElemSegment {
371412
uint32_t Flags;
372413
uint32_t TableNumber;
@@ -436,6 +477,10 @@ struct WasmLinkingData {
436477
struct WasmSignature {
437478
SmallVector<ValType, 1> Returns;
438479
SmallVector<ValType, 4> Params;
480+
// LLVM can parse types other than functions encoded in the type section,
481+
// but does not actually model them. Instead a placeholder signature is
482+
// created in the Object's signature list.
483+
enum { Function, Tag, Placeholder } Kind = Function;
439484
// Support empty and tombstone instances, needed by DenseMap.
440485
enum { Plain, Empty, Tombstone } State = Plain;
441486

llvm/include/llvm/Object/Wasm.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ class WasmSymbol {
3939
const wasm::WasmTableType *TableType,
4040
const wasm::WasmSignature *Signature)
4141
: Info(Info), GlobalType(GlobalType), TableType(TableType),
42-
Signature(Signature) {}
42+
Signature(Signature) {
43+
assert(!Signature || Signature->Kind != wasm::WasmSignature::Placeholder);
44+
}
4345

4446
const wasm::WasmSymbolInfo &Info;
4547
const wasm::WasmGlobalType *GlobalType;
@@ -209,6 +211,7 @@ class WasmObjectFile : public ObjectFile {
209211
Expected<SubtargetFeatures> getFeatures() const override;
210212
bool isRelocatableObject() const override;
211213
bool isSharedObject() const;
214+
bool hasUnmodeledTypes() const { return HasUnmodeledTypes; }
212215

213216
struct ReadContext {
214217
const uint8_t *Start;
@@ -291,6 +294,7 @@ class WasmObjectFile : public ObjectFile {
291294
bool HasLinkingSection = false;
292295
bool HasDylinkSection = false;
293296
bool HasMemory64 = false;
297+
bool HasUnmodeledTypes = false;
294298
wasm::WasmLinkingData LinkingData;
295299
uint32_t NumImportedGlobals = 0;
296300
uint32_t NumImportedTables = 0;

llvm/lib/MC/WasmObjectWriter.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,8 @@ void WasmObjectWriter::writeTableSection(ArrayRef<wasm::WasmTable> Tables) {
972972

973973
encodeULEB128(Tables.size(), W->OS);
974974
for (const wasm::WasmTable &Table : Tables) {
975+
assert(Table.Type.ElemType != wasm::ValType::OTHERREF &&
976+
"Cannot encode general ref-typed tables");
975977
encodeULEB128((uint32_t)Table.Type.ElemType, W->OS);
976978
encodeULEB128(Table.Type.Limits.Flags, W->OS);
977979
encodeULEB128(Table.Type.Limits.Minimum, W->OS);

0 commit comments

Comments
 (0)