|
11 | 11 | //===----------------------------------------------------------------------===//
|
12 | 12 | //
|
13 | 13 | // This file defines the Syntax type, the main public-facing classes and
|
14 |
| -// subclasses for dealing with Swift Syntax. |
| 14 | +// subclasses for dealing with Swift Syntax. It essentially wraps |
| 15 | +// SyntaxData(Ref) and provides convenience APIs (like retrieving children) |
| 16 | +// based on the syntax kind. |
15 | 17 | //
|
16 |
| -// Syntax types contain a strong reference to the root of the tree to keep |
17 |
| -// the subtree above alive, and a weak reference to the data representing |
18 |
| -// the syntax node (weak to prevent retain cycles). All significant public API |
19 |
| -// are contained in Syntax and its subclasses. |
| 18 | +// There are two versions of the Syntax type. |
| 19 | +// SyntaxRef: |
| 20 | +// SyntaxRef is designed around efficiency. It *does not* retain the |
| 21 | +// SyntaxDataRef that stores its data - the user must guarantee that the |
| 22 | +// SyntaxDataRef outlives the SyntaxRef that references it. Instead, |
| 23 | +// SyntaxRef provides a *view* into the SyntaxDataRef and the view provides |
| 24 | +// all convenience APIs. The advantage of this is that the underlying SyntaxData |
| 25 | +// can be stack-allocated and does not need to be copied when the SyntaxRef |
| 26 | +// is being passed around or when the SyntaxRef is being cast. |
20 | 27 | //
|
| 28 | +// Syntax: |
| 29 | +// The syntax nodes are designed for memory safety. Syntax nodes always retain |
| 30 | +// (and ref-count) heap-allocated SyntaxData nodes. While this provides maximum |
| 31 | +// memory safety, the heap allocations and the ref-counting have a significant |
| 32 | +// performance overhead. |
| 33 | +// |
| 34 | +// Note that the two access modes can also be mixed. When a syntax tree is |
| 35 | +// accessed by Syntax (memory-safe) nodes, they can be demoted to SyntaxRef |
| 36 | +// nodes to perform performance-critical tasks. |
21 | 37 | //===----------------------------------------------------------------------===//
|
22 | 38 |
|
23 | 39 | #ifndef SWIFT_SYNTAX_SYNTAX_H
|
@@ -47,19 +63,137 @@ SyntaxNode makeRoot(const RawSyntax *Raw) {
|
47 | 63 |
|
48 | 64 | const auto NoParent = llvm::None;
|
49 | 65 |
|
50 |
| -/// The main handle for syntax nodes - subclasses contain all public |
51 |
| -/// structured editing APIs. |
| 66 | +/// Marker type to construct \c SyntaxRef nodes without validation. This is used |
| 67 | +/// to create \c SyntaxRef inside \c OwnedSyntaxRef and \c |
| 68 | +/// OptionalOwnedSyntaxRef that point to a \c SyntaxDataRef which is yet to be |
| 69 | +/// initialised. |
| 70 | +/// Validation is deferred until these types hand out the \c SyntaxRef. |
| 71 | +struct no_validation_t {}; |
| 72 | + |
| 73 | +// MARK: - OwnedSyntaxRef |
| 74 | + |
| 75 | +/// Holds a \c SyntaxDataRef and provides a \c SyntaxRef (or one of its |
| 76 | +/// subclasses) as an accessor to the \c SyntaxDataRef. |
| 77 | +/// The user of this type needs to make sure that the \c OwnedSyntaxRef always |
| 78 | +/// outlives the \c SyntaxRef provided by it, because otherwise the \c SyntaxRef |
| 79 | +/// points to invalid memory. |
| 80 | +/// It allows transparent access to the \c SyntaxRef through the \c -> operator. |
| 81 | +/// |
| 82 | +/// All methods that return an \c OwnedSyntaxRef should be inlined to avoid |
| 83 | +/// copying the \c SyntaxDataRef, which is rather expensive because the struct |
| 84 | +/// is rather large. |
52 | 85 | ///
|
53 |
| -/// Essentially, this is a wrapper around \c SyntaxData that provides |
54 |
| -/// convenience methods based on the node's kind. |
| 86 | +/// A typical initialisation of an OwnedSyntaxRef looks as follows: |
| 87 | +/// \code |
| 88 | +/// OwnedSyntaxRef<MySyntaxRef> Result; |
| 89 | +/// someSyntaxDataRef.getChildRef(Index, Result.getDataPtr()); |
| 90 | +/// \endcode |
| 91 | +/// The first line creates an empty \c OwnedSyntaxRef with uninitialised memory. |
| 92 | +/// The second line invokes a method that fills \c Data of \c OwnedSyntaxRef. |
| 93 | +/// This way, we directly write the \c SyntaxDataRef to the correct memory |
| 94 | +/// location and avoid copying it around. |
| 95 | +template <typename SyntaxRefType> |
| 96 | +class OwnedSyntaxRef { |
| 97 | + SyntaxDataRef Data; |
| 98 | + SyntaxRefType Ref; |
| 99 | + |
| 100 | +public: |
| 101 | + /// Create an *uninitialised* \c OwnedSyntaxRef. Its storage needs to be |
| 102 | + /// initialised by writing a \c SyntaxDataRef to the pointer returned by |
| 103 | + /// \c getDataPtr() |
| 104 | + /// Implementation Note: We need to initialise \c Ref without validation, |
| 105 | + /// because \c Data is still uninitialised. \c Ref will be validated when |
| 106 | + /// accessed using \c getRef or \c -> . |
| 107 | + OwnedSyntaxRef() : Data(), Ref(getDataPtr(), no_validation_t()) {} |
| 108 | + |
| 109 | + OwnedSyntaxRef(const OwnedSyntaxRef &Other) |
| 110 | + : Data(Other.Data), Ref(getDataPtr(), no_validation_t()) {} |
| 111 | + OwnedSyntaxRef(OwnedSyntaxRef &&Other) |
| 112 | + : Data(std::move(Other.Data)), Ref(getDataPtr(), no_validation_t()) {} |
| 113 | + |
| 114 | + /// The pointer to the location at which \c this stores the \c Data. |
| 115 | + /// Can be used to retroactively populate the \c Data after \c OwnedSyntaxRef |
| 116 | + /// has been constructed with uninitialised memory. |
| 117 | + SyntaxDataRef *getDataPtr() { return &Data; } |
| 118 | + |
| 119 | + const SyntaxRefType &getRef() { |
| 120 | + assert(Ref.getDataRef() == getDataPtr() && |
| 121 | + "Ref no longer pointing to Data?"); |
| 122 | +#ifndef NDEBUG |
| 123 | + // This might be the first access to Ref after Data has been modified. |
| 124 | + // Validate the node. |
| 125 | + Ref.validate(); |
| 126 | +#endif |
| 127 | + return Ref; |
| 128 | + } |
| 129 | + |
| 130 | + const SyntaxRefType *operator->() { |
| 131 | + assert(Ref.getDataRef() == getDataPtr() && |
| 132 | + "Ref no longer pointing to Data?"); |
| 133 | +#ifndef NDEBUG |
| 134 | + // This might be the first access to Ref after Data has been modified. |
| 135 | + // Validate the node. |
| 136 | + Ref.validate(); |
| 137 | +#endif |
| 138 | + return &Ref; |
| 139 | + } |
| 140 | +}; |
| 141 | + |
| 142 | +/// Same as \c OwnedSyntaxRef but can be null. We don't use \c |
| 143 | +/// Optional<OwnedSyntaxRef<SyntaxRefType>>, because then we couldn't access |
| 144 | +/// the underlying \c SyntaxRefType via the \c -> operator (the use of \c -> |
| 145 | +/// would access the \c OwnedSyntaxRef<SyntaxRefType> wrapped by \c Optional and |
| 146 | +/// not the \c SyntaxRefType wrapped by \c OwnedSyntaxRef). |
| 147 | +template <typename SyntaxRefType> |
| 148 | +class OptionalOwnedSyntaxRef { |
| 149 | + Optional<SyntaxDataRef> Data; |
| 150 | + SyntaxRefType Ref; |
| 151 | + |
| 152 | +public: |
| 153 | + OptionalOwnedSyntaxRef() : Data(), Ref(getDataPtr(), no_validation_t()) {} |
| 154 | + |
| 155 | + OptionalOwnedSyntaxRef(const OptionalOwnedSyntaxRef &Other) |
| 156 | + : Data(Other.Data), Ref(getDataPtr(), no_validation_t()) {} |
| 157 | + OptionalOwnedSyntaxRef(OptionalOwnedSyntaxRef &&Other) |
| 158 | + : Data(std::move(Other.Data)), Ref(getDataPtr(), no_validation_t()) {} |
| 159 | + |
| 160 | + SyntaxDataRef *getDataPtr() { return Data.getPointer(); } |
| 161 | + |
| 162 | + bool hasValue() const { return Data.hasValue(); } |
| 163 | + |
| 164 | + explicit operator bool() const { return hasValue(); } |
| 165 | + |
| 166 | + const SyntaxRefType &getRef() { |
| 167 | + assert(Ref.getDataRef() == getDataPtr() && |
| 168 | + "Ref no longer pointing to Data?"); |
| 169 | + assert(hasValue() && "Accessing a OptionalOwnedSyntaxRef without a value"); |
| 170 | +#ifndef NDEBUG |
| 171 | + // This might be the first access to Ref after Data has been populated. |
| 172 | + // Validate the node. |
| 173 | + Ref.validate(); |
| 174 | +#endif |
| 175 | + return Ref; |
| 176 | + } |
| 177 | + |
| 178 | + SyntaxRefType *operator->() { |
| 179 | + assert(Ref.getDataRef() == getDataPtr() && |
| 180 | + "Ref no longer pointing to Data?"); |
| 181 | + assert(hasValue() && "OptionalOwnedSyntaxRef doesn't have a value"); |
| 182 | + return &Ref; |
| 183 | + } |
| 184 | +}; |
| 185 | + |
| 186 | +// MARK: - Syntax |
| 187 | + |
| 188 | +/// See comment on top of file. |
55 | 189 | class Syntax {
|
56 | 190 | protected:
|
57 |
| - RC<const SyntaxData> Data; |
| 191 | + const RC<const SyntaxData> Data; |
58 | 192 |
|
59 | 193 | public:
|
60 |
| - explicit Syntax(const RC<const SyntaxData> &Data) : Data(Data) {} |
61 |
| - |
62 |
| - virtual ~Syntax() {} |
| 194 | + explicit Syntax(const RC<const SyntaxData> &Data) : Data(Data) { |
| 195 | + assert(Data != nullptr && "Syntax must be backed by non-null Data"); |
| 196 | + } |
63 | 197 |
|
64 | 198 | /// Get the kind of syntax.
|
65 | 199 | SyntaxKind getKind() const;
|
@@ -194,8 +328,196 @@ class Syntax {
|
194 | 328 | AbsoluteOffsetPosition getAbsoluteEndPositionAfterTrailingTrivia() const {
|
195 | 329 | return Data->getAbsoluteEndPositionAfterTrailingTrivia();
|
196 | 330 | }
|
| 331 | +}; |
| 332 | + |
| 333 | +// MARK: - SyntaxRef |
| 334 | + |
| 335 | +/// See comment on top of file. |
| 336 | +class SyntaxRef { |
| 337 | + const SyntaxDataRef * const Data; |
| 338 | + |
| 339 | +public: |
| 340 | + /// Create a \c SyntaxRef and validate that the \p Data can actually represent |
| 341 | + /// a \c SyntaxRef. Validation in particular performs checks for derived |
| 342 | + /// types. |
| 343 | + explicit SyntaxRef(const SyntaxDataRef *Data) : Data(Data) { |
| 344 | + assert(Data != nullptr && "SyntaxRef must reference Data"); |
| 345 | + this->validate(); |
| 346 | + } |
| 347 | + SyntaxRef(const SyntaxDataRef *Data, no_validation_t) : Data(Data) { |
| 348 | + assert(Data != nullptr && "SyntaxRef must reference Data"); |
| 349 | + } |
| 350 | + |
| 351 | + /// Demote a \c Syntax to a \c SyntaxRef |
| 352 | + SyntaxRef(const Syntax &Node) : SyntaxRef(Node.getData().get()) {} |
| 353 | + |
| 354 | + void validate() {} |
| 355 | + |
| 356 | + // MARK: - Get underlying data |
| 357 | + |
| 358 | + /// Get the \c SyntaxDataRef that stores the data of this \c SyntaxRef node. |
| 359 | + const SyntaxDataRef *getDataRef() const { |
| 360 | + return Data; |
| 361 | + } |
| 362 | + |
| 363 | + const AbsoluteRawSyntax &getAbsoluteRaw() const { |
| 364 | + return getDataRef()->getAbsoluteRaw(); |
| 365 | + } |
| 366 | + |
| 367 | + /// Get the shared raw syntax. |
| 368 | + const RawSyntax *getRaw() const { return getDataRef()->getRaw(); } |
| 369 | + |
| 370 | + /// Get the kind of syntax. |
| 371 | + SyntaxKind getKind() const { return getRaw()->getKind(); } |
| 372 | + |
| 373 | + /// Get an ID for the \c RawSyntax node backing this \c SyntaxRef which is |
| 374 | + /// stable across incremental parses. |
| 375 | + /// Note that this is different from the \c AbsoluteRawSyntax's \c NodeId, |
| 376 | + /// which uniquely identifies this node in the tree, but is not stable across |
| 377 | + /// incremental parses. |
| 378 | + SyntaxNodeId getId() const { return getRaw()->getId(); } |
| 379 | + |
| 380 | + /// Return the number of bytes this node takes when spelled out in the source, |
| 381 | + /// including trivia. |
| 382 | + size_t getTextLength() const { return getRaw()->getTextLength(); } |
| 383 | + |
| 384 | + // MARK: Parents/children |
| 385 | + |
| 386 | + /// Return the parent of this node, if it has one, otherwise return \c None. |
| 387 | + llvm::Optional<SyntaxRef> getParentRef() const { |
| 388 | + if (auto ParentDataRef = getDataRef()->getParentRef()) { |
| 389 | + return SyntaxRef(ParentDataRef); |
| 390 | + } else { |
| 391 | + return None; |
| 392 | + } |
| 393 | + } |
| 394 | + |
| 395 | + /// Get the number of child nodes in this piece of syntax. |
| 396 | + size_t getNumChildren() const { return getDataRef()->getNumChildren(); } |
| 397 | + |
| 398 | + /// Returns the child index of this node in its parent, if it has one, |
| 399 | + /// otherwise 0. |
| 400 | + CursorIndex getIndexInParent() const { |
| 401 | + return getDataRef()->getIndexInParent(); |
| 402 | + } |
| 403 | + |
| 404 | + /// Get the \p N -th child of this piece of syntax. |
| 405 | + OptionalOwnedSyntaxRef<SyntaxRef> getChildRef(const size_t N) const { |
| 406 | + OptionalOwnedSyntaxRef<SyntaxRef> Result; |
| 407 | + getDataRef()->getChildRef(N, Result.getDataPtr()); |
| 408 | + return Result; |
| 409 | + } |
| 410 | + |
| 411 | + // MARK: Position |
| 412 | + |
| 413 | + /// Get the offset at which the leading trivia of this node starts. |
| 414 | + AbsoluteOffsetPosition getAbsolutePositionBeforeLeadingTrivia() const { |
| 415 | + return getDataRef()->getAbsolutePositionBeforeLeadingTrivia(); |
| 416 | + } |
| 417 | + |
| 418 | + /// Get the offset at which the actual content (i.e. non-trivia) of this node |
| 419 | + /// starts. |
| 420 | + AbsoluteOffsetPosition getAbsolutePositionAfterLeadingTrivia() const { |
| 421 | + return getDataRef()->getAbsolutePositionAfterLeadingTrivia(); |
| 422 | + } |
197 | 423 |
|
198 |
| - // TODO: hasSameStructureAs ? |
| 424 | + /// Get the offset at which the trailing trivia of this node starts. |
| 425 | + AbsoluteOffsetPosition getAbsoluteEndPositionBeforeTrailingTrivia() const { |
| 426 | + return getDataRef()->getAbsoluteEndPositionBeforeTrailingTrivia(); |
| 427 | + } |
| 428 | + |
| 429 | + /// Get the offset at which the trailing trivia of this node ends. |
| 430 | + AbsoluteOffsetPosition getAbsoluteEndPositionAfterTrailingTrivia() const { |
| 431 | + return getDataRef()->getAbsoluteEndPositionAfterTrailingTrivia(); |
| 432 | + } |
| 433 | + |
| 434 | + // MARK: - Get node kind |
| 435 | + |
| 436 | + /// Returns true if this syntax node represents a token. |
| 437 | + bool isToken() const { return getRaw()->isToken(); } |
| 438 | + |
| 439 | + /// Returns true if this syntax node represents a statement. |
| 440 | + bool isStmt() const { return getRaw()->isStmt(); } |
| 441 | + |
| 442 | + /// Returns true if this syntax node represents a declaration. |
| 443 | + bool isDecl() const { return getRaw()->isDecl(); } |
| 444 | + |
| 445 | + /// Returns true if this syntax node represents an expression. |
| 446 | + bool isExpr() const { return getRaw()->isExpr(); } |
| 447 | + |
| 448 | + /// Returns true if this syntax node represents a pattern. |
| 449 | + bool isPattern() const { return getRaw()->isPattern(); } |
| 450 | + |
| 451 | + /// Returns true if this syntax node represents a type. |
| 452 | + bool isType() const { return getRaw()->isType(); } |
| 453 | + |
| 454 | + /// Returns true if this syntax is of some "unknown" kind. |
| 455 | + bool isUnknown() const { return getRaw()->isUnknown(); } |
| 456 | + |
| 457 | + /// Returns true if the node is "missing" in the source (i.e. it was |
| 458 | + /// expected (or optional) but not written). |
| 459 | + bool isMissing() const { return getRaw()->isMissing(); } |
| 460 | + |
| 461 | + /// Returns true if the node is "present" in the source. |
| 462 | + bool isPresent() const { return getRaw()->isPresent(); } |
| 463 | + |
| 464 | + // MARK: Casting |
| 465 | + |
| 466 | + /// Returns true if the syntax node is of the given type \p T. |
| 467 | + template <typename T> |
| 468 | + bool is() const { |
| 469 | + return T::classof(this); |
| 470 | + } |
| 471 | + |
| 472 | + /// Cast this Syntax node to a more specific type, asserting it's of the |
| 473 | + /// right kind \p T. |
| 474 | + template <typename T> |
| 475 | + T castTo() const { |
| 476 | + assert(is<T>() && "castTo<T>() node of incompatible type!"); |
| 477 | + return T(getDataRef()); |
| 478 | + } |
| 479 | + |
| 480 | + /// If this Syntax node is of the right kind \p T, cast and return it, |
| 481 | + /// otherwise return None. |
| 482 | + template <typename T> |
| 483 | + llvm::Optional<T> getAs() const { |
| 484 | + if (is<T>()) { |
| 485 | + return castTo<T>(); |
| 486 | + } else { |
| 487 | + return None; |
| 488 | + } |
| 489 | + } |
| 490 | + |
| 491 | + static bool kindof(SyntaxKind Kind) { return true; } |
| 492 | + |
| 493 | + static bool classof(const SyntaxRef *S) { |
| 494 | + // Trivially true. |
| 495 | + return true; |
| 496 | + } |
| 497 | + |
| 498 | + // MARK: - Miscellaneous |
| 499 | + |
| 500 | + /// Print the syntax node with full fidelity to the given output stream. |
| 501 | + void print(llvm::raw_ostream &OS, |
| 502 | + SyntaxPrintOptions Opts = SyntaxPrintOptions()) const { |
| 503 | + if (auto Raw = getRaw()) { |
| 504 | + Raw->print(OS, Opts); |
| 505 | + } |
| 506 | + } |
| 507 | + |
| 508 | + /// Print a debug representation of the syntax node to the given output stream |
| 509 | + /// and indentation level. |
| 510 | + void dump(llvm::raw_ostream &OS, unsigned Indent = 0) const { |
| 511 | + getRaw()->dump(OS, Indent); |
| 512 | + } |
| 513 | + |
| 514 | + /// Print a debug representation of the syntax node to standard error. |
| 515 | + SWIFT_DEBUG_DUMP { getRaw()->dump(); } |
| 516 | + |
| 517 | + bool hasSameIdentityAs(const SyntaxRef &Other) const { |
| 518 | + return getDataRef()->getAbsoluteRaw().getNodeId() == |
| 519 | + Other.getDataRef()->getAbsoluteRaw().getNodeId(); |
| 520 | + } |
199 | 521 | };
|
200 | 522 |
|
201 | 523 | } // end namespace syntax
|
|
0 commit comments