Skip to content

[5.0] Cherry pick swiftSyntax related changes to swift-5.0-branch #18792

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 39 commits into from
Aug 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
7899a22
[libSyntax] Enable serialization of syntax trees for incremental tran…
ahoppen Jul 16, 2018
28514cd
[libSyntax] Add incremental syntax tree deserialization to SwiftSyntax
ahoppen May 24, 2018
52274db
[incrParse] Add test cases to test the incremental syntax tree transfer
ahoppen Jul 20, 2018
9767e9f
[libSyntax] Add test case to match the incrementally transferred synt…
ahoppen Jul 25, 2018
345399c
[SourceKit] Fix compile error
ahoppen Jul 25, 2018
3de55b7
[swiftsyntax] Fix issue that caused swift-swiftsyntax-test to not be …
ahoppen Jul 26, 2018
9a6bf4f
[libSyntax] Make RawSyntax a struct
ahoppen May 24, 2018
6c8bbaf
[swiftSyntax] Add type annotations to speed up compile time
ahoppen Jun 26, 2018
b8342c7
[JSONSerialization] Introduce ScalarReferenceTraits
ahoppen Jun 1, 2018
6cc54d9
[JSONSerialization] Make key a StringRef
ahoppen Jun 1, 2018
b5e965e
[libSyntax] Remove the C++ SyntaxClassifier
ahoppen Jul 27, 2018
555bbe5
[libSyntax] Add a swift token classifier for syntax highlighting
ahoppen May 25, 2018
61469af
[swiftSyntax] Add test cases for the SyntaxClassifier
ahoppen Jul 26, 2018
9374aff
[SourceKit] Serialize the syntax tree in the EditorConsumer
ahoppen May 31, 2018
104e79e
[SwiftSyntax] Record the nodes that have been reused during an increm…
ahoppen May 30, 2018
b2d4d14
[SwiftSyntax] Don't reclassify nodes that haven't changed since last …
ahoppen May 30, 2018
a0639af
[SourceKit] Fix compile error on Ubuntu 14
ahoppen Aug 1, 2018
2001ef5
[SwiftSyntax] Refactor AbsolutePosition
ahoppen Jun 28, 2018
7731e5e
[sourcekitd] Add support for transferring raw data
ahoppen Jun 16, 2018
6d792e8
[SourceKit] Change return value of functions in EditorConsumer to void
ahoppen Jul 31, 2018
630dc1c
[incrParse] Compute NodeStart on the fly when looking up nodes
ahoppen Jun 1, 2018
ade4e9c
[SourceKit] Move syntax tree serialization to separate function
ahoppen Jun 1, 2018
29e20ff
[SourceKit] Reserve 4KB for the syntax tree buffer
ahoppen Jun 1, 2018
cea9ec2
[incrParse] Fix issue because of which the syntax tree always got ser…
ahoppen Jun 1, 2018
4ceff41
[libSyntax] Add logging for how long syntax tree serialization took
ahoppen Jun 26, 2018
856a7ea
[SourceKit] Pass reused node IDs by reference
ahoppen Aug 1, 2018
3f20003
[swiftSyntax] Add classifications for comments
ahoppen Jun 29, 2018
1507eae
[libSyntax] Lazily compute a node's text length
ahoppen Aug 1, 2018
416a63b
[libSyntax] Add a binary serialization format for syntax trees
ahoppen Jun 16, 2018
16a1610
[SwiftSyntax] Add a deserializer for the ByteTree format
ahoppen Jun 16, 2018
17cce98
[byteTree] Add a document describing the ByteTree format
ahoppen Aug 3, 2018
b00ee1c
[libSyntax] Reenable caching of token nodes
ahoppen Aug 1, 2018
446b7cc
[libSyntax] Add a reference counted version of OwnedString
ahoppen Aug 13, 2018
9e60748
[libSyntax] Support serializing the syntax tree as ByteTree
ahoppen Jun 26, 2018
c78d2c2
[libSyntax] Make RawSyntaxCacheNode retain its underlying RawSyntax node
ahoppen Aug 11, 2018
35a2c04
[Parser] Delete ASTContext of ParserUnit
ahoppen Aug 10, 2018
38b073e
[swiftSyntax] Make AbsolutePosition a value type
ahoppen Aug 14, 2018
71dfd0d
[libSyntax] Partly revert #18698 to fix ASAN bot
ahoppen Aug 15, 2018
dd006f1
[libSyntax] Generate TokenKinds.def from gyb_syntax_support
ahoppen Aug 15, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions cmake/modules/AddSwift.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2022,7 +2022,7 @@ function(_add_swift_executable_single name)
cmake_parse_arguments(SWIFTEXE_SINGLE
"EXCLUDE_FROM_ALL;DONT_STRIP_NON_MAIN_SYMBOLS;DISABLE_ASLR"
"SDK;ARCHITECTURE"
"DEPENDS;LLVM_COMPONENT_DEPENDS;LINK_LIBRARIES;LINK_FAT_LIBRARIES"
"DEPENDS;LLVM_COMPONENT_DEPENDS;LINK_LIBRARIES;LINK_FAT_LIBRARIES;COMPILE_FLAGS"
${ARGN})

set(SWIFTEXE_SINGLE_SOURCES ${SWIFTEXE_SINGLE_UNPARSED_ARGUMENTS})
Expand Down Expand Up @@ -2096,6 +2096,7 @@ function(_add_swift_executable_single name)
MODULE_NAME ${name}
SDK ${SWIFTEXE_SINGLE_SDK}
ARCHITECTURE ${SWIFTEXE_SINGLE_ARCHITECTURE}
COMPILE_FLAGS ${SWIFTEXE_SINGLE_COMPILE_FLAGS}
IS_MAIN)
add_swift_source_group("${SWIFTEXE_SINGLE_EXTERNAL_SOURCES}")

Expand Down Expand Up @@ -2260,7 +2261,7 @@ function(add_swift_executable name)
cmake_parse_arguments(SWIFTEXE
"EXCLUDE_FROM_ALL;DONT_STRIP_NON_MAIN_SYMBOLS;DISABLE_ASLR"
""
"DEPENDS;LLVM_COMPONENT_DEPENDS;LINK_LIBRARIES"
"DEPENDS;LLVM_COMPONENT_DEPENDS;LINK_LIBRARIES;COMPILE_FLAGS"
${ARGN})

translate_flag(${SWIFTEXE_EXCLUDE_FROM_ALL}
Expand All @@ -2283,6 +2284,7 @@ function(add_swift_executable name)
LINK_LIBRARIES ${SWIFTEXE_LINK_LIBRARIES}
SDK ${SWIFT_HOST_VARIANT_SDK}
ARCHITECTURE ${SWIFT_HOST_VARIANT_ARCH}
COMPILE_FLAGS ${SWIFTEXE_COMPILE_FLAGS}
${SWIFTEXE_EXCLUDE_FROM_ALL_FLAG}
${SWIFTEXE_DONT_STRIP_NON_MAIN_SYMBOLS_FLAG}
${SWIFTEXE_DISABLE_ASLR_FLAG})
Expand All @@ -2301,11 +2303,16 @@ function(add_swift_host_tool executable)
ADDSWIFTHOSTTOOL # prefix
"" # options
"" # single-value args
"SWIFT_COMPONENT" # multi-value args
"SWIFT_COMPONENT;COMPILE_FLAGS;DEPENDS" # multi-value args
${ARGN})

# Create the executable rule.
add_swift_executable(${executable} ${ADDSWIFTHOSTTOOL_UNPARSED_ARGUMENTS})
add_swift_executable(
${executable}
${ADDSWIFTHOSTTOOL_UNPARSED_ARGUMENTS}
DEPENDS ${ADDSWIFTHOSTTOOL_DEPENDS}
COMPILE_FLAGS ${ADDSWIFTHOSTTOOL_COMPILE_FLAGS}
)

# And then create the install rule if we are asked to.
if (ADDSWIFTHOSTTOOL_SWIFT_COMPONENT)
Expand Down
26 changes: 26 additions & 0 deletions docs/ByteTree.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# ByteTree

The ByteTree format is a binary format to efficiently serialize and deserialize trees. It was designed to serialize the syntax tree in `libSyntax` but the framework allows serialisation of arbitrary trees. It currently offers a serialiser written in C++ and a deserialiser written in Swift.

## Overview
The ByteTree format consists of two different constructs: *objects* and *scalars*. A scalar is a raw sequence of binary data. Scalars are untyped and the meaning of their binary data needs to be inferred by the client based on their position in the tree. An object consists of multiple *fields*, indexed by their position within the object, which again can be either objects or scalars.

## Serialization of scalars

A scalar is encoded as its size followed by the data. Size is a `uint32_t` that represents the size of the data in bytes in little endian order.

For example, the string "Hello World" would be encoded as `(uint32_t)11` `"Hello World"`, or in hex `0B 00 00 00 48 65 6C 6C 6F 20 57 6F 72 6C 64`.

## Serialization of objects

An object consists of its size, measured in the number of fields and represented as a `uint32_t` in little endian order, followed by the direct concatenation of its fields. Because each field is again prefixed with its size, no delimiters are necessary in between the fields.

Arrays are modelled as objects whose fields are all of the same type and whose length is variadic (and is indicated by the object's size).

## Versioning

The ByteTree format is prepended by a 4-byte protocol version number that describes the version of the object tree that was serialized. Its exact semantics are up to each specific application, but it is encouraged to interpret it as a two-component number where the first component, consisting of the first three bytes, is incremented for breaking changes and the last byte is incremented for backwards-compatible changes.

## Serialization safety

Since all fields in objects are accessed by their index, issues quickly arise if a new field is accidentally added at the beginning of an object. To prevent issues like this, the ByteTree serialiser and deserialiser require the explicit specification of each field's index within the object. These indices are never serialised. Their sole purpose is to check that all fields are read in the correct order in assertion builds.
306 changes: 306 additions & 0 deletions include/swift/Basic/ByteTreeSerialization.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
//===--- ByteTreeSerialization.h - ByteTree serialization -------*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Provides an interface for serializing an object tree to a custom
/// binary format called ByteTree.
///
//===----------------------------------------------------------------------===//

#ifndef SWIFT_BASIC_BYTETREESERIALIZATION_H
#define SWIFT_BASIC_BYTETREESERIALIZATION_H

#include "llvm/Support/BinaryStreamError.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Error.h"
#include <cassert>
#include <climits>
#include <cstdint>
#include <type_traits>
#include <utility>

namespace {
// Helper for the has_*Traits detection structs below. It is only declared,
// never defined: naming SameType<Sig, &U::member> is well-formed only if
// &U::member is usable as a template argument of type Sig, which lets overload
// resolution detect whether a member with that signature exists.
//
// NOTE(review): an unnamed namespace in a header gives every translation unit
// that includes it its own distinct SameType; consider moving this into a named
// detail namespace.
template <typename T, T>
struct SameType;
} // anonymous namespace

namespace swift {
namespace byteTree {
class ByteTreeWriter;

/// Add a template specialization of \c ObjectTraits for any type that
/// serializes as an object consisting of multiple fields.
template <class T>
struct ObjectTraits {
  // Must provide:

  /// Return the number of fields that will be written in \c write when
  /// \p Object gets serialized.
  // static unsigned numFields(const T &Object);

  /// Serialize \p Object by calling \c Writer.write for all the fields of
  /// \p Object.
  // static void write(ByteTreeWriter &Writer, const T &Object);
};

/// Add a template specialization of \c ScalarTraits for any type that
/// serializes into a raw set of bytes.
template <class T>
struct ScalarTraits {
  // Must provide:

  /// Return the number of bytes the serialized format of \p Value will take
  /// up.
  // static unsigned size(const T &Value);

  /// Serialize \p Value by writing its binary format into \p Writer. Any
  /// errors that may be returned by \p Writer can be returned by this function
  /// and will be handled on the call site.
  // static llvm::Error write(llvm::BinaryStreamWriter &Writer, const T &Value);
};

/// Add a template specialization of \c WrapperTypeTraits for any type that
/// serializes as a type that already has a specialization of \c ScalarTraits.
/// This will typically be useful for types like enums that have a 1-to-1
/// mapping to e.g. an integer.
template <class T>
struct WrapperTypeTraits {
  // Must provide:

  /// Write the serializable representation of \p Value to \p Writer. This will
  /// typically take the form \c Writer.write(convertedValue(Value), Index)
  /// where \c convertedValue has to be defined.
  // static void write(ByteTreeWriter &Writer, const T &Value, unsigned Index);
};

/// Compile-time probe: \c value is true when \c ObjectTraits<T> provides static
/// members \c numFields and \c write matching the signatures spelled out
/// below, and false otherwise.
template <class T>
struct has_ObjectTraits {
  using Signature_numFields = unsigned (*)(const T &);
  using Signature_write = void (*)(ByteTreeWriter &, const T &);

  /// Participates in overload resolution only if both \c &Traits::numFields
  /// and \c &Traits::write can be named with the expected signatures.
  template <typename Traits>
  static char test(SameType<Signature_numFields, &Traits::numFields> *,
                   SameType<Signature_write, &Traits::write> *);

  /// Catch-all picked when the overload above is not viable.
  template <typename Traits>
  static double test(...);

  /// The detection result: which overload was selected is told apart by the
  /// size of its return type.
  static const bool value =
      sizeof(test<ObjectTraits<T>>(nullptr, nullptr)) == sizeof(char);
};

// Test if ScalarTraits<T> is defined on type T.
template <class T>
struct has_ScalarTraits {
using Signature_size = unsigned (*)(const T &Object);
using Signature_write = llvm::Error (*)(llvm::BinaryStreamWriter &Writer,
const T &Object);

template <typename U>
static char test(SameType<Signature_size, &U::size> *,
SameType<Signature_write, &U::write> *);

template <typename U>
static double test(...);

public:
static bool const value =
(sizeof(test<ScalarTraits<T>>(nullptr, nullptr)) == 1);
};

/// Compile-time probe: \c value is true when \c WrapperTypeTraits<T> provides
/// a static member \c write matching the signature spelled out below, and
/// false otherwise.
template <class T>
struct has_WrapperTypeTraits {
  using Signature_write = void (*)(ByteTreeWriter &, const T &, unsigned);

  /// Participates in overload resolution only if \c &Traits::write can be
  /// named with the expected signature.
  template <typename Traits>
  static char test(SameType<Signature_write, &Traits::write> *);

  /// Catch-all picked when the overload above is not viable.
  template <typename Traits>
  static double test(...);

  /// The detection result: which overload was selected is told apart by the
  /// size of its return type.
  static const bool value =
      sizeof(test<WrapperTypeTraits<T>>(nullptr)) == sizeof(char);
};

class ByteTreeWriter {
private:
/// The writer to which the binary data is written.
llvm::BinaryStreamWriter &StreamWriter;

/// The number of fields this object contains. \c UINT_MAX if it has not been
/// set yet. No member may be written to the object if expected number of
/// fields has not been set yet.
unsigned NumFields = UINT_MAX;

/// The index of the next field to write. Used in assertion builds to keep
/// track that no indicies are jumped and that the object contains the
/// expected number of fields.
unsigned CurrentFieldIndex = 0;

/// The \c ByteTreeWriter can only be constructed internally. Use
/// \c ByteTreeWriter.write to serialize a new object.
ByteTreeWriter(llvm::BinaryStreamWriter &StreamWriter)
: StreamWriter(StreamWriter) {}

/// Set the expected number of fields the object written by this writer is
/// expected to have.
void setNumFields(uint32_t NumFields) {
assert(NumFields != UINT_MAX &&
"NumFields may not be reset since it has already been written to "
"the byte stream");
assert((this->NumFields == UINT_MAX) && "NumFields has already been set");

auto Error = StreamWriter.writeInteger(NumFields);
(void)Error;
assert(!Error);

this->NumFields = NumFields;
}

/// Validate that \p Index is the next field that is expected to be written,
/// does not exceed the number of fields in this object and that
/// \c setNumFields has already been called.
void validateAndIncreaseFieldIndex(unsigned Index) {
assert((NumFields != UINT_MAX) &&
"setNumFields must be called before writing any value");
assert(Index == CurrentFieldIndex && "Writing index out of order");
assert(Index < NumFields &&
"Writing more fields than object is expected to have");

CurrentFieldIndex++;
}

~ByteTreeWriter() {
assert(CurrentFieldIndex == NumFields &&
"Object had more or less elements than specified");
}

public:
/// Write a binary serialization of \p Object to \p StreamWriter, prefixing
/// the stream by the specified ProtocolVersion.
template <typename T>
typename std::enable_if<has_ObjectTraits<T>::value, void>::type
static write(uint32_t ProtocolVersion, llvm::BinaryStreamWriter &StreamWriter,
const T &Object) {
ByteTreeWriter Writer(StreamWriter);

auto Error = Writer.StreamWriter.writeInteger(ProtocolVersion);
(void)Error;
assert(!Error);

// There always is one root. We need to set NumFields so that index
// validation succeeds, but we don't want to serialize this.
Writer.NumFields = 1;
Writer.write(Object, /*Index=*/0);
}

template <typename T>
typename std::enable_if<has_ObjectTraits<T>::value, void>::type
write(const T &Object, unsigned Index) {
validateAndIncreaseFieldIndex(Index);

auto ObjectWriter = ByteTreeWriter(StreamWriter);
ObjectWriter.setNumFields(ObjectTraits<T>::numFields(Object));

ObjectTraits<T>::write(ObjectWriter, Object);
}

template <typename T>
typename std::enable_if<has_ScalarTraits<T>::value, void>::type
write(const T &Value, unsigned Index) {
validateAndIncreaseFieldIndex(Index);

uint32_t ValueSize = ScalarTraits<T>::size(Value);
auto SizeError = StreamWriter.writeInteger(ValueSize);
(void)SizeError;
assert(!SizeError);

auto StartOffset = StreamWriter.getOffset();
auto ContentError = ScalarTraits<T>::write(StreamWriter, Value);
(void)ContentError;
assert(!ContentError);
(void)StartOffset;
assert((StreamWriter.getOffset() - StartOffset == ValueSize) &&
"Number of written bytes does not match size returned by "
"ScalarTraits<T>::size");
}

template <typename T>
typename std::enable_if<has_WrapperTypeTraits<T>::value, void>::type
write(const T &Value, unsigned Index) {
auto LengthBeforeWrite = CurrentFieldIndex;
WrapperTypeTraits<T>::write(*this, Value, Index);
(void)LengthBeforeWrite;
assert(CurrentFieldIndex == LengthBeforeWrite + 1 &&
"WrapperTypeTraits did not call BinaryWriter.write");
}
};

// Define serialization schemes for common types

/// Serializes a \c uint8_t as a scalar of one byte.
template <>
struct ScalarTraits<uint8_t> {
  /// The serialized size does not depend on the value.
  static unsigned size(const uint8_t &) { return sizeof(uint8_t); }

  /// Forward \p Value to the stream writer's integer serialization.
  static llvm::Error write(llvm::BinaryStreamWriter &Writer,
                           const uint8_t &Value) {
    return Writer.writeInteger(Value);
  }
};

/// Serializes a \c uint16_t as a scalar of two bytes.
template <>
struct ScalarTraits<uint16_t> {
  /// The serialized size does not depend on the value.
  static unsigned size(const uint16_t &) { return sizeof(uint16_t); }

  /// Forward \p Value to the stream writer's integer serialization.
  static llvm::Error write(llvm::BinaryStreamWriter &Writer,
                           const uint16_t &Value) {
    return Writer.writeInteger(Value);
  }
};

/// Serializes a \c uint32_t as a scalar of four bytes.
template <>
struct ScalarTraits<uint32_t> {
  /// The serialized size does not depend on the value.
  static unsigned size(const uint32_t &) { return sizeof(uint32_t); }

  /// Forward \p Value to the stream writer's integer serialization.
  static llvm::Error write(llvm::BinaryStreamWriter &Writer,
                           const uint32_t &Value) {
    return Writer.writeInteger(Value);
  }
};

/// Serializes a \c bool by writing it as a \c uint8_t holding 0 or 1.
template <>
struct WrapperTypeTraits<bool> {
  static void write(ByteTreeWriter &Writer, const bool &Value,
                    unsigned Index) {
    const uint8_t AsByte = Value ? 1 : 0;
    Writer.write(AsByte, Index);
  }
};

/// Serializes an \c llvm::StringRef as a scalar made up of the string's bytes.
template <>
struct ScalarTraits<llvm::StringRef> {
  /// The serialized size is the length of the string.
  static unsigned size(const llvm::StringRef &Str) {
    return static_cast<unsigned>(Str.size());
  }

  /// Write the string's contents via the stream writer's \c writeFixedString.
  static llvm::Error write(llvm::BinaryStreamWriter &Writer,
                           const llvm::StringRef &Str) {
    return Writer.writeFixedString(Str);
  }
};

template <>
struct ScalarTraits<llvm::NoneType> {
// Serialize llvm::None as a value with 0 length
static unsigned size(const llvm::NoneType &None) { return 0; }
static llvm::Error write(llvm::BinaryStreamWriter &Writer,
const llvm::NoneType &None) {
// Nothing to write
return llvm::ErrorSuccess();
}
};

} // end namespace byteTree
} // end namespace swift

#endif
Loading