Skip to content

Integrate experimental string processing modules and enable end-to-end regex. #40531

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions cmake/modules/AddSwift.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -634,8 +634,13 @@ function(add_libswift_module module)
""
"DEPENDS"
${ARGN})
set(sources ${ALSM_UNPARSED_ARGUMENTS})
list(TRANSFORM sources PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/")
set(raw_sources ${ALSM_UNPARSED_ARGUMENTS})
set(sources)
foreach(raw_source ${raw_sources})
get_filename_component(
raw_source "${raw_source}" REALPATH BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
list(APPEND sources "${raw_source}")
endforeach()

set(target_name "LibSwift${module}")

Expand Down
7 changes: 2 additions & 5 deletions lib/Sema/CSGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1266,18 +1266,15 @@ namespace {
ctx.Id_Regex.str());
return Type();
}
auto substringType = ctx.getSubstringType();
auto dynCapturesType = ctx.getDynamicCapturesType();
if (!dynCapturesType) {
ctx.Diags.diagnose(E->getLoc(),
diag::string_processing_lib_missing,
"DynamicCaptures");
return Type();
}
// TODO: Replace `(Substring, DynamicCaptures)` with type inferred from
// the regex.
auto matchType = TupleType::get({substringType, dynCapturesType}, ctx);
return BoundGenericStructType::get(regexDecl, Type(), {matchType});
// TODO: Replace `DynamicCaptures` with type inferred from the regex.
return BoundGenericStructType::get(regexDecl, Type(), {dynCapturesType});
}

Type visitDeclRefExpr(DeclRefExpr *E) {
Expand Down
4 changes: 3 additions & 1 deletion libswift/Sources/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
# See http://swift.org/LICENSE.txt for license information
# See http://swift.org/CONTRIBUTORS.txt for Swift project authors

add_subdirectory(ExperimentalRegex)
if(SWIFT_ENABLE_EXPERIMENTAL_STRING_PROCESSING)
add_subdirectory(ExperimentalRegex)
endif()
add_subdirectory(SIL)
add_subdirectory(Optimizer)
13 changes: 11 additions & 2 deletions libswift/Sources/ExperimentalRegex/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,16 @@
# See http://swift.org/LICENSE.txt for license information
# See http://swift.org/CONTRIBUTORS.txt for Swift project authors

file(GLOB_RECURSE _LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES
"${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}/Sources/_MatchingEngine/*.swift")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usually we don't collect sources with glob, but list all sources in the cmake files

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because sources are from a separate repo that changes very rapidly, IMO listing sources would involve a lot of pull requests to apple/swift just to update the source list which then slows down development of the string processing repo. With glob, we can add files without submitting PRs the compiler repo unless we need to change the compiler interface.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either is ok, as we want to be tag-based. The hazard of manually listing files is that these things easily fall out of sync and we can think we've added API only to find out we haven't. We're not talking about just compiler-visible sources but adding public stdlib API, so this is a hazard that is not otherwise checked by the build system.

What's the hazard of globbing? I think this would be very interesting to know so that we're not cargo-culting an approach built for a completely different integration model.

set(LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES)
foreach(source ${_LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES})
file(TO_CMAKE_PATH "${source}" source)
list(APPEND LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES ${source})
endforeach()
message(STATUS "Using Experimental String Processing library for libswift ExperimentalRegex (${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}).")

add_libswift_module(ExperimentalRegex
Regex.swift
)
"${LIBSWIFT_EXPERIMENTAL_REGEX_SOURCES}"
Regex.swift)

13 changes: 8 additions & 5 deletions libswift/Sources/ExperimentalRegex/Regex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ public func experimental_regex_strawperson(
) -> UnsafePointer<CChar>? {
guard let s = ptr else { return nil }

func error(_ str: String) -> UnsafePointer<CChar>? {
func makeError(_ str: String) -> UnsafePointer<CChar>? {
let count = str.utf8.count + 1
return str.withCString {
assert($0[count-1] == 0)
Expand All @@ -16,12 +16,15 @@ public func experimental_regex_strawperson(
}

let str = String(cString: s)
if str.starts(with: "*") || str.starts(with: "+") || str.starts(with: "?") {
return error("cannot start regex with quantifier")
do {
let _ = try parse(str, .traditional)
return nil
} catch {
return makeError(
"cannot parse regular expression: \(String(describing: error))")
}
return nil
}

public func registerParser() {
public func registerRegexParser() {
Parser_registerParseRegexStrawperson({ experimental_regex_strawperson($0) })
}
9 changes: 7 additions & 2 deletions libswift/Sources/Optimizer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@
# See http://swift.org/LICENSE.txt for license information
# See http://swift.org/CONTRIBUTORS.txt for Swift project authors

add_libswift_module(Optimizer
DEPENDS SIL ExperimentalRegex)
set(dependencies)
list(APPEND dependencies SIL)
if(SWIFT_ENABLE_EXPERIMENTAL_STRING_PROCESSING)
list(APPEND dependencies ExperimentalRegex)
endif()

add_libswift_module(Optimizer DEPENDS ${dependencies})

add_subdirectory(Analysis)
add_subdirectory(InstructionPasses)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,19 @@

import SIL
import OptimizerBridging

#if canImport(ExperimentalRegex)
import ExperimentalRegex
#endif

@_cdecl("initializeLibSwift")
public func initializeLibSwift() {
registerSILClasses()
registerSwiftPasses()
registerParser()

#if canImport(ExperimentalRegex)
registerRegexParser()
#endif
}

private func registerPass(
Expand Down
12 changes: 9 additions & 3 deletions stdlib/public/MatchingEngine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,17 @@
set(swift_matching_engine_link_libraries
swiftCore)

file(GLOB_RECURSE _MATCHING_ENGINE_SOURCES
"${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}/Sources/_MatchingEngine/*.swift")
set(MATCHING_ENGINE_SOURCES)
foreach(source ${_MATCHING_ENGINE_SOURCES})
file(TO_CMAKE_PATH "${source}" source)
list(APPEND MATCHING_ENGINE_SOURCES ${source})
endforeach()
message(STATUS "Using Experimental String Processing library for _MatchingEngine (${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}).")

add_swift_target_library(swift_MatchingEngine ${SWIFT_STDLIB_LIBRARY_BUILD_TYPES} IS_STDLIB
MatchingEngine.swift
"${MATCHING_ENGINE_SOURCES}"

SWIFT_MODULE_DEPENDS_LINUX Glibc
SWIFT_MODULE_DEPENDS_FREEBSD Glibc
Expand All @@ -30,8 +38,6 @@ add_swift_target_library(swift_MatchingEngine ${SWIFT_STDLIB_LIBRARY_BUILD_TYPES
-Dswift_MatchingEngine_EXPORTS
SWIFT_COMPILE_FLAGS
${SWIFT_STANDARD_LIBRARY_SWIFT_FLAGS}
-parse-stdlib
-Xfrontend -enable-experimental-string-processing
LINK_FLAGS "${SWIFT_RUNTIME_SWIFT_LINK_FLAGS}"

INSTALL_IN_COMPONENT stdlib
Expand Down
7 changes: 0 additions & 7 deletions stdlib/public/MatchingEngine/MatchingEngine.swift

This file was deleted.

12 changes: 9 additions & 3 deletions stdlib/public/StringProcessing/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,17 @@ set(swift_string_processing_link_libraries
swiftCore
swift_MatchingEngine)

file(GLOB_RECURSE _STRING_PROCESSING_SOURCES
"${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}/Sources/_StringProcessing/*.swift")
set(STRING_PROCESSING_SOURCES)
foreach(source ${_STRING_PROCESSING_SOURCES})
file(TO_CMAKE_PATH "${source}" source)
list(APPEND STRING_PROCESSING_SOURCES ${source})
endforeach()
message(STATUS "Using Experimental String Processing library for _StringProcessing (${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}).")

add_swift_target_library(swift_StringProcessing ${SWIFT_STDLIB_LIBRARY_BUILD_TYPES} IS_STDLIB
Regex.swift
"${STRING_PROCESSING_SOURCES}"

SWIFT_MODULE_DEPENDS_LINUX Glibc
SWIFT_MODULE_DEPENDS_FREEBSD Glibc
Expand All @@ -31,8 +39,6 @@ add_swift_target_library(swift_StringProcessing ${SWIFT_STDLIB_LIBRARY_BUILD_TYP
-Dswift_StringProcessing_EXPORTS
SWIFT_COMPILE_FLAGS
${SWIFT_STANDARD_LIBRARY_SWIFT_FLAGS}
-parse-stdlib
-Xfrontend -enable-experimental-string-processing
LINK_FLAGS "${SWIFT_RUNTIME_SWIFT_LINK_FLAGS}"

SWIFT_MODULE_DEPENDS _MatchingEngine
Expand Down
10 changes: 0 additions & 10 deletions stdlib/public/StringProcessing/Regex.swift

This file was deleted.

10 changes: 5 additions & 5 deletions test/StringProcessing/Parse/regex.swift
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing
// REQUIRES: libswift

var s = 'abc'
_ = 'abc'

var s1 = ('*', '+', '?')
// expected-error@-1 3{{cannot start regex with quantifier}}
_ = ('[*', '+]', '.]')
// expected-error@-1 {{cannot parse regular expression}}

var s2 = '\w+'
var s3 = '\'\\'
_ = '\w+'
_ = '\'\\'
52 changes: 52 additions & 0 deletions test/StringProcessing/Runtime/regex_basic.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// RUN: %target-run-simple-swift(-Xfrontend -enable-experimental-string-processing)

// REQUIRES: libswift,string_processing,executable_test

import StdlibUnittest

var RegexBasicTests = TestSuite("RegexBasic")

extension String {
func expectMatch<T>(
_ regex: Regex<T>,
file: String = #file,
line: UInt = #line
) -> RegexMatch<T> {
guard let result = match(regex) else {
expectUnreachable("Failed match", file: file, line: line)
fatalError()
}
return result
}
}

RegexBasicTests.test("Basic") {
let input = "aabccd"

let match1 = input.expectMatch('aabcc.')
expectEqual("aabccd", input[match1.range])
expectEqual(.empty, match1.captures)

let match2 = input.expectMatch('a*b.+.')
expectEqual("aabccd", input[match2.range])
expectEqual(.empty, match2.captures)
}

RegexBasicTests.test("Captures") {
let input = """
A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM \
COMBINING MARK TUKWENTIS
"""
let regex = '([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*'
let match1 = input.expectMatch(regex)
expectEqual(input[...], input[match1.range])
expectEqual(
.tuple([
.substring("A6F0"),
.optional(.substring("A6F1")),
.substring("Extend")
]),
match1.captures)
}

runAllTests()
4 changes: 2 additions & 2 deletions test/StringProcessing/SILGen/regex_literal_silgen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ var s = 'abc'
// CHECK: [[REGEX_STR_LITERAL:%[0-9]+]] = string_literal utf8 "abc"
// CHECK: [[STRING_INIT:%[0-9]+]] = function_ref @$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC : $@convention(method) (Builtin.RawPointer, Builtin.Word, Builtin.Int1, @thin String.Type) -> @owned String
// CHECK: [[REGEX_STR:%[0-9]+]] = apply [[STRING_INIT]]([[REGEX_STR_LITERAL]]
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA0ACyxGSS_tcfC : $@convention(method) <τ_0_0> (@owned String, @thin Regex<τ_0_0>.Type) -> Regex<τ_0_0>
// CHECK: apply [[REGEX_INIT]]<(Substring, DynamicCaptures)>([[REGEX_STR]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, @thin Regex<τ_0_0>.Type) -> Regex<τ_0_0>
// CHECK: [[REGEX_INIT:%[0-9]+]] = function_ref @$s17_StringProcessing5RegexV06_regexA0ACyxGSS_tcfC : $@convention(method) <τ_0_0> (@owned String, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
// CHECK: apply [[REGEX_INIT]]<DynamicCaptures>({{%.+}}, [[REGEX_STR]], {{%.+}}) : $@convention(method) <τ_0_0> (@owned String, @thin Regex<τ_0_0>.Type) -> @out Regex<τ_0_0>
6 changes: 3 additions & 3 deletions test/StringProcessing/Sema/regex_literal_type_inference.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

let r0 = '.' // okay

let r1: Regex<(Substring, DynamicCaptures)> = '.(.)' // okay
let r1: Regex<DynamicCaptures> = '.(.)' // okay

// expected-error @+2 {{cannot assign value of type 'Regex<(Substring, DynamicCaptures)>' to type 'Regex<Substring>'}}
// expected-note @+1 {{arguments to generic parameter 'Match' ('(Substring, DynamicCaptures)' and 'Substring') are expected to be equal}}
// expected-error @+2 {{cannot assign value of type 'Regex<DynamicCaptures>' to type 'Regex<Substring>'}}
// expected-note @+1 {{arguments to generic parameter 'Capture' ('DynamicCaptures' and 'Substring') are expected to be equal}}
let r2: Regex<Substring> = '.(.)'

func takesRegex<Match>(_: Regex<Match>) {}
Expand Down
2 changes: 1 addition & 1 deletion test/StringProcessing/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
if 'string_processing' not in config.available_features:
config.unsupported = False
config.unsupported = True
11 changes: 0 additions & 11 deletions test/StringProcessing/string_processing_dummy.swift

This file was deleted.

10 changes: 10 additions & 0 deletions utils/build-script-impl
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ KNOWN_SETTINGS=(
swift-stdlib-experimental-hermetic-seal-at-link "0" "whether stdlib should be built with -experimental-hermetic-seal-at-link"
swift-disable-dead-stripping "0" "turns off Darwin-specific dead stripping for Swift host tools"
common-swift-flags "" "Flags used for Swift targets other than the stdlib, like the corelibs"
swift-enable-experimental-string-processing "1" "whether to build experimental string processing feature"

## FREESTANDING Stdlib Options
swift-freestanding-flavor "" "when building the FREESTANDING stdlib, which build style to use (options: apple, linux)"
Expand Down Expand Up @@ -1252,6 +1253,7 @@ LIBDISPATCH_SOURCE_DIR="${WORKSPACE}/swift-corelibs-libdispatch"
LIBDISPATCH_STATIC_SOURCE_DIR="${WORKSPACE}/swift-corelibs-libdispatch"
LIBICU_SOURCE_DIR="${WORKSPACE}/icu"
LIBCXX_SOURCE_DIR="${WORKSPACE}/llvm-project/libcxx"
EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR="${WORKSPACE}/swift-experimental-string-processing"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do these sources need to be in a separate repo?

Copy link
Contributor Author

@rxwei rxwei Dec 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We plan to keep string processing in a package outside of the Swift repo for quicker development iterations and better modularity. The _MatchingEngine target is built both as a libswift library (used by the parser) and as a runtime library. When it gets closer to the release we can consider moving it into the Swift repo.

Do you have concerns about this approach?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should really keep that sources inside the swift repo.

With "quicker development iteration", I guess you mean you want to avoid the overhead of the swift PR testing. But you'll have lit tests for that feature in the swift repo. This means, it will increase the probability that you'll break the swift lit tests. And that will add more troubles for everyone else.
Also, making swift depend on additional repos adds build complexity for all developers. Currently swift only strictly depends on llvm-project - and that's great. For example, most of the time I just update the swift repo to reduce my local build time.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We would integrate based on a tag, so we'd do integration testing the PR that updates the tag. Beyond that, it doesn't make senes to do full integration testing as part of a normal development cycle.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But you'll have lit tests for that feature in the swift repo.

We have unit tests, functional tests, and integration tests. We use lit for integration testing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still, I predict that this creates more troubles than it solves.
E.g. whenever you need to fix a bug, you have to update two repos: in swift-experimental-string-processing and in swift to update the tag.
Also, it just complicates the build process, which increases the probability of running into problems - as you can already see yourself: the Windows PR testing.


# We cannot currently apply the normal rules of skipping here for LLVM. Even if
# we are skipping building LLVM, we still need to at least build a few tools
Expand Down Expand Up @@ -2196,6 +2198,14 @@ for host in "${ALL_HOSTS[@]}"; do
)
fi

if [[ $(true_false "${SWIFT_ENABLE_EXPERIMENTAL_STRING_PROCESSING}") == "TRUE" ]] ; then
cmake_options=(
"${cmake_options[@]}"
-DSWIFT_ENABLE_EXPERIMENTAL_STRING_PROCESSING:BOOL=TRUE
-DEXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR="${EXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR}"
)
fi

build_targets=(all "${SWIFT_STDLIB_TARGETS[@]}")
if [[ $(true_false "${build_perf_testsuite_this_time}") == "TRUE" ]]; then
native_swift_tools_path="$(build_directory_bin ${LOCAL_HOST} swift)"
Expand Down
1 change: 1 addition & 0 deletions utils/build-windows.bat
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ cmake^
-DSWIFT_ENABLE_EXPERIMENTAL_DISTRIBUTED=YES^
-DSWIFT_ENABLE_EXPERIMENTAL_DIFFERENTIABLE_PROGRAMMING=YES^
-DSWIFT_ENABLE_EXPERIMENTAL_STRING_PROCESSING=YES^
-DEXPERIMENTAL_STRING_PROCESSING_SOURCE_DIR=%source_root%\swift-experimental-string-processing^
-DSWIFT_INSTALL_COMPONENTS="autolink-driver;compiler;clang-resource-dir-symlink;stdlib;sdk-overlay;editor-integration;tools;testsuite-tools;sourcekit-inproc;swift-remote-mirror;swift-remote-mirror-headers"^
-DSWIFT_PARALLEL_LINK_JOBS=8^
-DPYTHON_EXECUTABLE:PATH=%PYTHON_HOME%\python.exe^
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,6 @@ def __init__(self, args, toolchain, source_dir, build_dir):
# Add static vprintf flag
self.cmake_options.extend(self._enable_stdlib_static_vprintf)

# Add experimental string processing flag.
self.cmake_options.extend(self._enable_experimental_string_processing)

# Add freestanding related flags.
self.cmake_options.extend(self._freestanding_is_darwin)

Expand Down Expand Up @@ -171,11 +168,6 @@ def _enable_stdlib_static_vprintf(self):
return [('SWIFT_STDLIB_STATIC_PRINT',
self.args.build_swift_stdlib_static_print)]

@property
def _enable_experimental_string_processing(self):
return [('SWIFT_ENABLE_EXPERIMENTAL_STRING_PROCESSING:BOOL',
self.args.enable_experimental_string_processing)]

@property
def _freestanding_is_darwin(self):
return [('SWIFT_FREESTANDING_IS_DARWIN:BOOL',
Expand Down
Loading