Skip to content

Commit 938c4a6

Browse files
committed
[Collections] Use persistent cache for GitHub package metadata
Motivation: Currently we use transient in-memory cache for storing GitHub package metadata. Modifications: - Add generic `SQLiteBackedCache` in Basics - Change `ManifestLoader` to use `SQLiteBackedCache` - Change `GitHubPackageMetadataProvider` to use `SQLiteBackedCache` - Adjust tests
1 parent b11f273 commit 938c4a6

File tree

10 files changed

+778
-620
lines changed

10 files changed

+778
-620
lines changed
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
/*
2+
This source file is part of the Swift.org open source project
3+
4+
Copyright (c) 2021 Apple Inc. and the Swift project authors
5+
Licensed under Apache License v2.0 with Runtime Library Exception
6+
7+
See http://swift.org/LICENSE.txt for license information
8+
See http://swift.org/CONTRIBUTORS.txt for Swift project authors
9+
*/
10+
11+
import Foundation
12+
13+
import TSCBasic
14+
import TSCUtility
15+
16+
/// SQLite backed persistent cache.
///
/// Values are JSON-encoded and stored as blobs in a single table called `name`, keyed by a string.
/// The database connection is opened lazily on first use; callers are expected to call `close()`
/// before the cache is deallocated (a debug assertion fires in `deinit` otherwise).
///
/// - Note: `name` is interpolated directly into SQL statements as the table name, so it must be a
///   trusted, valid SQL identifier.
public final class SQLiteBackedCache<Value: Codable>: Closable {
    public typealias Key = String

    public let name: String
    public let fileSystem: TSCBasic.FileSystem
    public let location: SQLite.Location
    public let configuration: SQLiteBackedCacheConfiguration

    private var state = State.idle
    private let stateLock = Lock()

    private let diagnosticsEngine: DiagnosticsEngine?
    private let jsonEncoder: JSONEncoder
    private let jsonDecoder: JSONDecoder

    /// Creates a cache stored at `location`.
    ///
    /// - Parameters:
    ///   - name: Name of the table holding the cache entries.
    ///   - location: Where the SQLite database lives.
    ///   - configuration: Cache tuning options.
    ///   - diagnosticsEngine: Optional sink for warnings (e.g. when the cache is truncated).
    public init(name: String, location: SQLite.Location, configuration: SQLiteBackedCacheConfiguration = .init(), diagnosticsEngine: DiagnosticsEngine? = nil) {
        self.name = name
        self.location = location
        switch self.location {
        case .path, .temporary:
            self.fileSystem = localFileSystem
        case .memory:
            self.fileSystem = InMemoryFileSystem()
        }
        self.configuration = configuration
        self.diagnosticsEngine = diagnosticsEngine
        self.jsonEncoder = JSONEncoder.makeWithDefaults()
        self.jsonDecoder = JSONDecoder.makeWithDefaults()
    }

    /// Creates a cache stored in the database file at `path`.
    public convenience init(name: String, path: AbsolutePath, configuration: SQLiteBackedCacheConfiguration = .init(), diagnosticsEngine: DiagnosticsEngine? = nil) {
        self.init(name: name, location: .path(path), configuration: configuration, diagnosticsEngine: diagnosticsEngine)
    }

    deinit {
        // TODO: we could wrap the failure here with diagnostics if it wasn't optional throughout
        try? self.withStateLock {
            if case .connected(let db) = self.state {
                assertionFailure("db should be closed")
                try db.close()
            }
        }
    }

    /// Closes the underlying database connection, if one is open.
    ///
    /// - Throws: Any error raised while acquiring the state lock or closing the database.
    public func close() throws {
        try self.withStateLock {
            if case .connected(let db) = self.state {
                try db.close()
            }
            self.state = .disconnected
        }
    }

    /// Stores `value` under `key`.
    ///
    /// If the database reports that it is full and `configuration.truncateWhenFull` is set, all
    /// entries are deleted and the insert is retried exactly once.
    ///
    /// - Parameters:
    ///   - key: The cache key.
    ///   - value: The value to encode and store.
    ///   - replace: When `true` an existing entry is overwritten; otherwise it is left untouched.
    /// - Throws: Encoding errors, or database errors (including `.databaseFull` when truncation is
    ///   disabled or did not free enough space).
    public func put(key: Key, value: Value, replace: Bool = false) throws {
        do {
            try self.insert(key: key, value: value, replace: replace)
        } catch (let error as SQLite.Errors) where error == .databaseFull {
            if !self.configuration.truncateWhenFull {
                throw error
            }
            self.diagnosticsEngine?.emit(.warning("truncating \(self.name) cache database since it reached max size of \(self.configuration.maxSizeInBytes ?? 0) bytes"))
            try self.executeStatement("DELETE FROM \(self.name);") { statement -> Void in
                try statement.step()
            }
            // Retry only once: if the value still does not fit into an empty table, retrying
            // again cannot succeed, so let the error propagate instead of recursing forever.
            try self.insert(key: key, value: value, replace: replace)
        }
    }

    /// Returns the value stored under `key`, or `nil` when there is no such entry.
    ///
    /// - Throws: Database errors, or decoding errors when the stored blob cannot be decoded as `Value`.
    public func get(key: Key) throws -> Value? {
        let query = "SELECT value FROM \(self.name) WHERE key = ? LIMIT 1;"
        return try self.executeStatement(query) { statement -> Value? in
            try statement.bind([.string(key)])
            let data = try statement.step()?.blob(at: 0)
            return try data.flatMap {
                try self.jsonDecoder.decode(Value.self, from: $0)
            }
        }
    }

    /// Removes the entry stored under `key`, if any.
    public func remove(key: Key) throws {
        // No `LIMIT` clause: `DELETE ... LIMIT` is only accepted by SQLite builds compiled with
        // SQLITE_ENABLE_UPDATE_DELETE_LIMIT, and `key` is the primary key so at most one row matches.
        let query = "DELETE FROM \(self.name) WHERE key = ?;"
        try self.executeStatement(query) { statement in
            try statement.bind([.string(key)])
            try statement.step()
        }
    }

    /// Performs a single insert attempt without any database-full recovery.
    private func insert(key: Key, value: Value, replace: Bool) throws {
        let query = "INSERT OR \(replace ? "REPLACE" : "IGNORE") INTO \(self.name) VALUES (?, ?);"
        try self.executeStatement(query) { statement -> Void in
            let data = try self.jsonEncoder.encode(value)
            let bindings: [SQLite.SQLiteValue] = [
                .string(key),
                .blob(data),
            ]
            try statement.bind(bindings)
            try statement.step()
        }
    }

    /// Prepares `query`, hands the statement to `body`, and always finalizes the statement.
    ///
    /// An error thrown by `finalize()` takes precedence over an error thrown by `body`.
    private func executeStatement<T>(_ query: String, _ body: (SQLite.PreparedStatement) throws -> T) throws -> T {
        try self.withDB { db in
            let statement = try db.prepare(query: query)
            // Capture the outcome so the statement is finalized on both success and failure.
            let result: Result<T, Error> = Result { try body(statement) }
            try statement.finalize()
            return try result.get()
        }
    }

    /// Runs `body` with an open database connection, (re)creating the connection when needed.
    private func withDB<T>(_ body: (SQLite) throws -> T) throws -> T {
        let createDB = { () throws -> SQLite in
            let db = try SQLite(location: self.location, configuration: self.configuration.underlying)
            try self.createSchemaIfNecessary(db: db)
            return db
        }

        let db = try self.withStateLock { () -> SQLite in
            let db: SQLite
            switch (self.location, self.state) {
            case (.path(let path), .connected(let database)):
                if self.fileSystem.exists(path) {
                    db = database
                } else {
                    // The database file disappeared underneath us: drop the stale connection
                    // and start over with a fresh database.
                    try database.close()
                    try self.fileSystem.createDirectory(path.parentDirectory, recursive: true)
                    db = try createDB()
                }
            case (.path(let path), _):
                if !self.fileSystem.exists(path) {
                    try self.fileSystem.createDirectory(path.parentDirectory, recursive: true)
                }
                db = try createDB()
            case (_, .connected(let database)):
                db = database
            case (_, _):
                db = try createDB()
            }
            self.state = .connected(db)
            return db
        }

        // FIXME: workaround linux sqlite concurrency issues causing CI failures
        #if os(Linux)
        return try self.withStateLock {
            return try body(db)
        }
        #else
        return try body(db)
        #endif
    }

    /// Creates the cache table (if missing) and switches the database to WAL journaling.
    private func createSchemaIfNecessary(db: SQLite) throws {
        // The key column is declared TEXT: in SQLite a declared type of "STRING" gets NUMERIC
        // affinity, which would coerce numeric-looking keys (e.g. "1" and "001") to the same value.
        let table = """
            CREATE TABLE IF NOT EXISTS \(self.name) (
                key TEXT PRIMARY KEY NOT NULL,
                value BLOB NOT NULL
            );
        """

        try db.exec(query: table)
        try db.exec(query: "PRAGMA journal_mode=WAL;")
    }

    /// Runs `body` while holding the lock that guards `state`.
    ///
    /// File-based databases use an exclusive file lock on the database path; in-memory and
    /// temporary databases use an in-process lock.
    private func withStateLock<T>(_ body: () throws -> T) throws -> T {
        switch self.location {
        case .path(let path):
            if !self.fileSystem.exists(path.parentDirectory) {
                // Recursive, so that a missing intermediate directory does not fail the lock.
                try self.fileSystem.createDirectory(path.parentDirectory, recursive: true)
            }
            return try self.fileSystem.withLock(on: path, type: .exclusive, body)
        case .memory, .temporary:
            return try self.stateLock.withLock(body)
        }
    }

    /// Connection life cycle of the cache.
    private enum State {
        case idle
        case connected(SQLite)
        case disconnected
    }
}
206+
207+
/// Settings for `SQLiteBackedCache`.
///
/// Apart from `truncateWhenFull`, every option is forwarded to the wrapped `SQLite.Configuration`.
public struct SQLiteBackedCacheConfiguration {
    /// Whether the cache deletes all of its entries and retries when the database is full,
    /// instead of failing the write.
    public var truncateWhenFull: Bool

    /// The SQLite-level configuration handed to the database when it is opened.
    fileprivate var underlying: SQLite.Configuration

    /// Creates the default configuration: truncation enabled, 100 MB size limit, 1000 ms busy timeout.
    public init() {
        var sqliteConfiguration = SQLite.Configuration()
        sqliteConfiguration.maxSizeInMegabytes = 100
        // see https://www.sqlite.org/c3ref/busy_timeout.html
        sqliteConfiguration.busyTimeoutMilliseconds = 1000

        self.underlying = sqliteConfiguration
        self.truncateWhenFull = true
    }

    /// Size limit in megabytes, as stored by the underlying SQLite configuration.
    public var maxSizeInMegabytes: Int? {
        get {
            return self.underlying.maxSizeInMegabytes
        }
        set(megabytes) {
            self.underlying.maxSizeInMegabytes = megabytes
        }
    }

    /// Size limit in bytes, as stored by the underlying SQLite configuration.
    public var maxSizeInBytes: Int? {
        get {
            return self.underlying.maxSizeInBytes
        }
        set(bytes) {
            self.underlying.maxSizeInBytes = bytes
        }
    }

    /// Busy timeout in milliseconds, as stored by the underlying SQLite configuration.
    public var busyTimeoutMilliseconds: Int32 {
        get {
            return self.underlying.busyTimeoutMilliseconds
        }
        set(milliseconds) {
            self.underlying.busyTimeoutMilliseconds = milliseconds
        }
    }
}

Sources/PackageCollections/PackageCollections.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ public struct PackageCollections: PackageCollectionsProtocol {
6565
if self.storageContainer.owned {
6666
try self.storageContainer.storage.close()
6767
}
68+
try self.metadataProvider.close()
6869
}
6970

7071
// MARK: - Collections

Sources/PackageCollections/Providers/GitHubPackageMetadataProvider.swift

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,27 +27,37 @@ struct GitHubPackageMetadataProvider: PackageMetadataProvider {
2727
private let diagnosticsEngine: DiagnosticsEngine?
2828
private let decoder: JSONDecoder
2929

30-
private let cache: ThreadSafeKeyValueStore<PackageReference, (package: Model.PackageBasicMetadata, timestamp: DispatchTime)>?
30+
private let cache: SQLiteBackedCache<CacheValue>?
3131

3232
init(configuration: Configuration = .init(), httpClient: HTTPClient? = nil, diagnosticsEngine: DiagnosticsEngine? = nil) {
3333
self.configuration = configuration
3434
self.httpClient = httpClient ?? Self.makeDefaultHTTPClient(diagnosticsEngine: diagnosticsEngine)
3535
self.diagnosticsEngine = diagnosticsEngine
3636
self.decoder = JSONDecoder.makeWithDefaults()
37-
self.cache = configuration.cacheTTLInSeconds > 0 ? .init() : nil
37+
if configuration.cacheTTLInSeconds > 0 {
38+
var cacheConfig = SQLiteBackedCacheConfiguration()
39+
cacheConfig.maxSizeInMegabytes = configuration.cacheSizeInMegabytes
40+
self.cache = SQLiteBackedCache<CacheValue>(name: "github_cache", path: configuration.cacheDir.appending(component: "package-metadata.db"), configuration: cacheConfig, diagnosticsEngine: diagnosticsEngine)
41+
} else {
42+
self.cache = nil
43+
}
44+
}
45+
46+
/// Closes the metadata cache; does nothing when caching is disabled (no cache was created).
func close() throws {
    guard let cache = self.cache else {
        return
    }
    try cache.close()
}
3949

4050
func get(_ reference: PackageReference, callback: @escaping (Result<Model.PackageBasicMetadata, Error>) -> Void) {
4151
guard reference.kind == .remote else {
4252
return callback(.failure(Errors.invalidReferenceType(reference)))
4353
}
44-
guard let baseURL = self.apiURL(reference.location) else {
54+
guard let baseURL = Self.apiURL(reference.location) else {
4555
return callback(.failure(Errors.invalidGitURL(reference.location)))
4656
}
4757

48-
if let cachedMetadata = self.cache?[reference] {
49-
if cachedMetadata.timestamp + DispatchTimeInterval.seconds(self.configuration.cacheTTLInSeconds) > DispatchTime.now() {
50-
return callback(.success(cachedMetadata.package))
58+
if let cached = try? self.cache?.get(key: reference.identity.description) {
59+
if cached.dispatchTime + DispatchTimeInterval.seconds(self.configuration.cacheTTLInSeconds) > DispatchTime.now() {
60+
return callback(.success(cached.package))
5161
}
5262
}
5363

@@ -144,22 +154,12 @@ struct GitHubPackageMetadataProvider: PackageMetadataProvider {
144154
processedAt: Date()
145155
)
146156

147-
if let cache = self.cache {
148-
cache[reference] = (model, DispatchTime.now())
149-
150-
if cache.count > self.configuration.cacheSize {
151-
DispatchQueue.sharedConcurrent.async {
152-
// Delete oldest entries with some room for growth
153-
let sortedCacheEntries = cache.get().sorted { $0.value.timestamp < $1.value.timestamp }
154-
let deleteCount = sortedCacheEntries.count - (self.configuration.cacheSize / 2)
155-
self.diagnosticsEngine?.emit(note: "Cache size limit exceeded, deleting the oldest \(deleteCount) entries")
156-
157-
for index in 0 ..< deleteCount {
158-
_ = cache.removeValue(forKey: sortedCacheEntries[index].key)
159-
}
160-
}
161-
}
157+
do {
158+
try self.cache?.put(key: reference.identity.description, value: CacheValue(package: model, timestamp: DispatchTime.now()), replace: true)
159+
} catch {
160+
self.diagnosticsEngine?.emit(.warning("Failed to save GitHub metadata for package \(reference) to cache: \(error)"))
162161
}
162+
163163
callback(.success(model))
164164
}
165165
} catch {
@@ -168,7 +168,7 @@ struct GitHubPackageMetadataProvider: PackageMetadataProvider {
168168
}
169169
}
170170

171-
internal func apiURL(_ url: String) -> Foundation.URL? {
171+
internal static func apiURL(_ url: String) -> Foundation.URL? {
172172
do {
173173
let regex = try NSRegularExpression(pattern: #"([^/@]+)[:/]([^:/]+)/([^/.]+)(\.git)?$"#, options: .caseInsensitive)
174174
if let match = regex.firstMatch(in: url, options: [], range: NSRange(location: 0, length: url.count)) {
@@ -214,17 +214,20 @@ struct GitHubPackageMetadataProvider: PackageMetadataProvider {
214214
public struct Configuration {
215215
public var apiLimitWarningThreshold: Int
216216
public var authTokens: [AuthTokenType: String]?
217+
public var cacheDir: AbsolutePath
218+
public var cacheSizeInMegabytes: Int
217219
public var cacheTTLInSeconds: Int
218-
public var cacheSize: Int
219220

220221
public init(authTokens: [AuthTokenType: String]? = nil,
221222
apiLimitWarningThreshold: Int? = nil,
223+
cacheDir: AbsolutePath? = nil,
222224
cacheTTLInSeconds: Int? = nil,
223-
cacheSize: Int? = nil) {
225+
cacheSizeInMegabytes: Int? = nil) {
224226
self.authTokens = authTokens
225227
self.apiLimitWarningThreshold = apiLimitWarningThreshold ?? 5
228+
self.cacheDir = cacheDir.map(resolveSymlinks) ?? localFileSystem.swiftPMCacheDirectory.appending(components: "package-metadata")
229+
self.cacheSizeInMegabytes = cacheSizeInMegabytes ?? 10
226230
self.cacheTTLInSeconds = cacheTTLInSeconds ?? 3600
227-
self.cacheSize = cacheSize ?? 1000
228231
}
229232
}
230233

@@ -236,6 +239,20 @@ struct GitHubPackageMetadataProvider: PackageMetadataProvider {
236239
case invalidAuthToken(URL)
237240
case apiLimitsExceeded(URL, Int)
238241
}
242+
243+
/// A cached package metadata entry together with the time it was recorded.
///
/// NOTE(review): `timestamp` holds `DispatchTime.uptimeNanoseconds`, which is presumably only
/// comparable within a single boot session; since this value is written to a persistent cache,
/// confirm that TTL checks behave sensibly after a restart.
struct CacheValue: Codable {
    let package: Model.PackageBasicMetadata
    let timestamp: UInt64

    /// Captures `package` along with the uptime (in nanoseconds) of `timestamp`.
    init(package: Model.PackageBasicMetadata, timestamp: DispatchTime) {
        self.timestamp = timestamp.uptimeNanoseconds
        self.package = package
    }

    /// The recorded timestamp rebuilt as a `DispatchTime`.
    var dispatchTime: DispatchTime {
        return DispatchTime(uptimeNanoseconds: self.timestamp)
    }
}
239256
}
240257

241258
extension GitHubPackageMetadataProvider {

0 commit comments

Comments
 (0)