Skip to content

Commit 39344d7

Browse files
authored
Reduce number of read calls when not asking for progress reporting. (#623)
* Reduce number of read calls when not asking for progress reporting. Also, when asking for progress reporting, cap the number of updates at approximately 100 instead of fileSize/4k. * Add readUntilEOF option to file reading primitive and set it to true by default * Address review comments - change readUntilEOF to readUntilLength
1 parent ff12a98 commit 39344d7

File tree

4 files changed

+67
-54
lines changed

4 files changed

+67
-54
lines changed

Benchmarks/Benchmarks/DataIO/BenchmarkDataIO.swift

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,19 @@ let benchmarks = {
103103
blackHole(try Data(contentsOf: readMe))
104104
}
105105

106+
Benchmark("read-hugeFile",
107+
configuration: .init(
108+
setup: {
109+
try! generateTestData(count: 1 << 30).write(to: readMe)
110+
},
111+
teardown: {
112+
cleanup(at: readMe)
113+
}
114+
)
115+
) { benchmark in
116+
blackHole(try Data(contentsOf: readMe))
117+
}
118+
106119
// MARK: base64
107120

108121
Benchmark("base64-encode", configuration: .init(scalingFactor: .kilo)) { benchmark in

Sources/FoundationEssentials/Data/Data+Reading.swift

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,6 @@ internal func readBytesFromFile(path inPath: PathOrURL, reportProgress: Bool, ma
322322

323323
let fileSize = min(Int(clamping: filestat.st_size), maxLength ?? Int.max)
324324
let fileType = filestat.st_mode & S_IFMT
325-
let preferredChunkSize = filestat.st_blksize
326325
#if !NO_FILESYSTEM
327326
let shouldMap = shouldMapFileDescriptor(fd, path: inPath, options: options)
328327
#else
@@ -380,7 +379,7 @@ internal func readBytesFromFile(path inPath: PathOrURL, reportProgress: Bool, ma
380379

381380
localProgress?.becomeCurrent(withPendingUnitCount: Int64(fileSize))
382381
do {
383-
let length = try readBytesFromFileDescriptor(fd, path: inPath, buffer: bytes, length: fileSize, chunkSize: size_t(preferredChunkSize), reportProgress: reportProgress)
382+
let length = try readBytesFromFileDescriptor(fd, path: inPath, buffer: bytes, length: fileSize, reportProgress: reportProgress)
384383
localProgress?.resignCurrent()
385384

386385
result = ReadBytesResult(bytes: bytes, length: length, deallocator: .free)
@@ -399,17 +398,24 @@ internal func readBytesFromFile(path inPath: PathOrURL, reportProgress: Bool, ma
399398
#endif
400399
}
401400

402-
// Takes an `Int` size and returns an `Int` to match `Data`'s count. If we are going to read more than Int.max, throws - because we won't be able to store it in `Data`.
403-
private func readBytesFromFileDescriptor(_ fd: Int32, path: PathOrURL, buffer inBuffer: UnsafeMutableRawPointer, length: Int, chunkSize: size_t, reportProgress: Bool) throws -> Int {
404-
401+
/// Read data from a file descriptor.
402+
/// Takes an `Int` size and returns an `Int` to match `Data`'s count. If we are going to read more than Int.max, throws - because we won't be able to store it in `Data`.
403+
/// If `readUntilLength` is `false`, then we will end the read if we receive less than `length` bytes. This can be used to read from something like a socket, where the `length` simply represents the maximum size you can read at once.
404+
private func readBytesFromFileDescriptor(_ fd: Int32, path: PathOrURL, buffer inBuffer: UnsafeMutableRawPointer, length: Int, readUntilLength: Bool = true, reportProgress: Bool) throws -> Int {
405405
var buffer = inBuffer
406406
// If chunkSize (8-byte value) is more than blksize_t.max (4 byte value), then use the 4 byte max and chunk
407-
var preferredChunkSize = chunkSize
408-
let localProgress = (reportProgress && Progress.current() != nil) ? Progress(totalUnitCount: Int64(length)) : nil
409407

410-
if localProgress != nil {
411-
// To report progress, we have to try reading in smaller chunks than the whole file. If we have a readingAttributes struct, we already know it. Otherwise, get one that makes sense for this destination.
412-
preferredChunkSize = chunkSize
408+
let preferredChunkSize: size_t
409+
let localProgress: Progress?
410+
411+
if Progress.current() != nil && reportProgress {
412+
localProgress = Progress(totalUnitCount: Int64(length))
413+
// To report progress, we have to try reading in smaller chunks than the whole file. Aim for about 1% increments.
414+
preferredChunkSize = max(length / 100, 1024 * 4)
415+
} else {
416+
localProgress = nil
417+
// Get it all in one go, if possible
418+
preferredChunkSize = length
413419
}
414420

415421
var numBytesRemaining = length
@@ -418,12 +424,12 @@ private func readBytesFromFileDescriptor(_ fd: Int32, path: PathOrURL, buffer in
418424
throw CocoaError(.userCancelled)
419425
}
420426

421-
// We will only request a max of Int32.max bytes
422-
var numBytesRequested = CUnsignedInt(clamping: preferredChunkSize)
427+
// We will only request a max of Int32.max bytes. Some platforms will return an error over that amount.
428+
var numBytesRequested = CUnsignedInt(clamping: min(preferredChunkSize, Int(CInt.max)))
423429

424430
// Furthermore, don't request more than the number of bytes remaining
425431
if numBytesRequested > numBytesRemaining {
426-
numBytesRequested = CUnsignedInt(clamping: numBytesRemaining)
432+
numBytesRequested = CUnsignedInt(clamping: min(numBytesRemaining, Int(CInt.max)))
427433
}
428434

429435
var numBytesRead: CInt
@@ -441,18 +447,24 @@ private func readBytesFromFileDescriptor(_ fd: Int32, path: PathOrURL, buffer in
441447
} while numBytesRead < 0 && errno == EINTR
442448

443449
if numBytesRead < 0 {
450+
// The read failed
444451
let errNum = errno
445452
logFileIOErrno(errNum, at: "read")
446-
// The read failed
447453
throw CocoaError.errorWithFilePath(path, errno: errNum, reading: true)
448454
} else if numBytesRead == 0 {
449455
// Getting zero here is weird, since it may imply unexpected end of file... If we do, return the number of bytes read so far (which is compatible with the way read() would work with just one call).
450456
break
451457
} else {
458+
// Partial read
452459
numBytesRemaining -= Int(clamping: numBytesRead)
460+
if numBytesRemaining < 0 {
461+
// Just in case; we do not want to have a negative amount of bytes remaining. We will just assume that is the end.
462+
numBytesRemaining = 0
463+
}
453464
localProgress?.completedUnitCount = Int64(length - numBytesRemaining)
454-
// Anytime we read less than actually requested, stop, since the length is considered "max" for socket calls
455-
if numBytesRead < numBytesRequested {
465+
466+
// The `readUntilLength` argument controls if we should end early when `read` returns less than the amount requested, or if we should continue to loop until we have reached `length` bytes.
467+
if !readUntilLength && numBytesRead < numBytesRequested {
456468
break
457469
}
458470

Sources/FoundationEssentials/Data/Data+Writing.swift

Lines changed: 14 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -43,43 +43,20 @@ private func openFileDescriptorProtected(path: UnsafePointer<CChar>, flags: Int3
4343
#endif
4444
}
4545

46-
private func preferredChunkSizeForFileDescriptor(_ fd: Int32) -> Int? {
47-
#if canImport(Darwin)
48-
var preferredChunkSize = -1
49-
var fsinfo = statfs()
50-
var fileinfo = stat()
51-
if fstatfs(fd, &fsinfo) != -1 {
52-
if fstat(fd, &fileinfo) != -1 {
53-
preferredChunkSize = Int(fileinfo.st_blksize)
54-
} else {
55-
logFileIOErrno(errno, at: "fstat")
56-
}
57-
} else {
58-
preferredChunkSize = Int(fsinfo.f_iosize)
59-
}
60-
61-
// Make sure we don't return something completely bone-headed, like zero.
62-
if (preferredChunkSize <= 0) {
63-
return nil
64-
}
65-
66-
return preferredChunkSize
67-
#else
68-
return nil
69-
#endif
70-
}
71-
7246
private func writeToFileDescriptorWithProgress(_ fd: Int32, buffer: UnsafeRawBufferPointer, reportProgress: Bool) throws -> Int {
7347
// Fetch this once
7448
let length = buffer.count
75-
var preferredChunkSize = length
76-
let localProgress = (reportProgress && Progress.current() != nil) ? Progress(totalUnitCount: Int64(preferredChunkSize)) : nil
7749

78-
if localProgress != nil {
79-
// To report progress, we have to try writing in smaller chunks than the whole file. Get one that makes sense for this destination.
80-
if let smarterChunkSize = preferredChunkSizeForFileDescriptor(fd) {
81-
preferredChunkSize = smarterChunkSize
82-
}
50+
let preferredChunkSize: Int
51+
let localProgress: Progress?
52+
if reportProgress && Progress.current() != nil {
53+
// To report progress, we have to try writing in smaller chunks than the whole file.
54+
// Aim for about 1% increments in progress updates.
55+
preferredChunkSize = max(length / 100, 1024 * 4)
56+
localProgress = Progress(totalUnitCount: Int64(length))
57+
} else {
58+
preferredChunkSize = length
59+
localProgress = nil
8360
}
8461

8562
var nextRange = buffer.startIndex..<buffer.startIndex.advanced(by: length)
@@ -118,6 +95,10 @@ private func writeToFileDescriptorWithProgress(_ fd: Int32, buffer: UnsafeRawBuf
11895
break
11996
} else {
12097
numBytesRemaining -= Int(numBytesWritten)
98+
if numBytesRemaining < 0 {
99+
// Just in case, do not allow a negative number of bytes remaining
100+
numBytesRemaining = 0
101+
}
121102
if let localProgress {
122103
localProgress.completedUnitCount = Int64(length - numBytesRemaining)
123104
}

Tests/FoundationEssentialsTests/DataIOTests.swift

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,7 @@ class DataIOTests : XCTestCase {
3333
URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true).appendingPathComponent("testfile-\(UUID().uuidString)")
3434
}
3535

36-
func generateTestData() -> Data {
37-
// 16 MB file, big enough to trigger things like chunking
38-
let count = 16_777_216
39-
36+
func generateTestData(count: Int = 16_777_216) -> Data {
4037
let memory = malloc(count)!
4138
let ptr = memory.bindMemory(to: UInt8.self, capacity: count)
4239

@@ -209,10 +206,20 @@ class DataIOTests : XCTestCase {
209206
let size = 0x80010000
210207
let url = testURL()
211208

212-
let data = Data(count: size)
209+
let data = generateTestData(count: size)
213210

214211
try data.write(to: url)
212+
let read = try! Data(contentsOf: url)
213+
214+
// No need to compare the contents, but do compare the size
215+
XCTAssertEqual(data.count, read.count)
215216

217+
#if FOUNDATION_FRAMEWORK
218+
// Try the NSData path
219+
let readNS = try! NSData(contentsOf: url) as Data
220+
XCTAssertEqual(data.count, readNS.count)
221+
#endif
222+
216223
cleanup(at: url)
217224
#endif
218225
}

0 commit comments

Comments
 (0)