Skip to content

Reduce number of read calls when not asking for progress reporting. #623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Benchmarks/Benchmarks/DataIO/BenchmarkDataIO.swift
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,19 @@ let benchmarks = {
blackHole(try Data(contentsOf: readMe))
}

// Benchmarks a single whole-file read of a 1 GiB file via Data(contentsOf:).
// With the no-progress fast path, this should issue very few read() calls
// (chunked only by the Int32.max per-call cap), rather than one per 4 KiB block.
Benchmark("read-hugeFile",
configuration: .init(
setup: {
// 1 << 30 bytes = 1 GiB of generated test data written to the shared
// `readMe` URL before the measured closure runs. `try!` is acceptable in
// benchmark setup: a failure here should abort the benchmark outright.
try! generateTestData(count: 1 << 30).write(to: readMe)
},
teardown: {
// Remove the large temp file so repeated runs don't accumulate disk usage.
cleanup(at: readMe)
}
)
) { benchmark in
// blackHole prevents the optimizer from eliding the read.
blackHole(try Data(contentsOf: readMe))
}

// MARK: base64

Benchmark("base64-encode", configuration: .init(scalingFactor: .kilo)) { benchmark in
Expand Down
44 changes: 28 additions & 16 deletions Sources/FoundationEssentials/Data/Data+Reading.swift
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,6 @@ internal func readBytesFromFile(path inPath: PathOrURL, reportProgress: Bool, ma

let fileSize = min(Int(clamping: filestat.st_size), maxLength ?? Int.max)
let fileType = filestat.st_mode & S_IFMT
let preferredChunkSize = filestat.st_blksize
#if !NO_FILESYSTEM
let shouldMap = shouldMapFileDescriptor(fd, path: inPath, options: options)
#else
Expand Down Expand Up @@ -370,7 +369,7 @@ internal func readBytesFromFile(path inPath: PathOrURL, reportProgress: Bool, ma

localProgress?.becomeCurrent(withPendingUnitCount: Int64(fileSize))
do {
let length = try readBytesFromFileDescriptor(fd, path: inPath, buffer: bytes, length: fileSize, chunkSize: size_t(preferredChunkSize), reportProgress: reportProgress)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The block size is just too small (4k on macOS, e.g.), and results in a very large number of read calls.

let length = try readBytesFromFileDescriptor(fd, path: inPath, buffer: bytes, length: fileSize, reportProgress: reportProgress)
localProgress?.resignCurrent()

result = ReadBytesResult(bytes: bytes, length: length, deallocator: .free)
Expand All @@ -389,17 +388,24 @@ internal func readBytesFromFile(path inPath: PathOrURL, reportProgress: Bool, ma
#endif
}

// Takes an `Int` size and returns an `Int` to match `Data`'s count. If we are going to read more than Int.max, throws - because we won't be able to store it in `Data`.
private func readBytesFromFileDescriptor(_ fd: Int32, path: PathOrURL, buffer inBuffer: UnsafeMutableRawPointer, length: Int, chunkSize: size_t, reportProgress: Bool) throws -> Int {

/// Read data from a file descriptor.
/// Takes an `Int` size and returns an `Int` to match `Data`'s count. If we are going to read more than Int.max, throws - because we won't be able to store it in `Data`.
/// If `readUntilLength` is `false`, then we will end the read if we receive less than `length` bytes. This can be used to read from something like a socket, where the `length` simply represents the maximum size you can read at once.
private func readBytesFromFileDescriptor(_ fd: Int32, path: PathOrURL, buffer inBuffer: UnsafeMutableRawPointer, length: Int, readUntilLength: Bool = true, reportProgress: Bool) throws -> Int {
var buffer = inBuffer
// If chunkSize (8-byte value) is more than blksize_t.max (4 byte value), then use the 4 byte max and chunk
var preferredChunkSize = chunkSize
let localProgress = (reportProgress && Progress.current() != nil) ? Progress(totalUnitCount: Int64(length)) : nil

if localProgress != nil {
// To report progress, we have to try reading in smaller chunks than the whole file. If we have a readingAttributes struct, we already know it. Otherwise, get one that makes sense for this destination.
preferredChunkSize = chunkSize
let preferredChunkSize: size_t
let localProgress: Progress?

if Progress.current() != nil && reportProgress {
localProgress = Progress(totalUnitCount: Int64(length))
// To report progress, we have to try reading in smaller chunks than the whole file. Aim for about 1% increments.
preferredChunkSize = max(length / 100, 1024 * 4)
} else {
localProgress = nil
// Get it all in one go, if possible
preferredChunkSize = length
}

var numBytesRemaining = length
Expand All @@ -408,12 +414,12 @@ private func readBytesFromFileDescriptor(_ fd: Int32, path: PathOrURL, buffer in
throw CocoaError(.userCancelled)
}

// We will only request a max of Int32.max bytes
var numBytesRequested = CUnsignedInt(clamping: preferredChunkSize)
// We will only request a max of Int32.max bytes. Some platforms will return an error over that amount.
var numBytesRequested = CUnsignedInt(clamping: min(preferredChunkSize, Int(CInt.max)))

// Furthermore, don't request more than the number of bytes remaining
if numBytesRequested > numBytesRemaining {
numBytesRequested = CUnsignedInt(clamping: numBytesRemaining)
numBytesRequested = CUnsignedInt(clamping: min(numBytesRemaining, Int(CInt.max)))
}

var numBytesRead: CInt
Expand All @@ -431,18 +437,24 @@ private func readBytesFromFileDescriptor(_ fd: Int32, path: PathOrURL, buffer in
} while numBytesRead < 0 && errno == EINTR

if numBytesRead < 0 {
// The read failed
let errNum = errno
logFileIOErrno(errNum, at: "read")
// The read failed
throw CocoaError.errorWithFilePath(path, errno: errNum, reading: true)
} else if numBytesRead == 0 {
// Getting zero here is weird, since it may imply unexpected end of file... If we do, return the number of bytes read so far (which is compatible with the way read() would work with just one call).
break
} else {
// Partial read
numBytesRemaining -= Int(clamping: numBytesRead)
if numBytesRemaining < 0 {
// Just in case; we do not want to have a negative amount of bytes remaining. We will just assume that is the end.
numBytesRemaining = 0
}
localProgress?.completedUnitCount = Int64(length - numBytesRemaining)
// Anytime we read less than actually requested, stop, since the length is considered "max" for socket calls
if numBytesRead < numBytesRequested {

// The `readUntilLength` argument controls if we should end early when `read` returns less than the amount requested, or if we should continue to loop until we have reached `length` bytes.
if !readUntilLength && numBytesRead < numBytesRequested {
break
}

Expand Down
47 changes: 14 additions & 33 deletions Sources/FoundationEssentials/Data/Data+Writing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -43,43 +43,20 @@ private func openFileDescriptorProtected(path: UnsafePointer<CChar>, flags: Int3
#endif
}

/// Returns a sensible I/O chunk size for writing to `fd`, or `nil` if none
/// could be determined (in which case the caller should pick its own default).
///
/// On Darwin, prefers the filesystem's optimal transfer size (`statfs.f_iosize`)
/// and falls back to the file's preferred block size (`stat.st_blksize`).
/// On other platforms there is no portable equivalent, so it returns `nil`.
///
/// - Parameter fd: An open file descriptor.
/// - Returns: A positive chunk size in bytes, or `nil`.
private func preferredChunkSizeForFileDescriptor(_ fd: Int32) -> Int? {
#if canImport(Darwin)
    var preferredChunkSize = -1
    var fsinfo = statfs()
    var fileinfo = stat()
    if fstatfs(fd, &fsinfo) != -1 {
        // fstatfs succeeded: f_iosize is the filesystem's optimal transfer size.
        // (Previously this branch incorrectly fell through to fstat and only
        // read f_iosize on fstatfs *failure*, when the struct was unpopulated.)
        preferredChunkSize = Int(fsinfo.f_iosize)
    } else if fstat(fd, &fileinfo) != -1 {
        // Fallback: the file's preferred block size for I/O.
        preferredChunkSize = Int(fileinfo.st_blksize)
    } else {
        logFileIOErrno(errno, at: "fstat")
    }

    // Make sure we don't return something completely bone-headed, like zero.
    if (preferredChunkSize <= 0) {
        return nil
    }

    return preferredChunkSize
#else
    return nil
#endif
}

private func writeToFileDescriptorWithProgress(_ fd: Int32, buffer: UnsafeRawBufferPointer, reportProgress: Bool) throws -> Int {
// Fetch this once
let length = buffer.count
var preferredChunkSize = length
let localProgress = (reportProgress && Progress.current() != nil) ? Progress(totalUnitCount: Int64(preferredChunkSize)) : nil

if localProgress != nil {
// To report progress, we have to try writing in smaller chunks than the whole file. Get one that makes sense for this destination.
if let smarterChunkSize = preferredChunkSizeForFileDescriptor(fd) {
preferredChunkSize = smarterChunkSize
}
let preferredChunkSize: Int
let localProgress: Progress?
if reportProgress && Progress.current() != nil {
// To report progress, we have to try writing in smaller chunks than the whole file.
// Aim for about 1% increments in progress updates.
preferredChunkSize = max(length / 100, 1024 * 4)
localProgress = Progress(totalUnitCount: Int64(length))
} else {
preferredChunkSize = length
localProgress = nil
}

var nextRange = buffer.startIndex..<buffer.startIndex.advanced(by: length)
Expand Down Expand Up @@ -118,6 +95,10 @@ private func writeToFileDescriptorWithProgress(_ fd: Int32, buffer: UnsafeRawBuf
break
} else {
numBytesRemaining -= Int(numBytesWritten)
if numBytesRemaining < 0 {
// Just in case, do not allow a negative number of bytes remaining
numBytesRemaining = 0
}
if let localProgress {
localProgress.completedUnitCount = Int64(length - numBytesRemaining)
}
Expand Down
17 changes: 12 additions & 5 deletions Tests/FoundationEssentialsTests/DataIOTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@ class DataIOTests : XCTestCase {
URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true).appendingPathComponent("testfile-\(UUID().uuidString)")
}

func generateTestData() -> Data {
// 16 MB file, big enough to trigger things like chunking
let count = 16_777_216

func generateTestData(count: Int = 16_777_216) -> Data {
let memory = malloc(count)!
let ptr = memory.bindMemory(to: UInt8.self, capacity: count)

Expand Down Expand Up @@ -209,10 +206,20 @@ class DataIOTests : XCTestCase {
let size = 0x80010000
let url = testURL()

let data = Data(count: size)
let data = generateTestData(count: size)

try data.write(to: url)
let read = try! Data(contentsOf: url)

// No need to compare the contents, but do compare the size
XCTAssertEqual(data.count, read.count)

#if FOUNDATION_FRAMEWORK
// Try the NSData path
let readNS = try! NSData(contentsOf: url) as Data
XCTAssertEqual(data.count, readNS.count)
#endif

cleanup(at: url)
#endif
}
Expand Down