@@ -14,15 +14,18 @@ import SwiftFormatConfiguration
14
14
import SwiftFormatCore
15
15
import SwiftSyntax
16
16
17
+ private let utf8Newline = UTF8 . CodeUnit ( ascii: " \n " )
18
+ private let utf8Tab = UTF8 . CodeUnit ( ascii: " \t " )
19
+
17
20
/// Emits linter errors for whitespace style violations by comparing the raw text of the input Swift
18
21
/// code with formatted text.
19
22
public class WhitespaceLinter {
20
23
21
24
/// The text of the input source code to be linted.
22
- private let userText : String
25
+ private let userText : [ UTF8 . CodeUnit ]
23
26
24
27
/// The formatted version of `userText`.
25
- private let formattedText : String
28
+ private let formattedText : [ UTF8 . CodeUnit ]
26
29
27
30
/// The Context object containing the DiagnosticEngine.
28
31
private let context : Context
@@ -37,8 +40,8 @@ public class WhitespaceLinter {
37
40
/// - formatted: The formatted text to compare to `user`.
38
41
/// - context: The context object containing the DiagnosticEngine instance we wish to use.
39
42
public init ( user: String , formatted: String , context: Context ) {
40
- self . userText = user
41
- self . formattedText = formatted
43
+ self . userText = Array ( user. utf8 )
44
+ self . formattedText = Array ( formatted. utf8 )
42
45
self . context = context
43
46
self . isLineTooLong = false
44
47
}
@@ -48,7 +51,7 @@ public class WhitespaceLinter {
48
51
var userOffset = 0
49
52
var formOffset = 0
50
53
var isFirstCharater = true
51
- var lastChar : Character ?
54
+ var lastChar : UTF8 . CodeUnit ?
52
55
53
56
repeat {
54
57
let userNext = nextCharacter ( offset: userOffset, data: self . userText)
@@ -90,13 +93,12 @@ public class WhitespaceLinter {
90
93
/// - userWs: The user leading whitespace buffer at the current character.
91
94
/// - formattedWs: The formatted leading whitespace buffer at the current character.
92
95
private func compareWhitespace(
93
- userOffset: Int , formOffset: Int , isFirstCharacter: Bool , userWs: String , formattedWs: String
96
+ userOffset: Int , formOffset: Int , isFirstCharacter: Bool ,
97
+ userWs: [ UTF8 . CodeUnit ] , formattedWs: [ UTF8 . CodeUnit ]
94
98
) {
95
99
// e.g. "\n" -> ["", ""], and "" -> [""]
96
- let userTokens = userWs. split (
97
- separator: " \n " , omittingEmptySubsequences: false ) . map ( String . init)
98
- let formTokens = formattedWs. split (
99
- separator: " \n " , omittingEmptySubsequences: false ) . map ( String . init)
100
+ let userTokens = userWs. split ( separator: utf8Newline, omittingEmptySubsequences: false )
101
+ let formTokens = formattedWs. split ( separator: utf8Newline, omittingEmptySubsequences: false )
100
102
101
103
checkForLineLengthErrors (
102
104
userOffset: userOffset,
@@ -135,7 +137,8 @@ public class WhitespaceLinter {
135
137
/// - user: The tokenized user whitespace buffer.
136
138
/// - form: The tokenized formatted whitespace buffer.
137
139
private func checkForLineLengthErrors(
138
- userOffset: Int , formOffset: Int , isFirstCharacter: Bool , user: [ String ] , form: [ String ]
140
+ userOffset: Int , formOffset: Int , isFirstCharacter: Bool ,
141
+ user: [ ArraySlice < UTF8 . CodeUnit > ] , form: [ ArraySlice < UTF8 . CodeUnit > ]
139
142
) {
140
143
// Only run this check at the start of a line.
141
144
guard
@@ -164,7 +167,7 @@ public class WhitespaceLinter {
164
167
let char = userText [ index]
165
168
166
169
// Count UTF-8 code units up to the newline.
167
- if char == " \n " { break } else { userLength += 1 }
170
+ if char == utf8Newline { break } else { userLength += 1 }
168
171
}
169
172
}
170
173
@@ -188,7 +191,7 @@ public class WhitespaceLinter {
188
191
let char = formattedText [ index]
189
192
190
193
// Count UTF-8 code units up to the newline.
191
- if char == " \n " { break } else { formLength += 1 }
194
+ if char == utf8Newline { break } else { formLength += 1 }
192
195
}
193
196
}
194
197
@@ -218,7 +221,8 @@ public class WhitespaceLinter {
218
221
/// - user: The tokenized user whitespace buffer.
219
222
/// - form: The tokenized formatted whitespace buffer.
220
223
private func checkForIndentationErrors(
221
- userOffset: Int , isFirstCharacter: Bool , user: [ String ] , form: [ String ]
224
+ userOffset: Int , isFirstCharacter: Bool ,
225
+ user: [ ArraySlice < UTF8 . CodeUnit > ] , form: [ ArraySlice < UTF8 . CodeUnit > ]
222
226
) {
223
227
guard form. count > 1 && user. count > 1 else {
224
228
// Ordinarily, we only look for indentation spacing following a newline. The first character
@@ -242,8 +246,8 @@ public class WhitespaceLinter {
242
246
}
243
247
if form. last != user. last {
244
248
let pos = calculatePosition ( offset: userOffset + offset, data: self . userText)
245
- let actual = indentation ( of: user. last ?? " " )
246
- let expected = indentation ( of: form. last ?? " " )
249
+ let actual = indentation ( of: user. last ?? [ ] )
250
+ let expected = indentation ( of: form. last ?? [ ] )
247
251
diagnose (
248
252
. indentationError( expected: expected, actual: actual) ,
249
253
line: pos. line,
@@ -258,7 +262,9 @@ public class WhitespaceLinter {
258
262
/// - userOffset: The current character offset within the user text.
259
263
/// - user: The tokenized user whitespace buffer.
260
264
/// - form: The tokenized formatted whitespace buffer.
261
- private func checkForTrailingWhitespaceErrors( userOffset: Int , user: [ String ] , form: [ String ] ) {
265
+ private func checkForTrailingWhitespaceErrors(
266
+ userOffset: Int , user: [ ArraySlice < UTF8 . CodeUnit > ] , form: [ ArraySlice < UTF8 . CodeUnit > ]
267
+ ) {
262
268
guard form. count > 1 && user. count > 1 else { return }
263
269
var offset = 0
264
270
for i in 0 ..< ( user. count - 1 ) {
@@ -282,14 +288,15 @@ public class WhitespaceLinter {
282
288
/// - user: The tokenized user whitespace buffer.
283
289
/// - form: The tokenized formatted whitespace buffer.
284
290
private func checkForSpacingErrors(
285
- userOffset: Int , isFirstCharacter: Bool , user: [ String ] , form: [ String ]
291
+ userOffset: Int , isFirstCharacter: Bool ,
292
+ user: [ ArraySlice < UTF8 . CodeUnit > ] , form: [ ArraySlice < UTF8 . CodeUnit > ]
286
293
) {
287
294
// The spaces in front of the first character of a file are indentation and not spacing related.
288
295
guard form. count == 1 && user. count == 1 && !isFirstCharacter else { return }
289
296
guard form [ 0 ] != user [ 0 ] else { return }
290
297
291
298
let pos = calculatePosition ( offset: userOffset, data: self . userText)
292
- let illegalSpacingCharacters = [ " \t " ]
299
+ let illegalSpacingCharacters : [ UTF8 . CodeUnit ] = [ utf8Tab ]
293
300
if illegalSpacingCharacters. contains ( where: { user [ 0 ] . contains ( $0) } ) {
294
301
diagnose ( . spacingCharError, line: pos. line, column: pos. column, utf8Offset: 0 )
295
302
} else if form [ 0 ] . count != user [ 0 ] . count {
@@ -315,7 +322,9 @@ public class WhitespaceLinter {
315
322
/// - userOffset: The current character offset within the user text.
316
323
/// - user: The tokenized user whitespace buffer.
317
324
/// - form: The tokenized formatted whitespace buffer.
318
- private func checkForRemoveLineErrors( userOffset: Int , user: [ String ] , form: [ String ] ) {
325
+ private func checkForRemoveLineErrors(
326
+ userOffset: Int , user: [ ArraySlice < UTF8 . CodeUnit > ] , form: [ ArraySlice < UTF8 . CodeUnit > ]
327
+ ) {
319
328
guard form. count < user. count else { return }
320
329
var offset = 0
321
330
for i in 0 ..< ( user. count - form. count) {
@@ -343,7 +352,9 @@ public class WhitespaceLinter {
343
352
/// - userOffset: The current character offset within the user text.
344
353
/// - user: The tokenized user whitespace buffer.
345
354
/// - form: The tokenized formatted whitespace buffer.
346
- private func checkForAddLineErrors( userOffset: Int , user: [ String ] , form: [ String ] ) {
355
+ private func checkForAddLineErrors(
356
+ userOffset: Int , user: [ ArraySlice < UTF8 . CodeUnit > ] , form: [ ArraySlice < UTF8 . CodeUnit > ]
357
+ ) {
347
358
guard form. count > user. count && !isLineTooLong else { return }
348
359
let pos = calculatePosition ( offset: userOffset, data: self . userText)
349
360
diagnose (
@@ -363,17 +374,17 @@ public class WhitespaceLinter {
363
374
/// - data: The input text, as UTF-8 code units.
364
375
/// - Returns: A tuple of the new offset, the non-whitespace UTF-8 code unit we landed on, and a buffer
365
376
/// containing the leading whitespace.
366
- private func nextCharacter( offset: Int , data: String )
367
- -> ( offset: Int , char: Character ? , whitespace: String )
377
+ private func nextCharacter( offset: Int , data: [ UTF8 . CodeUnit ] )
378
+ -> ( offset: Int , char: UTF8 . CodeUnit ? , whitespace: [ UTF8 . CodeUnit ] )
368
379
{
369
- var whitespaceBuffer = " "
380
+ var whitespaceBuffer = [ UTF8 . CodeUnit ] ( )
370
381
371
382
for i in offset..< data. count {
372
383
let index = data. index ( data. startIndex, offsetBy: i)
373
384
let char = data [ index]
374
385
375
- if char. isWhitespace {
376
- whitespaceBuffer += String ( char)
386
+ if UnicodeScalar ( char) . properties . isWhitespace {
387
+ whitespaceBuffer. append ( char)
377
388
} else {
378
389
return ( offset: i, char: char, whitespace: whitespaceBuffer)
379
390
}
@@ -387,12 +398,12 @@ public class WhitespaceLinter {
387
398
/// - offset: The printable character offset.
388
399
/// - data: The input text, as UTF-8 code units, for which we want the line and column numbers.
389
400
/// - Returns: A tuple with the line and column numbers within `data`.
390
- private func calculatePosition( offset: Int , data: String ) -> ( line: Int , column: Int ) {
401
+ private func calculatePosition( offset: Int , data: [ UTF8 . CodeUnit ] ) -> ( line: Int , column: Int ) {
391
402
var line = 1
392
403
var column = 0
393
404
394
405
for (index, char) in data. enumerated ( ) {
395
- if char == " \n " {
406
+ if char == utf8Newline {
396
407
line += 1
397
408
column = 0
398
409
} else {
@@ -432,15 +443,15 @@ public class WhitespaceLinter {
432
443
/// leading spacing for a line.
433
444
///
434
445
/// A return value of `.none` indicates that there was no indentation.
435
- private func indentation( of whitespace: String ) -> WhitespaceIndentation {
446
+ private func indentation( of whitespace: ArraySlice < UTF8 . CodeUnit > ) -> WhitespaceIndentation {
436
447
if whitespace. count == 0 {
437
448
return . none
438
449
}
439
450
440
- var orderedRuns : [ ( char: Character , count: Int ) ] = [ ]
451
+ var orderedRuns : [ ( char: UTF8 . CodeUnit , count: Int ) ] = [ ]
441
452
for char in whitespace {
442
453
// Any non-whitespace character indicates the end of the indentation whitespace.
443
- guard char. isWhitespace else { break }
454
+ guard UnicodeScalar ( char) . properties . isWhitespace else { break }
444
455
445
456
let lastRun = orderedRuns. last
446
457
if lastRun? . char == char {
@@ -452,7 +463,7 @@ public class WhitespaceLinter {
452
463
453
464
let indents = orderedRuns. map { run in
454
465
// Assumes any non-tab whitespace character is some type of space.
455
- return run. char == " \t " ? Indent . tabs ( run. count) : Indent . spaces ( run. count)
466
+ return run. char == utf8Tab ? Indent . tabs ( run. count) : Indent . spaces ( run. count)
456
467
}
457
468
if indents. count == 1 , let onlyIndent = indents. first {
458
469
return . homogeneous( onlyIndent)
0 commit comments