Skip to content

Commit 764aef2

Browse files
committed
Move the details of C99 identifier mangling out of ProjectModel and
down into `Basic`. It looks as if, at some point, this was meant to be able to fail because it might remove all the characters and end up with an empty string. Now that it replaces characters instead of removing them, it doesn't seem possible for it to end up with an empty string, assuming it was given a non-empty string.
1 parent 470575c commit 764aef2

File tree

7 files changed

+234
-272
lines changed

7 files changed

+234
-272
lines changed

Sources/Basic/StringConversions.swift

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,212 @@ public extension String {
7373
public mutating func shellEscape() {
7474
self = shellEscaped()
7575
}
76+
77+
/// Creates a string that contains only valid C99 identifier characters.
78+
/// FIXME: We will likely want a better name...
79+
public func mangledForC99() -> String {
80+
// Map C99-invalid Unicode scalars to a replacement character.
81+
let replacementUnichar: UnicodeScalar = "_"
82+
var mangledUnichars: [UnicodeScalar] = self.unicodeScalars.map {
83+
switch $0.value {
84+
case
85+
// A-Z
86+
0x0041...0x005A,
87+
// a-z
88+
0x0061...0x007A,
89+
// 0-9
90+
0x0030...0x0039,
91+
// _
92+
0x005F,
93+
// Latin (1)
94+
0x00AA...0x00AA,
95+
// Special characters (1)
96+
0x00B5...0x00B5, 0x00B7...0x00B7,
97+
// Latin (2)
98+
0x00BA...0x00BA, 0x00C0...0x00D6, 0x00D8...0x00F6,
99+
0x00F8...0x01F5, 0x01FA...0x0217, 0x0250...0x02A8,
100+
// Special characters (2)
101+
0x02B0...0x02B8, 0x02BB...0x02BB, 0x02BD...0x02C1,
102+
0x02D0...0x02D1, 0x02E0...0x02E4, 0x037A...0x037A,
103+
// Greek (1)
104+
0x0386...0x0386, 0x0388...0x038A, 0x038C...0x038C,
105+
0x038E...0x03A1, 0x03A3...0x03CE, 0x03D0...0x03D6,
106+
0x03DA...0x03DA, 0x03DC...0x03DC, 0x03DE...0x03DE,
107+
0x03E0...0x03E0, 0x03E2...0x03F3,
108+
// Cyrillic
109+
0x0401...0x040C, 0x040E...0x044F, 0x0451...0x045C,
110+
0x045E...0x0481, 0x0490...0x04C4, 0x04C7...0x04C8,
111+
0x04CB...0x04CC, 0x04D0...0x04EB, 0x04EE...0x04F5,
112+
0x04F8...0x04F9,
113+
// Armenian (1)
114+
0x0531...0x0556,
115+
// Special characters (3)
116+
0x0559...0x0559,
117+
// Armenian (2)
118+
0x0561...0x0587,
119+
// Hebrew
120+
0x05B0...0x05B9, 0x05BB...0x05BD, 0x05BF...0x05BF,
121+
0x05C1...0x05C2, 0x05D0...0x05EA, 0x05F0...0x05F2,
122+
// Arabic (1)
123+
0x0621...0x063A, 0x0640...0x0652,
124+
// Digits (1)
125+
0x0660...0x0669,
126+
// Arabic (2)
127+
0x0670...0x06B7, 0x06BA...0x06BE, 0x06C0...0x06CE,
128+
0x06D0...0x06DC, 0x06E5...0x06E8, 0x06EA...0x06ED,
129+
// Digits (2)
130+
0x06F0...0x06F9,
131+
// Devanagari and Special character 0x093D.
132+
0x0901...0x0903, 0x0905...0x0939, 0x093D...0x094D,
133+
0x0950...0x0952, 0x0958...0x0963,
134+
// Digits (3)
135+
0x0966...0x096F,
136+
// Bengali (1)
137+
0x0981...0x0983, 0x0985...0x098C, 0x098F...0x0990,
138+
0x0993...0x09A8, 0x09AA...0x09B0, 0x09B2...0x09B2,
139+
0x09B6...0x09B9, 0x09BE...0x09C4, 0x09C7...0x09C8,
140+
0x09CB...0x09CD, 0x09DC...0x09DD, 0x09DF...0x09E3,
141+
// Digits (4)
142+
0x09E6...0x09EF,
143+
// Bengali (2)
144+
0x09F0...0x09F1,
145+
// Gurmukhi (1)
146+
0x0A02...0x0A02, 0x0A05...0x0A0A, 0x0A0F...0x0A10,
147+
0x0A13...0x0A28, 0x0A2A...0x0A30, 0x0A32...0x0A33,
148+
0x0A35...0x0A36, 0x0A38...0x0A39, 0x0A3E...0x0A42,
149+
0x0A47...0x0A48, 0x0A4B...0x0A4D, 0x0A59...0x0A5C,
150+
0x0A5E...0x0A5E,
151+
// Digits (5)
152+
0x0A66...0x0A6F,
153+
// Gurmukhi (2)
154+
0x0A74...0x0A74,
155+
// Gujarati
156+
0x0A81...0x0A83, 0x0A85...0x0A8B, 0x0A8D...0x0A8D,
157+
0x0A8F...0x0A91, 0x0A93...0x0AA8, 0x0AAA...0x0AB0,
158+
0x0AB2...0x0AB3, 0x0AB5...0x0AB9, 0x0ABD...0x0AC5,
159+
0x0AC7...0x0AC9, 0x0ACB...0x0ACD, 0x0AD0...0x0AD0,
160+
0x0AE0...0x0AE0,
161+
// Digits (6)
162+
0x0AE6...0x0AEF,
163+
// Oriya and Special character 0x0B3D
164+
0x0B01...0x0B03, 0x0B05...0x0B0C, 0x0B0F...0x0B10,
165+
0x0B13...0x0B28, 0x0B2A...0x0B30, 0x0B32...0x0B33,
166+
0x0B36...0x0B39, 0x0B3D...0x0B43, 0x0B47...0x0B48,
167+
0x0B4B...0x0B4D, 0x0B5C...0x0B5D, 0x0B5F...0x0B61,
168+
// Digits (7)
169+
0x0B66...0x0B6F,
170+
// Tamil
171+
0x0B82...0x0B83, 0x0B85...0x0B8A, 0x0B8E...0x0B90,
172+
0x0B92...0x0B95, 0x0B99...0x0B9A, 0x0B9C...0x0B9C,
173+
0x0B9E...0x0B9F, 0x0BA3...0x0BA4, 0x0BA8...0x0BAA,
174+
0x0BAE...0x0BB5, 0x0BB7...0x0BB9, 0x0BBE...0x0BC2,
175+
0x0BC6...0x0BC8, 0x0BCA...0x0BCD,
176+
// Digits (8)
177+
0x0BE7...0x0BEF,
178+
// Telugu
179+
0x0C01...0x0C03, 0x0C05...0x0C0C, 0x0C0E...0x0C10,
180+
0x0C12...0x0C28, 0x0C2A...0x0C33, 0x0C35...0x0C39,
181+
0x0C3E...0x0C44, 0x0C46...0x0C48, 0x0C4A...0x0C4D,
182+
0x0C60...0x0C61,
183+
// Digits (9)
184+
0x0C66...0x0C6F,
185+
// Kannada
186+
0x0C82...0x0C83, 0x0C85...0x0C8C, 0x0C8E...0x0C90,
187+
0x0C92...0x0CA8, 0x0CAA...0x0CB3, 0x0CB5...0x0CB9,
188+
0x0CBE...0x0CC4, 0x0CC6...0x0CC8, 0x0CCA...0x0CCD,
189+
0x0CDE...0x0CDE, 0x0CE0...0x0CE1,
190+
// Digits (10)
191+
0x0CE6...0x0CEF,
192+
// Malayalam
193+
0x0D02...0x0D03, 0x0D05...0x0D0C, 0x0D0E...0x0D10,
194+
0x0D12...0x0D28, 0x0D2A...0x0D39, 0x0D3E...0x0D43,
195+
0x0D46...0x0D48, 0x0D4A...0x0D4D, 0x0D60...0x0D61,
196+
// Digits (11)
197+
0x0D66...0x0D6F,
198+
// Thai (including digits 0x0E50...0x0E59)
199+
0x0E01...0x0E3A, 0x0E40...0x0E5B,
200+
// Lao (1)
201+
0x0E81...0x0E82, 0x0E84...0x0E84, 0x0E87...0x0E88,
202+
0x0E8A...0x0E8A, 0x0E8D...0x0E8D, 0x0E94...0x0E97,
203+
0x0E99...0x0E9F, 0x0EA1...0x0EA3, 0x0EA5...0x0EA5,
204+
0x0EA7...0x0EA7, 0x0EAA...0x0EAB, 0x0EAD...0x0EAE,
205+
0x0EB0...0x0EB9, 0x0EBB...0x0EBD, 0x0EC0...0x0EC4,
206+
0x0EC6...0x0EC6, 0x0EC8...0x0ECD,
207+
// Digits (12)
208+
0x0ED0...0x0ED9,
209+
// Lao (2)
210+
0x0EDC...0x0EDD,
211+
// Tibetan (1)
212+
0x0F00...0x0F00, 0x0F18...0x0F19,
213+
// Digits (13)
214+
0x0F20...0x0F33,
215+
// Tibetan (2)
216+
0x0F35...0x0F35, 0x0F37...0x0F37, 0x0F39...0x0F39,
217+
0x0F3E...0x0F47, 0x0F49...0x0F69, 0x0F71...0x0F84,
218+
0x0F86...0x0F8B, 0x0F90...0x0F95, 0x0F97...0x0F97,
219+
0x0F99...0x0FAD, 0x0FB1...0x0FB7, 0x0FB9...0x0FB9,
220+
// Georgian
221+
0x10A0...0x10C5, 0x10D0...0x10F6,
222+
// Latin (3)
223+
0x1E00...0x1E9B, 0x1EA0...0x1EF9,
224+
// Greek (2)
225+
0x1F00...0x1F15, 0x1F18...0x1F1D, 0x1F20...0x1F45,
226+
0x1F48...0x1F4D, 0x1F50...0x1F57, 0x1F59...0x1F59,
227+
0x1F5B...0x1F5B, 0x1F5D...0x1F5D, 0x1F5F...0x1F7D,
228+
0x1F80...0x1FB4, 0x1FB6...0x1FBC,
229+
// Special characters (4)
230+
0x1FBE...0x1FBE,
231+
// Greek (3)
232+
0x1FC2...0x1FC4, 0x1FC6...0x1FCC, 0x1FD0...0x1FD3,
233+
0x1FD6...0x1FDB, 0x1FE0...0x1FEC, 0x1FF2...0x1FF4,
234+
0x1FF6...0x1FFC,
235+
// Special characters (5)
236+
0x203F...0x2040,
237+
// Latin (4)
238+
0x207F...0x207F,
239+
// Special characters (6)
240+
0x2102...0x2102, 0x2107...0x2107, 0x210A...0x2113,
241+
0x2115...0x2115, 0x2118...0x211D, 0x2124...0x2124,
242+
0x2126...0x2126, 0x2128...0x2128, 0x212A...0x2131,
243+
0x2133...0x2138, 0x2160...0x2182, 0x3005...0x3007,
244+
0x3021...0x3029,
245+
// Hiragana
246+
0x3041...0x3093, 0x309B...0x309C,
247+
// Katakana
248+
0x30A1...0x30F6, 0x30FB...0x30FC,
249+
// Bopomofo
250+
0x3105...0x312C,
251+
// CJK Unified Ideographs
252+
0x4E00...0x9FA5,
253+
// Hangul
254+
0xAC00...0xD7A3:
255+
return $0
256+
default:
257+
return replacementUnichar
258+
}
259+
}
260+
// Additionally, an identifier may not begin with a digit, so replace every leading digit (up to the first non-digit) with the replacement character.
261+
loop: for (idx, c) in mangledUnichars.enumerated() {
262+
switch c.value {
263+
case
264+
// 0-9
265+
0x0030...0x0039,
266+
// Digit ranges from C99 Annex D (the "Digits" entries in the table above).
267+
0x0660...0x0669, 0x06F0...0x06F9, 0x0966...0x096F,
268+
0x09E6...0x09EF, 0x0A66...0x0A6F, 0x0AE6...0x0AEF,
269+
0x0B66...0x0B6F, 0x0BE7...0x0BEF, 0x0C66...0x0C6F,
270+
0x0CE6...0x0CEF, 0x0D66...0x0D6F, 0x0E50...0x0E59,
271+
0x0ED0...0x0ED9, 0x0F20...0x0F33:
272+
mangledUnichars[idx] = replacementUnichar
273+
default:
274+
break loop
275+
}
276+
}
277+
278+
// Combine the characters as a string again and return it.
279+
// FIXME: We should only construct a new string if anything changed.
280+
// FIXME: There doesn't seem to be a way to create a string from an
281+
// array of Unicode scalars; but there must be a better way.
282+
return mangledUnichars.reduce(""){ $0 + String($1) }
283+
}
76284
}

Sources/Commands/init.swift

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,8 @@ final class InitPackage {
5555

5656
init(mode: InitMode) throws {
5757
self.mode = mode
58-
pkgname = rootd.basename
59-
// Also validates that the name is valid.
60-
moduleName = try c99name(name: rootd.basename)
58+
self.pkgname = rootd.basename
59+
self.moduleName = rootd.basename.mangledForC99()
6160
}
6261

6362
func writePackageStructure() throws {

Sources/PackageModel/Module.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public class Module: ModuleProtocol {
6363
self.type = type
6464
self.sources = sources
6565
self.dependencies = []
66-
self.c99name = try PackageModel.c99name(name: self.name)
66+
self.c99name = self.name.mangledForC99()
6767
self.isTest = isTest
6868
}
6969

0 commit comments

Comments
 (0)