@@ -73,4 +73,212 @@ public extension String {
73
73
public mutating func shellEscape() {
    // In-place counterpart of `shellEscaped()`: compute the escaped copy
    // and overwrite the receiver with it.
    self = self.shellEscaped()
}
76
+
77
/// Creates a string that contains only valid C99 identifier characters.
///
/// Every Unicode scalar that is not in the accepted ranges listed in the
/// body is replaced with an underscore (`_`). Afterwards, the run of digit
/// scalars at the very start of the result (ASCII `0-9` and the digit
/// ranges listed under "Annex D" below) is also replaced with underscores,
/// because an identifier may not begin with a digit.
///
/// The mapping is one-to-one on Unicode scalars, so the result always has
/// the same number of scalars as the receiver; an empty string maps to an
/// empty string.
///
/// - Returns: The mangled copy of the receiver.
///
/// FIXME: We will likely want a better name...
public func mangledForC99() -> String {
    // The scalar substituted for anything that is not acceptable.
    let replacementUnichar: UnicodeScalar = "_"

    /// Returns true if the scalar value may appear anywhere in the result.
    func isAllowedScalar(_ value: UInt32) -> Bool {
        switch value {
        case
            // A-Z
            0x0041...0x005A,
            // a-z
            0x0061...0x007A,
            // 0-9
            0x0030...0x0039,
            // _
            0x005F,
            // Latin (1)
            0x00AA...0x00AA,
            // Special characters (1)
            0x00B5...0x00B5, 0x00B7...0x00B7,
            // Latin (2)
            0x00BA...0x00BA, 0x00C0...0x00D6, 0x00D8...0x00F6,
            0x00F8...0x01F5, 0x01FA...0x0217, 0x0250...0x02A8,
            // Special characters (2)
            0x02B0...0x02B8, 0x02BB...0x02BB, 0x02BD...0x02C1,
            0x02D0...0x02D1, 0x02E0...0x02E4, 0x037A...0x037A,
            // Greek (1)
            0x0386...0x0386, 0x0388...0x038A, 0x038C...0x038C,
            0x038E...0x03A1, 0x03A3...0x03CE, 0x03D0...0x03D6,
            0x03DA...0x03DA, 0x03DC...0x03DC, 0x03DE...0x03DE,
            0x03E0...0x03E0, 0x03E2...0x03F3,
            // Cyrillic
            0x0401...0x040C, 0x040E...0x044F, 0x0451...0x045C,
            0x045E...0x0481, 0x0490...0x04C4, 0x04C7...0x04C8,
            0x04CB...0x04CC, 0x04D0...0x04EB, 0x04EE...0x04F5,
            0x04F8...0x04F9,
            // Armenian (1)
            0x0531...0x0556,
            // Special characters (3)
            0x0559...0x0559,
            // Armenian (2)
            0x0561...0x0587,
            // Hebrew
            0x05B0...0x05B9, 0x05BB...0x05BD, 0x05BF...0x05BF,
            0x05C1...0x05C2, 0x05D0...0x05EA, 0x05F0...0x05F2,
            // Arabic (1)
            0x0621...0x063A, 0x0640...0x0652,
            // Digits (1)
            0x0660...0x0669,
            // Arabic (2)
            0x0670...0x06B7, 0x06BA...0x06BE, 0x06C0...0x06CE,
            0x06D0...0x06DC, 0x06E5...0x06E8, 0x06EA...0x06ED,
            // Digits (2)
            0x06F0...0x06F9,
            // Devanagari and special character 0x093D.
            0x0901...0x0903, 0x0905...0x0939, 0x093D...0x094D,
            0x0950...0x0952, 0x0958...0x0963,
            // Digits (3)
            0x0966...0x096F,
            // Bengali (1)
            0x0981...0x0983, 0x0985...0x098C, 0x098F...0x0990,
            0x0993...0x09A8, 0x09AA...0x09B0, 0x09B2...0x09B2,
            0x09B6...0x09B9, 0x09BE...0x09C4, 0x09C7...0x09C8,
            0x09CB...0x09CD, 0x09DC...0x09DD, 0x09DF...0x09E3,
            // Digits (4)
            0x09E6...0x09EF,
            // Bengali (2)
            0x09F0...0x09F1,
            // Gurmukhi (1)
            0x0A02...0x0A02, 0x0A05...0x0A0A, 0x0A0F...0x0A10,
            0x0A13...0x0A28, 0x0A2A...0x0A30, 0x0A32...0x0A33,
            0x0A35...0x0A36, 0x0A38...0x0A39, 0x0A3E...0x0A42,
            0x0A47...0x0A48, 0x0A4B...0x0A4D, 0x0A59...0x0A5C,
            0x0A5E...0x0A5E,
            // Digits (5)
            0x0A66...0x0A6F,
            // Gurmukhi (2)
            0x0A74...0x0A74,
            // Gujarati
            0x0A81...0x0A83, 0x0A85...0x0A8B, 0x0A8D...0x0A8D,
            0x0A8F...0x0A91, 0x0A93...0x0AA8, 0x0AAA...0x0AB0,
            0x0AB2...0x0AB3, 0x0AB5...0x0AB9, 0x0ABD...0x0AC5,
            0x0AC7...0x0AC9, 0x0ACB...0x0ACD, 0x0AD0...0x0AD0,
            0x0AE0...0x0AE0,
            // Digits (6)
            0x0AE6...0x0AEF,
            // Oriya and special character 0x0B3D
            0x0B01...0x0B03, 0x0B05...0x0B0C, 0x0B0F...0x0B10,
            0x0B13...0x0B28, 0x0B2A...0x0B30, 0x0B32...0x0B33,
            0x0B36...0x0B39, 0x0B3D...0x0B43, 0x0B47...0x0B48,
            0x0B4B...0x0B4D, 0x0B5C...0x0B5D, 0x0B5F...0x0B61,
            // Digits (7)
            0x0B66...0x0B6F,
            // Tamil
            0x0B82...0x0B83, 0x0B85...0x0B8A, 0x0B8E...0x0B90,
            0x0B92...0x0B95, 0x0B99...0x0B9A, 0x0B9C...0x0B9C,
            0x0B9E...0x0B9F, 0x0BA3...0x0BA4, 0x0BA8...0x0BAA,
            0x0BAE...0x0BB5, 0x0BB7...0x0BB9, 0x0BBE...0x0BC2,
            0x0BC6...0x0BC8, 0x0BCA...0x0BCD,
            // Digits (8)
            0x0BE7...0x0BEF,
            // Telugu
            0x0C01...0x0C03, 0x0C05...0x0C0C, 0x0C0E...0x0C10,
            0x0C12...0x0C28, 0x0C2A...0x0C33, 0x0C35...0x0C39,
            0x0C3E...0x0C44, 0x0C46...0x0C48, 0x0C4A...0x0C4D,
            0x0C60...0x0C61,
            // Digits (9)
            0x0C66...0x0C6F,
            // Kannada
            0x0C82...0x0C83, 0x0C85...0x0C8C, 0x0C8E...0x0C90,
            0x0C92...0x0CA8, 0x0CAA...0x0CB3, 0x0CB5...0x0CB9,
            0x0CBE...0x0CC4, 0x0CC6...0x0CC8, 0x0CCA...0x0CCD,
            0x0CDE...0x0CDE, 0x0CE0...0x0CE1,
            // Digits (10)
            0x0CE6...0x0CEF,
            // Malayalam
            0x0D02...0x0D03, 0x0D05...0x0D0C, 0x0D0E...0x0D10,
            0x0D12...0x0D28, 0x0D2A...0x0D39, 0x0D3E...0x0D43,
            0x0D46...0x0D48, 0x0D4A...0x0D4D, 0x0D60...0x0D61,
            // Digits (11)
            0x0D66...0x0D6F,
            // Thai (including digits 0x0E50...0x0E59)
            0x0E01...0x0E3A, 0x0E40...0x0E5B,
            // Lao (1)
            0x0E81...0x0E82, 0x0E84...0x0E84, 0x0E87...0x0E88,
            0x0E8A...0x0E8A, 0x0E8D...0x0E8D, 0x0E94...0x0E97,
            0x0E99...0x0E9F, 0x0EA1...0x0EA3, 0x0EA5...0x0EA5,
            0x0EA7...0x0EA7, 0x0EAA...0x0EAB, 0x0EAD...0x0EAE,
            0x0EB0...0x0EB9, 0x0EBB...0x0EBD, 0x0EC0...0x0EC4,
            0x0EC6...0x0EC6, 0x0EC8...0x0ECD,
            // Digits (12)
            0x0ED0...0x0ED9,
            // Lao (2)
            0x0EDC...0x0EDD,
            // Tibetan (1)
            0x0F00...0x0F00, 0x0F18...0x0F19,
            // Digits (13)
            0x0F20...0x0F33,
            // Tibetan (2)
            0x0F35...0x0F35, 0x0F37...0x0F37, 0x0F39...0x0F39,
            0x0F3E...0x0F47, 0x0F49...0x0F69, 0x0F71...0x0F84,
            0x0F86...0x0F8B, 0x0F90...0x0F95, 0x0F97...0x0F97,
            0x0F99...0x0FAD, 0x0FB1...0x0FB7, 0x0FB9...0x0FB9,
            // Georgian
            0x10A0...0x10C5, 0x10D0...0x10F6,
            // Latin (3)
            0x1E00...0x1E9B, 0x1EA0...0x1EF9,
            // Greek (2)
            0x1F00...0x1F15, 0x1F18...0x1F1D, 0x1F20...0x1F45,
            0x1F48...0x1F4D, 0x1F50...0x1F57, 0x1F59...0x1F59,
            0x1F5B...0x1F5B, 0x1F5D...0x1F5D, 0x1F5F...0x1F7D,
            0x1F80...0x1FB4, 0x1FB6...0x1FBC,
            // Special characters (4)
            0x1FBE...0x1FBE,
            // Greek (3)
            0x1FC2...0x1FC4, 0x1FC6...0x1FCC, 0x1FD0...0x1FD3,
            0x1FD6...0x1FDB, 0x1FE0...0x1FEC, 0x1FF2...0x1FF4,
            0x1FF6...0x1FFC,
            // Special characters (5)
            0x203F...0x2040,
            // Latin (4)
            0x207F...0x207F,
            // Special characters (6)
            0x2102...0x2102, 0x2107...0x2107, 0x210A...0x2113,
            0x2115...0x2115, 0x2118...0x211D, 0x2124...0x2124,
            0x2126...0x2126, 0x2128...0x2128, 0x212A...0x2131,
            0x2133...0x2138, 0x2160...0x2182, 0x3005...0x3007,
            0x3021...0x3029,
            // Hiragana
            0x3041...0x3093, 0x309B...0x309C,
            // Katakana
            0x30A1...0x30F6, 0x30FB...0x30FC,
            // Bopomofo
            0x3105...0x312C,
            // CJK Unified Ideographs
            0x4E00...0x9FA5,
            // Hangul
            0xAC00...0xD7A3:
            return true
        default:
            return false
        }
    }

    /// Returns true if the scalar value is a digit that may not start the
    /// result.
    func isLeadingDigitScalar(_ value: UInt32) -> Bool {
        switch value {
        case
            // 0-9
            0x0030...0x0039,
            // Annex D.
            0x0660...0x0669, 0x06F0...0x06F9, 0x0966...0x096F,
            0x09E6...0x09EF, 0x0A66...0x0A6F, 0x0AE6...0x0AEF,
            0x0B66...0x0B6F, 0x0BE7...0x0BEF, 0x0C66...0x0C6F,
            0x0CE6...0x0CEF, 0x0D66...0x0D6F, 0x0E50...0x0E59,
            0x0ED0...0x0ED9, 0x0F20...0x0F33:
            return true
        default:
            return false
        }
    }

    // Map every scalar that is not allowed to the replacement character.
    var mangledUnichars: [UnicodeScalar] = unicodeScalars.map {
        isAllowedScalar($0.value) ? $0 : replacementUnichar
    }

    // Further restrict the prefix: replace leading digits until the first
    // non-digit scalar is reached.
    // NOTE(review): this replaces the whole leading run of digits, although
    // only the first scalar of an identifier must be a non-digit. Kept as-is
    // so existing mangled names do not change; confirm whether that is
    // intended.
    for index in mangledUnichars.indices {
        guard isLeadingDigitScalar(mangledUnichars[index].value) else { break }
        mangledUnichars[index] = replacementUnichar
    }

    // Build the result in one step from the scalar array rather than
    // concatenating one single-scalar string at a time.
    // FIXME: We should only construct a new string if anything changed.
    return String(String.UnicodeScalarView(mangledUnichars))
}
76
284
}
0 commit comments