Skip to content

Commit 5029bab

Browse files
committed
Use more optimal perfect hash table
1 parent 9e6e4e8 commit 5029bab

File tree

1 file changed

+112
-123
lines changed

1 file changed

+112
-123
lines changed

ext/mbstring/libmbfl/mbfl/mbfl_encoding.c

Lines changed: 112 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -146,179 +146,168 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
146146

147147
/* The following perfect hashing table was amended from gperf, and hashing code was generated using gperf.
148148
* The table was amended to refer to the table above such that it is lighter for the data cache.
149-
* Command used: gperf encodings.txt --readonly-tables --null-strings --ignore-case
150-
* The encodings.txt contains all the contents of the name fields of the mbfl_encoding_ptr_list table. */
149+
* You can use the generate_name_perfect_hash_table.php script to help generate the necessary lookup tables. */
151150

152-
static const int8_t mbfl_encoding_ptr_list_after_hashing[231] = {
153-
-1, -1,
154-
61,
151+
static const int8_t mbfl_encoding_ptr_list_after_hashing[] = {
152+
-1, -1, -1,
155153
66,
154+
-1, -1, -1, -1,
155+
-1, -1, -1, -1,
156+
9,
157+
-1, -1, -1, -1,
158+
-1,
156159
23,
157-
73,
160+
18,
158161
59,
159-
-1,
160-
1,
162+
78,
161163
-1, -1, -1,
162-
11,
164+
39,
165+
73,
166+
71,
167+
40,
163168
-1,
164-
5,
165-
9,
169+
75,
166170
-1,
167-
10,
168-
38,
171+
67,
172+
35,
169173
-1,
174+
76,
175+
1,
176+
61,
177+
46,
178+
-1,
179+
21,
180+
-1,
181+
11,
182+
-1, -1,
170183
52,
171184
54,
185+
64,
186+
12,
187+
10,
172188
-1,
173-
2,
174-
40,
175-
46,
176-
27,
177-
76,
178-
26,
179-
-1,
189+
3,
180190
49,
181191
57,
192+
69,
193+
70,
194+
-1, -1,
195+
74,
196+
50,
197+
58,
198+
63,
199+
65,
182200
-1,
183-
75,
201+
14,
202+
30,
203+
53,
204+
5,
205+
-1, -1, -1,
206+
13,
184207
-1,
208+
48,
209+
56,
210+
60,
211+
-1,
212+
8,
213+
36,
214+
20,
185215
47,
186216
55,
187-
78,
188-
36,
217+
6,
189218
-1,
190-
50,
191-
58,
192-
8,
193-
-1, -1,
194-
69,
195-
39,
196219
7,
197-
-1, -1,
198-
64,
199-
67,
200-
-1, -1,
201-
30,
202-
48,
203-
56,
204-
-1, -1, -1,
205-
35,
206-
74,
207-
-1, -1,
208-
24,
209-
53,
210-
62,
211-
43,
212-
-1, -1,
220+
-1,
221+
27,
213222
45,
214-
22,
215-
-1, -1, -1,
216-
6,
217-
3,
218-
-1, -1, -1,
219-
18,
220-
71,
221-
-1, -1, -1,
222-
21,
223+
26,
224+
31,
223225
-1,
224226
37,
225-
-1,
226-
4,
227-
60,
228-
25,
229-
-1, -1,
230-
72,
227+
17,
228+
24,
231229
51,
232230
-1,
233-
44,
234-
29,
231+
33,
232+
15,
235233
-1,
234+
16,
235+
-1,
236+
38,
237+
-1, -1,
236238
28,
237-
0,
239+
34,
238240
-1,
239-
14,
240-
31,
241-
63,
242-
12,
241+
72,
242+
-1, -1, -1,
243+
0,
243244
-1,
244-
13,
245-
33,
246-
-1, -1,
247-
68,
248-
-1, -1, -1, -1,
245+
29,
249246
-1, -1,
250-
20,
251-
-1, -1, -1, -1,
252-
-1, -1, -1,
247+
4,
248+
19,
249+
22,
253250
77,
254251
-1, -1, -1, -1,
255-
-1, -1, -1, -1,
256-
65,
257-
-1, -1, -1, -1,
258-
70,
252+
32,
253+
2,
259254
-1, -1, -1, -1,
260255
-1,
261-
41,
256+
43,
262257
-1, -1, -1, -1,
263-
-1,
264-
17,
265-
-1, -1, -1,
258+
-1, -1,
266259
42,
267-
16,
260+
-1,
261+
41,
268262
-1, -1, -1, -1,
269263
-1, -1, -1, -1,
270264
-1, -1, -1, -1,
271-
15,
272265
-1, -1, -1, -1,
273-
34,
266+
-1,
267+
25,
274268
-1, -1, -1, -1,
275269
-1, -1, -1,
276-
32,
277-
-1, -1, -1, -1,
278-
-1, -1, -1, -1,
279-
-1, -1, -1, -1,
280-
-1, -1, -1, -1,
281-
-1, -1, -1, -1,
282-
-1, -1, -1, -1,
283-
-1, -1, -1, -1,
284-
-1, -1, -1, -1,
270+
44,
285271
-1, -1, -1, -1,
286272
-1, -1, -1, -1,
287273
-1, -1, -1, -1,
288274
-1,
289-
19
275+
68,
276+
-1, -1, -1, -1,
277+
-1, -1, -1,
278+
62
290279
};
291280

292281
static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len)
293282
{
294283
static const unsigned char asso_values[] =
295284
{
296-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
297-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
298-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
299-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
300-
231, 231, 231, 231, 231, 5, 231, 231, 0, 50,
301-
5, 15, 35, 10, 20, 75, 0, 45, 231, 231,
302-
231, 231, 231, 231, 231, 80, 5, 0, 0, 0,
303-
75, 75, 0, 0, 15, 70, 0, 5, 0, 0,
304-
25, 55, 30, 0, 10, 0, 231, 25, 231, 231,
305-
0, 231, 231, 231, 231, 231, 231, 80, 5, 0,
306-
0, 0, 75, 75, 0, 0, 15, 70, 0, 5,
307-
0, 0, 25, 55, 30, 0, 10, 0, 231, 25,
308-
231, 231, 0, 231, 231, 231, 231, 231, 231, 231,
309-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
310-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
311-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
312-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
313-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
314-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
315-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
316-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
317-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
318-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
319-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
320-
231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
321-
231, 231, 231, 231, 231, 231
285+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
286+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
287+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
288+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
289+
189, 189, 189, 189, 189, 7, 189, 189, 7, 49,
290+
0, 42, 35, 14, 21, 56, 7, 28, 189, 189,
291+
189, 189, 189, 189, 189, 35, 7, 0, 0, 14,
292+
0, 21, 28, 0, 84, 14, 0, 56, 0, 0,
293+
7, 0, 14, 7, 56, 0, 189, 56, 189, 189,
294+
7, 189, 189, 189, 189, 189, 189, 35, 7, 0,
295+
0, 14, 0, 21, 28, 0, 84, 14, 0, 56,
296+
0, 0, 7, 0, 14, 7, 56, 0, 189, 56,
297+
189, 189, 7, 189, 189, 189, 189, 189, 189, 189,
298+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
299+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
300+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
301+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
302+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
303+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
304+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
305+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
306+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
307+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
308+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
309+
189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
310+
189, 189, 189, 189, 189, 189
322311
};
323312
unsigned int hval = len;
324313

@@ -377,7 +366,7 @@ const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
377366
/* Use perfect hash lookup for name */
378367
if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH) {
379368
unsigned int key = mbfl_name2encoding_perfect_hash(name, name_len);
380-
if (key <= sizeof(mbfl_encoding_ptr_list_after_hashing)) {
369+
if (key < sizeof(mbfl_encoding_ptr_list_after_hashing) / sizeof(mbfl_encoding_ptr_list_after_hashing[0])) {
381370
int8_t offset = mbfl_encoding_ptr_list_after_hashing[key];
382371
if (offset >= 0) {
383372
encoding = mbfl_encoding_ptr_list + offset;

0 commit comments

Comments
 (0)