Skip to content

Commit 3bf4bcc

Browse files
committed
Update IR
IR commit: 1d3df9f7dd82fe49001e714a4c31962387b526f6
1 parent 8793f99 commit 3bf4bcc

File tree

7 files changed

+120
-46
lines changed

7 files changed

+120
-46
lines changed

ext/opcache/jit/ir/gen_ir_fold_hash.c

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
#define MAX_RULES 2048
1616
#define MAX_SLOTS (MAX_RULES * 4)
1717

18+
#define USE_SEMI_PERFECT_HASH 1
19+
#define USE_SHL_HASH 1
20+
#define USE_ROL_HASH 0
21+
1822
static ir_strtab strtab;
1923

2024
void print_hash(uint32_t *mask, uint32_t count)
@@ -28,12 +32,14 @@ void print_hash(uint32_t *mask, uint32_t count)
2832
printf("};\n\n");
2933
}
3034

31-
#if 0
35+
#if USE_SHL_HASH
3236
/* Candidate hash function: computes ((mask << r1) - mask) << r2 in 32-bit
 * unsigned arithmetic, which equals mask * (2^r1 - 1) * 2^r2 modulo 2^32.
 * Both shift counts must be below 32, since a larger count is undefined
 * behavior in C; the visible caller passes r1 < 31 and r2 < 32. */
static uint32_t hash_shl2(uint32_t mask, uint32_t r1, uint32_t r2)
3337
{
3438
return ((mask << r1) - mask) << r2;
3539
}
36-
#else
40+
#endif
41+
42+
#if USE_ROL_HASH
3743
/* Rotate x left (ir_rol) or right (ir_ror) by n bits, for unsigned x and n
 * below the bit width of x.  The complementary shift count is -(int)n masked
 * with (8*sizeof(x) - 1), so n == 0 yields a shift by 0 rather than by the
 * full width.  Both arguments are expanded more than once, so they must be
 * free of side effects. */
#define ir_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
3844
#define ir_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n)))
3945

@@ -50,29 +56,64 @@ int find_hash(uint32_t *mask, uint32_t count)
5056
uint32_t n, r1, r2, i, h;
5157

5258
for (n = (count | 1); n < MAX_SLOTS; n += 2) {
59+
#if USE_SEMI_PERFECT_HASH
60+
int semi_perfect = 0;
61+
#endif
62+
5363
for (r1 = 0; r1 < 31; r1++) {
5464
for (r2 = 0; r2 < 32; r2++) {
55-
#if 0
65+
#if USE_SHL_HASH
5666
memset(hash, 0, n * sizeof(uint32_t));
5767
for (i = 0; i < count; i++) {
5868
h = hash_shl2(mask[i] & 0x1fffff, r1, r2) % n;
59-
if (hash[h]) break; /* collision */
69+
if (hash[h]) {
70+
#if USE_SEMI_PERFECT_HASH
71+
h++;
72+
if (!hash[h]) {
73+
hash[h] = mask[i];
74+
semi_perfect = 1;
75+
continue;
76+
}
77+
#endif
78+
break; /* collision */
79+
}
6080
hash[h] = mask[i];
6181
}
6282
if (i == count) {
6383
print_hash(hash, n);
84+
#if USE_SEMI_PERFECT_HASH
85+
if (semi_perfect) {
86+
printf("#define IR_FOLD_SEMI_PERFECT_HASH\n\n");
87+
}
88+
#endif
6489
printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn (((h << %d) - h) << %d) %% %d;\n}\n", r1, r2, n);
6590
return 1;
6691
}
67-
#else
92+
#endif
93+
#if USE_ROL_HASH
6894
memset(hash, 0, n * sizeof(uint32_t));
6995
for (i = 0; i < count; i++) {
7096
h = hash_rol2(mask[i] & 0x1fffff, r1, r2) % n;
71-
if (hash[h]) break; /* collision */
97+
if (hash[h]) {
98+
#if USE_SEMI_PERFECT_HASH
99+
h++;
100+
if (!hash[h]) {
101+
hash[h] = mask[i];
102+
semi_perfect = 1;
103+
continue;
104+
}
105+
#endif
106+
break; /* collision */
107+
}
72108
hash[h] = mask[i];
73109
}
74110
if (i == count) {
75111
print_hash(hash, n);
112+
#if USE_SEMI_PERFECT_HASH
113+
if (semi_perfect) {
114+
printf("#define IR_FOLD_SEMI_PERFECT_HASH\n\n");
115+
}
116+
#endif
76117
printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\nreturn ir_rol32((ir_rol32(h, %d) - h), %d) %% %d;\n}\n", r1, r2, n);
77118
return 1;
78119
}

ext/opcache/jit/ir/ir.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -926,7 +926,11 @@ ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3,
926926
uint32_t k = key & any;
927927
uint32_t h = _ir_fold_hashkey(k);
928928
uint32_t fh = _ir_fold_hash[h];
929-
if (IR_FOLD_KEY(fh) == k /*|| (fh = _ir_fold_hash[h+1], (fh & 0x1fffff) == k)*/) {
929+
if (IR_FOLD_KEY(fh) == k
930+
#ifdef IR_FOLD_SEMI_PERFECT_HASH
931+
|| (fh = _ir_fold_hash[h+1], (fh & 0x1fffff) == k)
932+
#endif
933+
) {
930934
switch (IR_FOLD_RULE(fh)) {
931935
#include "ir_fold.h"
932936
default:
@@ -1287,6 +1291,7 @@ void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref ref)
12871291
*p = IR_UNUSED;
12881292
break;
12891293
}
1294+
p++;
12901295
j++;
12911296
}
12921297
}

ext/opcache/jit/ir/ir_aarch64.dasc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4309,7 +4309,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
43094309
}
43104310

43114311
/* Generate a table jmp or a sequence of calls */
4312-
if ((max.i64-min.i64) < count * 8) {
4312+
if (count > 2 && (max.i64-min.i64) < count * 8) {
43134313
int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1));
43144314

43154315
for (i = 0; i <= (max.i64 - min.i64); i++) {

ext/opcache/jit/ir/ir_cfg.c

Lines changed: 43 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2027,13 +2027,10 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
20272027
ir_chain *chains;
20282028
ir_bitqueue worklist;
20292029
ir_bitset visited;
2030-
uint32_t *empty, count;
2031-
#ifdef IR_DEBUG
2032-
uint32_t empty_count = 0;
2033-
#endif
2030+
uint32_t *schedule_end, count;
20342031

20352032
ctx->cfg_schedule = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2));
2036-
empty = ctx->cfg_schedule + ctx->cfg_blocks_count;
2033+
schedule_end = ctx->cfg_schedule + ctx->cfg_blocks_count;
20372034

20382035
/* 1. Create initial chains for each BB */
20392036
chains = ir_mem_malloc(sizeof(ir_chain) * (ctx->cfg_blocks_count + 1));
@@ -2083,11 +2080,8 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
20832080
/* move empty blocks to the end */
20842081
IR_ASSERT(chains[b].head == b);
20852082
chains[b].head = 0;
2086-
#ifdef IR_DEBUG
2087-
empty_count++;
2088-
#endif
2089-
*empty = b;
2090-
empty--;
2083+
*schedule_end = b;
2084+
schedule_end--;
20912085

20922086
if (successor > b) {
20932087
bb_freq[successor] += bb_freq[b];
@@ -2168,14 +2162,22 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
21682162
} else {
21692163
prob1 = prob2 = 50;
21702164
}
2171-
IR_ASSERT(edges_count < max_edges_count);
2172-
freq = bb_freq[b] * (float)prob1 / (float)probN;
2173-
if (successor1 > b) {
2174-
IR_ASSERT(!ir_bitset_in(visited, successor1));
2175-
bb_freq[successor1] += freq;
2176-
ir_bitqueue_add(&worklist, successor1);
2177-
}
21782165
do {
2166+
freq = bb_freq[b] * (float)prob1 / (float)probN;
2167+
if (successor1 > b) {
2168+
IR_ASSERT(!ir_bitset_in(visited, successor1));
2169+
bb_freq[successor1] += freq;
2170+
if (successor1_bb->successors_count == 0 && insn1->op2 == 1) {
2171+
/* move cold block without successors to the end */
2172+
IR_ASSERT(chains[successor1].head == successor1);
2173+
chains[successor1].head = 0;
2174+
*schedule_end = successor1;
2175+
schedule_end--;
2176+
break;
2177+
} else {
2178+
ir_bitqueue_add(&worklist, successor1);
2179+
}
2180+
}
21792181
/* try to join edges early to reduce number of edges and the cost of their sorting */
21802182
if (prob1 > prob2
21812183
&& (successor1_bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) != IR_BB_EMPTY) {
@@ -2187,19 +2189,28 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
21872189
if (!IR_DEBUG_BB_SCHEDULE_GRAPH) break;
21882190
}
21892191
successor1 = _ir_skip_empty_blocks(ctx, successor1);
2192+
IR_ASSERT(edges_count < max_edges_count);
21902193
edges[edges_count].from = b;
21912194
edges[edges_count].to = successor1;
21922195
edges[edges_count].freq = freq;
21932196
edges_count++;
21942197
} while (0);
2195-
IR_ASSERT(edges_count < max_edges_count);
2196-
freq = bb_freq[b] * (float)prob2 / (float)probN;
2197-
if (successor2 > b) {
2198-
IR_ASSERT(!ir_bitset_in(visited, successor2));
2199-
bb_freq[successor2] += freq;
2200-
ir_bitqueue_add(&worklist, successor2);
2201-
}
22022198
do {
2199+
freq = bb_freq[b] * (float)prob2 / (float)probN;
2200+
if (successor2 > b) {
2201+
IR_ASSERT(!ir_bitset_in(visited, successor2));
2202+
bb_freq[successor2] += freq;
2203+
if (successor2_bb->successors_count == 0 && insn2->op2 == 1) {
2204+
/* move cold block without successors to the end */
2205+
IR_ASSERT(chains[successor2].head == successor2);
2206+
chains[successor2].head = 0;
2207+
*schedule_end = successor2;
2208+
schedule_end--;
2209+
break;
2210+
} else {
2211+
ir_bitqueue_add(&worklist, successor2);
2212+
}
2213+
}
22032214
if (prob2 > prob1
22042215
&& (successor2_bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) != IR_BB_EMPTY) {
22052216
uint32_t src = chains[b].next;
@@ -2210,6 +2221,7 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
22102221
if (!IR_DEBUG_BB_SCHEDULE_GRAPH) break;
22112222
}
22122223
successor2 = _ir_skip_empty_blocks(ctx, successor2);
2224+
IR_ASSERT(edges_count < max_edges_count);
22132225
edges[edges_count].from = b;
22142226
edges[edges_count].to = successor2;
22152227
edges[edges_count].freq = freq;
@@ -2242,14 +2254,14 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
22422254
} else {
22432255
prob = 100 / bb->successors_count;
22442256
}
2245-
IR_ASSERT(edges_count < max_edges_count);
22462257
freq = bb_freq[b] * (float)prob / 100.0f;
22472258
if (successor > b) {
22482259
IR_ASSERT(!ir_bitset_in(visited, successor));
22492260
bb_freq[successor] += freq;
22502261
ir_bitqueue_add(&worklist, successor);
22512262
}
22522263
successor = _ir_skip_empty_blocks(ctx, successor);
2264+
IR_ASSERT(edges_count < max_edges_count);
22532265
edges[edges_count].from = b;
22542266
edges[edges_count].to = successor;
22552267
edges[edges_count].freq = freq;
@@ -2383,7 +2395,7 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
23832395
}
23842396
}
23852397

2386-
IR_ASSERT(count + empty_count == ctx->cfg_blocks_count);
2398+
IR_ASSERT(ctx->cfg_schedule + count == schedule_end);
23872399
ctx->cfg_schedule[ctx->cfg_blocks_count + 1] = 0;
23882400

23892401
ir_mem_free(edges);
@@ -2401,17 +2413,14 @@ static int ir_schedule_blocks_top_down(ir_ctx *ctx)
24012413
uint32_t b, best_successor, last_non_empty;
24022414
ir_block *bb, *best_successor_bb;
24032415
ir_insn *insn;
2404-
uint32_t *list, *empty;
2416+
uint32_t *list, *schedule_end;
24052417
uint32_t count = 0;
2406-
#ifdef IR_DEBUG
2407-
uint32_t empty_count = 0;
2408-
#endif
24092418

24102419
ir_bitqueue_init(&blocks, ctx->cfg_blocks_count + 1);
24112420
blocks.pos = 0;
24122421
list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2));
24132422
list[ctx->cfg_blocks_count + 1] = 0;
2414-
empty = list + ctx->cfg_blocks_count;
2423+
schedule_end = list + ctx->cfg_blocks_count;
24152424
for (b = 1; b <= ctx->cfg_blocks_count; b++) {
24162425
ir_bitset_incl(blocks.set, b);
24172426
}
@@ -2431,11 +2440,8 @@ static int ir_schedule_blocks_top_down(ir_ctx *ctx)
24312440
}
24322441
if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
24332442
/* move empty blocks to the end */
2434-
#ifdef IR_DEBUG
2435-
empty_count++;
2436-
#endif
2437-
*empty = b;
2438-
empty--;
2443+
*schedule_end = b;
2444+
schedule_end--;
24392445
} else {
24402446
count++;
24412447
list[count] = b;
@@ -2520,7 +2526,7 @@ static int ir_schedule_blocks_top_down(ir_ctx *ctx)
25202526
} while (1);
25212527
}
25222528

2523-
IR_ASSERT(count + empty_count == ctx->cfg_blocks_count);
2529+
IR_ASSERT(list + count == schedule_end);
25242530
ctx->cfg_schedule = list;
25252531
ir_bitqueue_free(&blocks);
25262532

ext/opcache/jit/ir/ir_fold.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2508,6 +2508,7 @@ IR_FOLD(MUL(MUL, C_I8))
25082508
IR_FOLD(MUL(MUL, C_I16))
25092509
IR_FOLD(MUL(MUL, C_I32))
25102510
IR_FOLD(MUL(MUL, C_I64))
2511+
IR_FOLD(MUL(MUL, C_ADDR))
25112512
{
25122513
if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) {
25132514
/* (x * c1) * c2 => x * (c1 * c2) */
@@ -2527,6 +2528,7 @@ IR_FOLD(AND(AND, C_I8))
25272528
IR_FOLD(AND(AND, C_I16))
25282529
IR_FOLD(AND(AND, C_I32))
25292530
IR_FOLD(AND(AND, C_I64))
2531+
IR_FOLD(AND(AND, C_ADDR))
25302532
{
25312533
if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) {
25322534
/* (x & c1) & c2 => x & (c1 & c2) */
@@ -2546,6 +2548,7 @@ IR_FOLD(OR(OR, C_I8))
25462548
IR_FOLD(OR(OR, C_I16))
25472549
IR_FOLD(OR(OR, C_I32))
25482550
IR_FOLD(OR(OR, C_I64))
2551+
IR_FOLD(OR(OR, C_ADDR))
25492552
{
25502553
if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) {
25512554
/* (x | c1) | c2 => x | (c1 | c2) */
@@ -2565,6 +2568,7 @@ IR_FOLD(XOR(XOR, C_I8))
25652568
IR_FOLD(XOR(XOR, C_I16))
25662569
IR_FOLD(XOR(XOR, C_I32))
25672570
IR_FOLD(XOR(XOR, C_I64))
2571+
IR_FOLD(XOR(XOR, C_ADDR))
25682572
{
25692573
if (IR_IS_CONST_REF(op1_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op1_insn->op2].op)) {
25702574
/* (x ^ c1) ^ c2 => x ^ (c1 ^ c2) */

ext/opcache/jit/ir/ir_ra.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,6 +1554,10 @@ static bool ir_vregs_inside(ir_ctx *ctx, uint32_t parent, uint32_t child)
15541554
ir_live_interval *child_ival = ctx->live_intervals[child];
15551555
ir_live_interval *parent_ival = ctx->live_intervals[parent];
15561556

1557+
if ((child_ival->flags | parent_ival->flags) & IR_LIVE_INTERVAL_COALESCED) {
1558+
// TODO: Support valid cases with already coalesced "parent_ival"
1559+
return 0;
1560+
}
15571561
#if 0
15581562
if (child_ival->end >= parent_ival->end) {
15591563
return 0;
@@ -1629,6 +1633,13 @@ static void ir_vregs_coalesce(ir_ctx *ctx, uint32_t v1, uint32_t v2, ir_ref from
16291633
uint16_t f1 = ctx->live_intervals[v1]->flags;
16301634
uint16_t f2 = ctx->live_intervals[v2]->flags;
16311635

1636+
#if 0
1637+
if (ctx->binding) {
1638+
ir_ref b1 = ir_binding_find(ctx, from);
1639+
ir_ref b2 = ir_binding_find(ctx, to);
1640+
IR_ASSERT(b1 == b2);
1641+
}
1642+
#endif
16321643
if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) {
16331644
ir_vregs_join(ctx, v1, v2);
16341645
ctx->vregs[to] = v1;
@@ -1971,6 +1982,13 @@ int ir_coalesce(ir_ctx *ctx)
19711982
&& ctx->vregs[insn->op1]
19721983
&& ctx->vregs[i] != ctx->vregs[insn->op1]) {
19731984
if (ir_vregs_inside(ctx, ctx->vregs[insn->op1], ctx->vregs[i])) {
1985+
if (ctx->binding) {
1986+
ir_ref b1 = ir_binding_find(ctx, i);
1987+
ir_ref b2 = ir_binding_find(ctx, insn->op1);
1988+
if (b1 != b2) {
1989+
continue;
1990+
}
1991+
}
19741992
ir_vregs_coalesce(ctx, ctx->vregs[i], ctx->vregs[insn->op1], i, insn->op1);
19751993
compact = 1;
19761994
}

ext/opcache/jit/ir/ir_x86.dasc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7746,7 +7746,7 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
77467746
}
77477747

77487748
/* Generate a table jmp or a sequence of calls */
7749-
if ((max.i64-min.i64) < count * 8) {
7749+
if (count > 2 && (max.i64-min.i64) < count * 8) {
77507750
int *labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1));
77517751

77527752
for (i = 0; i <= (max.i64 - min.i64); i++) {

0 commit comments

Comments
 (0)