Skip to content

Commit c438805

Browse files
authored
Update IR (#21462)
IR commit: 7fed7999743ba6a5ffc5535e786725d5577f6f34
1 parent 0bd3a6b commit c438805

File tree

4 files changed

+87
-62
lines changed

4 files changed

+87
-62
lines changed

ext/opcache/jit/ir/ir_cfg.c

Lines changed: 67 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t
211211
if (life_inputs) {
212212
ir_remove_phis_inputs(ctx, &ctx->use_lists[bb->start], insn->inputs_count, life_inputs);
213213
ir_mem_free(life_inputs);
214+
life_inputs = NULL;
214215
}
215216
}
216217
}
@@ -613,59 +614,64 @@ static int ir_remove_unreachable_blocks(ir_ctx *ctx)
613614
return 1;
614615
}
615616

616-
static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
617-
{
618-
uint32_t i, *p;
619-
ir_block *bb = &ctx->cfg_blocks[b];
620-
621-
if (bb->postnum != 0) {
622-
return;
623-
}
624-
625-
if (bb->successors_count) {
626-
bb->postnum = -1; /* Marker for "currently visiting" */
627-
p = ctx->cfg_edges + bb->successors;
628-
i = bb->successors_count;
629-
do {
630-
compute_postnum(ctx, cur, *p);
631-
p++;
632-
} while (--i);
633-
}
634-
bb->postnum = (*cur)++;
635-
}
636-
637617
/* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by
638618
* Cooper, Harvey and Kennedy. */
639619
static IR_NEVER_INLINE int ir_build_dominators_tree_slow(ir_ctx *ctx)
640620
{
641-
uint32_t blocks_count, b, postnum;
621+
uint32_t blocks_count, b, postnum, i;
642622
ir_block *blocks, *bb;
643623
uint32_t *edges;
624+
uint32_t *rpo = ir_mem_malloc((ctx->cfg_blocks_count + 1) * sizeof(uint32_t));
644625
bool changed;
645626

646627
blocks = ctx->cfg_blocks;
647628
edges = ctx->cfg_edges;
648629
blocks_count = ctx->cfg_blocks_count;
649630

650-
/* Clear the dominators tree */
651-
for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) {
652-
bb->idom = 0;
653-
bb->dom_depth = 0;
654-
bb->dom_child = 0;
655-
bb->dom_next_child = 0;
656-
}
657-
658631
ctx->flags2 &= ~IR_NO_LOOPS;
659632

660633
postnum = 1;
661-
compute_postnum(ctx, &postnum, 1);
634+
ir_worklist work;
635+
ir_worklist_init(&work, ctx->cfg_blocks_count + 1);
636+
ir_worklist_push(&work, 1);
637+
IR_ASSERT(blocks[1].next_succ == 0);
638+
while (ir_worklist_len(&work)) {
639+
next:
640+
b = ir_worklist_peek(&work);
641+
bb = &blocks[b];
642+
uint32_t n = bb->successors_count - bb->next_succ;
643+
if (n) {
644+
uint32_t *p = edges + bb->successors + bb->next_succ;
645+
for (; n > 0; p++, n--) {
646+
uint32_t succ = *p;
647+
if (ir_worklist_push(&work, succ)) {
648+
bb->next_succ = bb->successors_count - n + 1;
649+
IR_ASSERT(blocks[succ].next_succ == 0);
650+
goto next;
651+
}
652+
}
653+
}
654+
655+
/* Start from bb->idom calculated by the fast dominators algorithm */
656+
// bb->idom = 0;
657+
bb->next_succ = 0;
658+
rpo[postnum] = b;
659+
bb->postnum = postnum++;
660+
ir_worklist_pop(&work);
661+
}
662+
ir_worklist_free(&work);
663+
664+
IR_ASSERT(rpo[blocks_count] == 1);
662665

663666
/* Find immediate dominators by iterative fixed-point algorithm */
664667
blocks[1].idom = 1;
665668
do {
666669
changed = 0;
670+
667671
/* Iterating in Reverse Post Order */
668-
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
672+
for (i = blocks_count - 1; i > 0; i--) {
673+
b = rpo[i];
674+
bb = &blocks[b];
669675
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
670676
IR_ASSERT(bb->predecessors_count > 0);
671677
if (bb->predecessors_count == 1) {
@@ -718,6 +724,8 @@ static IR_NEVER_INLINE int ir_build_dominators_tree_slow(ir_ctx *ctx)
718724
}
719725
} while (changed);
720726

727+
ir_mem_free(rpo);
728+
721729
/* Build dominators tree */
722730
blocks[1].idom = 0;
723731
blocks[1].dom_depth = 0;
@@ -771,7 +779,7 @@ int ir_build_dominators_tree(ir_ctx *ctx)
771779
blocks[1].idom = 1;
772780
blocks[1].dom_depth = 0;
773781

774-
/* Iterating in Reverse Post Order */
782+
/* Iterating in Reverse Post Order (relay on existing BB order and fall-back to slow algorithm) */
775783
for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
776784
IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
777785
IR_ASSERT(bb->predecessors_count > 0);
@@ -783,8 +791,8 @@ int ir_build_dominators_tree(ir_ctx *ctx)
783791
if (UNEXPECTED(idom >= b)) {
784792
/* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */
785793
ctx->flags2 &= ~IR_NO_LOOPS;
786-
// IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
787794
if (UNEXPECTED(k <= 1)) {
795+
// IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
788796
slow_case:
789797
ir_list_free(&worklist);
790798
return ir_build_dominators_tree_slow(ctx);
@@ -798,6 +806,7 @@ int ir_build_dominators_tree(ir_ctx *ctx)
798806
break;
799807
}
800808
if (UNEXPECTED(k == 0)) {
809+
// IR_ASSERT(0 && "Wrong blocks order: BB is before all its predecessors");
801810
goto slow_case;
802811
}
803812
ir_list_push(&worklist, idom);
@@ -830,13 +839,6 @@ int ir_build_dominators_tree(ir_ctx *ctx)
830839
bb->dom_depth = idom_bb->dom_depth + 1;
831840
}
832841

833-
/* Construct children lists sorted by block number */
834-
for (b = blocks_count, bb = &blocks[b]; b >= 2; b--, bb--) {
835-
ir_block *idom_bb = &blocks[bb->idom];
836-
bb->dom_next_child = idom_bb->dom_child;
837-
idom_bb->dom_child = b;
838-
}
839-
840842
blocks[1].idom = 0;
841843

842844
if (ir_list_len(&worklist) != 0) {
@@ -874,10 +876,18 @@ int ir_build_dominators_tree(ir_ctx *ctx)
874876

875877
if (UNEXPECTED(!complete)) {
876878
ir_list_free(&worklist);
879+
// TODO: this algorithm may be incorrect. Prove and/or switch to ir_build_dominators_tree_slow() ???
877880
return ir_build_dominators_tree_iterative(ctx);
878881
}
879882
}
880883

884+
/* Construct children lists sorted by block number */
885+
for (b = blocks_count, bb = &blocks[b]; b >= 2; b--, bb--) {
886+
ir_block *idom_bb = &blocks[bb->idom];
887+
bb->dom_next_child = idom_bb->dom_child;
888+
idom_bb->dom_child = b;
889+
}
890+
881891
ir_list_free(&worklist);
882892

883893
return 1;
@@ -898,8 +908,6 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
898908
/* Clear the dominators tree, but keep already found dominators */
899909
for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) {
900910
bb->dom_depth = 0;
901-
bb->dom_child = 0;
902-
bb->dom_next_child = 0;
903911
}
904912

905913
/* Find immediate dominators by iterative fixed-point algorithm */
@@ -917,20 +925,20 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
917925
if (blocks[idom].idom == 0) {
918926
while (1) {
919927
k--;
928+
if (UNEXPECTED(k == 0)) break;
920929
p++;
921930
idom = *p;
922931
if (blocks[idom].idom > 0) {
923932
break;
924933
}
925-
IR_ASSERT(k > 0);
926934
}
935+
if (UNEXPECTED(k == 0)) continue;
927936
}
928937
IR_ASSERT(k != 0);
929938
while (--k > 0) {
930939
uint32_t pred_b = *(++p);
931940

932941
if (blocks[pred_b].idom > 0) {
933-
IR_ASSERT(blocks[pred_b].idom > 0);
934942
while (idom != pred_b) {
935943
while (pred_b > idom) {
936944
pred_b = blocks[pred_b].idom;
@@ -1094,35 +1102,36 @@ int ir_find_loops(ir_ctx *ctx)
10941102
times = ir_mem_malloc((ctx->cfg_blocks_count + 1) * 3 * sizeof(uint32_t));
10951103
sorted_blocks = times + (ctx->cfg_blocks_count + 1) * 2;
10961104

1105+
ir_bitset visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
10971106
ir_worklist_push(&work, 1);
1098-
ENTRY_TIME(1) = time++;
1099-
11001107
while (ir_worklist_len(&work)) {
1101-
ir_block *bb;
1102-
1108+
next:
11031109
b = ir_worklist_peek(&work);
1110+
ir_block *bb = &blocks[b];
11041111

1105-
/* Visit successors of "b". */
1106-
next:
1107-
bb = &blocks[b];
1108-
n = bb->successors_count;
1109-
if (n) {
1110-
uint32_t *p = edges + bb->successors;
1112+
if (!ir_bitset_in(visited, b)) {
1113+
ir_bitset_incl(visited, b);
1114+
ENTRY_TIME(b) = time++;
1115+
}
11111116

1117+
uint32_t n = bb->successors_count - bb->next_succ;
1118+
if (n) {
1119+
uint32_t *p = edges + bb->successors + bb->next_succ;
11121120
for (; n > 0; p++, n--) {
11131121
uint32_t succ = *p;
1114-
11151122
if (ir_worklist_push(&work, succ)) {
1116-
b = succ;
1117-
ENTRY_TIME(b) = time++;
1123+
bb->next_succ = bb->successors_count - n + 1;
1124+
IR_ASSERT(blocks[succ].next_succ == 0);
11181125
goto next;
11191126
}
11201127
}
11211128
}
11221129

1130+
bb->next_succ = 0;
11231131
EXIT_TIME(b) = time++;
11241132
ir_worklist_pop(&work);
11251133
}
1134+
ir_mem_free(visited);
11261135

11271136
/* Sort blocks by level, which is the opposite order in which we want to process them */
11281137
/* (Breadth First Search using "sorted_blocks" as a queue) */

ext/opcache/jit/ir/ir_gdb.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,8 @@ IR_NEVER_INLINE void __jit_debug_register_code(void)
521521
static bool ir_gdb_register_code(const void *object, size_t size)
522522
{
523523
ir_gdbjit_code_entry *entry;
524+
ir_elf_header *elf_header;
525+
ir_elf_sectheader *elf_section, *elf_section_end;
524526

525527
entry = malloc(sizeof(ir_gdbjit_code_entry) + size);
526528
if (entry == NULL) {
@@ -532,6 +534,17 @@ static bool ir_gdb_register_code(const void *object, size_t size)
532534

533535
memcpy((char *)entry->symfile_addr, object, size);
534536

537+
elf_header = (ir_elf_header*)entry->symfile_addr;
538+
elf_section = (ir_elf_sectheader*)(entry->symfile_addr + elf_header->shofs);
539+
elf_section_end = (ir_elf_sectheader*)((char*)elf_section + (elf_header->shentsize * elf_header->shnum));
540+
541+
while (elf_section < elf_section_end) {
542+
if ((elf_section->flags & ELFSECT_FLAGS_ALLOC) && elf_section->addr == 0) {
543+
elf_section->addr = (uintptr_t)(entry->symfile_addr + elf_section->ofs);
544+
}
545+
elf_section = (ir_elf_sectheader*)((char*)elf_section + elf_header->shentsize);
546+
}
547+
535548
entry->prev_entry = NULL;
536549
entry->next_entry = __jit_debug_descriptor.first_entry;
537550

ext/opcache/jit/ir/ir_perf.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
#if defined(__linux__)
3232
#include <sys/syscall.h>
33-
#elif defined(__darwin__)
33+
#elif defined(__APPLE__)
3434
# include <pthread.h>
3535
#elif defined(__FreeBSD__)
3636
# include <sys/thr.h>
@@ -215,7 +215,7 @@ int ir_perf_jitdump_register(const char *name, const void *start, size_t size)
215215
uint32_t thread_id = 0;
216216
#if defined(__linux__)
217217
thread_id = syscall(SYS_gettid);
218-
#elif defined(__darwin__)
218+
#elif defined(__APPLE__)
219219
uint64_t thread_id_u64;
220220
pthread_threadid_np(NULL, &thread_id_u64);
221221
thread_id = (uint32_t) thread_id_u64;

ext/opcache/jit/ir/ir_private.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,12 +1145,15 @@ struct _ir_block {
11451145
};
11461146
union {
11471147
uint32_t dom_depth; /* depth from the root of the dominators tree */
1148-
uint32_t postnum; /* used temporary during tree constructon */
1148+
uint32_t postnum; /* used temporary for iterative Post Ordering */
11491149
};
11501150
uint32_t dom_child; /* first dominated blocks */
11511151
uint32_t dom_next_child; /* next dominated block (linked list) */
11521152
uint32_t loop_header;
1153-
uint32_t loop_depth;
1153+
union {
1154+
uint32_t loop_depth;
1155+
uint32_t next_succ; /* used temporary for iterative Post Ordering */
1156+
};
11541157
};
11551158

11561159
void ir_build_prev_refs(ir_ctx *ctx);

0 commit comments

Comments
 (0)