diff options
| author | Natalie Vock <natalie.vock@gmx.de> | 2025-10-17 13:53:33 +0200 |
|---|---|---|
| committer | Marge Bot <marge-bot@fdo.invalid> | 2025-11-06 21:34:33 +0000 |
| commit | 0cb1fca8fafe43e6ef87f585e2da79936997cb85 (patch) | |
| tree | 7804c68be0067bd5671afe6eb087be56eb1358c1 | |
| parent | a8b75dd0f426e8875534d141610d898b856e53cd (diff) | |
nir: Use sparse bitset for liveness information
Some shaders, especially RTPSO shaders that have parts of the PSO
inlined, can become absolutely huge. Using a sparse bitset avoids
quadratic complexity in memory consumption for the liveness information.
This reduces peak memory usage in worst-case tests (hammering
compilation of many huge RTPSOs on 32 threads concurrently) by ~60%,
from 43GB to 18GB.
CPU time (seconds) differences for a workload with mostly small shaders:
Difference at 95.0% confidence
-5.27 +/- 1.08963
-0.88811% +/- 0.183626%
(Student's t, pooled s = 0.629735)
Peak resident set usage for the mostly-small workload:
Difference at 95.0% confidence
30809 +/- 13394.3
1.59276% +/- 0.69246%
(Student's t, pooled s = 7741.09)
CPU time for the heavy workload did not show any difference.
Co-authored-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37908>
| -rw-r--r-- | src/compiler/nir/nir.h | 7 | ||||
| -rw-r--r-- | src/compiler/nir/nir_liveness.c | 69 | ||||
| -rw-r--r-- | src/compiler/nir/nir_lower_shader_calls.c | 205 | ||||
| -rw-r--r-- | src/compiler/nir/nir_metadata.c | 12 | ||||
| -rw-r--r-- | src/compiler/nir/nir_opt_call.c | 6 | ||||
| -rw-r--r-- | src/compiler/nir/nir_validate.c | 18 | ||||
| -rw-r--r-- | src/gallium/drivers/etnaviv/etnaviv_compiler_nir_liveness.c | 32 | ||||
| -rw-r--r-- | src/intel/compiler/brw/brw_nir.c | 24 |
8 files changed, 151 insertions, 222 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index ebb1950e8b8..8c54e18bd05 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -43,6 +43,7 @@ #include "util/ralloc.h" #include "util/range_minimum_query.h" #include "util/set.h" +#include "util/sparse_bitset.h" #include "util/u_math.h" #include "nir_defines.h" #include "nir_shader_compiler_options.h" @@ -3106,8 +3107,8 @@ typedef struct nir_block { /* SSA def live in and out for this block; used for liveness analysis. * Indexed by ssa_def->index */ - BITSET_WORD *live_in; - BITSET_WORD *live_out; + struct u_sparse_bitset live_in; + struct u_sparse_bitset live_out; } nir_block; static inline bool @@ -6131,7 +6132,7 @@ bool nir_shader_supports_implicit_lod(nir_shader *shader); void nir_live_defs_impl(nir_function_impl *impl); -const BITSET_WORD *nir_get_live_defs(nir_cursor cursor, void *mem_ctx); +struct u_sparse_bitset *nir_get_live_defs(nir_cursor cursor, void *mem_ctx); void nir_loop_analyze_impl(nir_function_impl *impl, nir_variable_mode indirect_mask, diff --git a/src/compiler/nir/nir_liveness.c b/src/compiler/nir/nir_liveness.c index 27b6481ad7b..b25ca7c13d5 100644 --- a/src/compiler/nir/nir_liveness.c +++ b/src/compiler/nir/nir_liveness.c @@ -40,10 +40,11 @@ */ struct live_defs_state { - unsigned bitset_words; + unsigned num_bits; + void *mem_ctx; /* Used in propagate_across_edge() */ - BITSET_WORD *tmp_live; + struct u_sparse_bitset tmp_live; nir_block_worklist worklist; }; @@ -55,26 +56,20 @@ static void init_liveness_block(nir_block *block, struct live_defs_state *state) { - block->live_in = reralloc(block, block->live_in, BITSET_WORD, - state->bitset_words); - memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD)); - - block->live_out = reralloc(block, block->live_out, BITSET_WORD, - state->bitset_words); - memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD)); - + u_sparse_bitset_init(&block->live_in, state->num_bits, state->mem_ctx); + u_sparse_bitset_init(&block->live_out, state->num_bits, state->mem_ctx); nir_block_worklist_push_head(&state->worklist, block); } static bool set_src_live(nir_src *src, void *void_live) { - BITSET_WORD *live = void_live; + struct u_sparse_bitset *live = void_live; if (nir_src_is_undef(*src)) return true; /* undefined variables are never live */ - BITSET_SET(live, src->ssa->index); + u_sparse_bitset_set(live, src->ssa->index); return true; } @@ -82,9 +77,9 @@ set_src_live(nir_src *src, void *void_live) static bool set_ssa_def_dead(nir_def *def, void *void_live) { - BITSET_WORD *live = void_live; + struct u_sparse_bitset *live = void_live; - BITSET_CLEAR(live, def->index); + u_sparse_bitset_clear(live, def->index); return true; } @@ -102,8 +97,8 @@ static bool propagate_across_edge(nir_block *pred, nir_block *succ, struct live_defs_state *state) { - BITSET_WORD *live = state->tmp_live; - memcpy(live, succ->live_in, state->bitset_words * sizeof *live); + struct u_sparse_bitset *live = &state->tmp_live; + u_sparse_bitset_dup(live, &succ->live_in); nir_foreach_phi(phi, succ) { set_ssa_def_dead(&phi->def, live); @@ -118,21 +113,18 @@ propagate_across_edge(nir_block *pred, nir_block *succ, } } - BITSET_WORD progress = 0; - for (unsigned i = 0; i < state->bitset_words; ++i) { - progress |= live[i] & ~pred->live_out[i]; - pred->live_out[i] |= live[i]; - } - return progress != 0; + bool progress = u_sparse_bitset_merge(&pred->live_out, live); + u_sparse_bitset_free(live); + return progress; } void nir_live_defs_impl(nir_function_impl *impl) { struct live_defs_state state = { - .bitset_words = BITSET_WORDS(impl->ssa_alloc), + .num_bits = impl->ssa_alloc, + .mem_ctx = impl, }; - state.tmp_live = rzalloc_array(impl, BITSET_WORD, state.bitset_words), nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL); @@ -156,12 +148,11 @@ nir_live_defs_impl(nir_function_impl *impl) */ nir_block *block = nir_block_worklist_pop_head(&state.worklist); - memcpy(block->live_in, block->live_out, - state.bitset_words * sizeof(BITSET_WORD)); + u_sparse_bitset_dup(&block->live_in, &block->live_out); nir_if *following_if = nir_block_get_following_if(block); if (following_if) - set_src_live(&following_if->condition, block->live_in); + set_src_live(&following_if->condition, &block->live_in); nir_foreach_instr_reverse(instr, block) { /* Phi nodes are handled seperately so we want to skip them. Since @@ -171,8 +162,8 @@ nir_live_defs_impl(nir_function_impl *impl) if (instr->type == nir_instr_type_phi) break; - nir_foreach_def(instr, set_ssa_def_dead, block->live_in); - nir_foreach_src(instr, set_src_live, block->live_in); + nir_foreach_def(instr, set_ssa_def_dead, &block->live_in); + nir_foreach_src(instr, set_src_live, &block->live_in); } /* Walk over all of the predecessors of the current block updating @@ -187,7 +178,6 @@ nir_live_defs_impl(nir_function_impl *impl) } } - ralloc_free(state.tmp_live); nir_block_worklist_fini(&state.worklist); } @@ -197,7 +187,7 @@ nir_live_defs_impl(nir_function_impl *impl) * which the instruction lives. Do not ralloc_free() it directly; * instead, provide a mem_ctx and free that. */ -const BITSET_WORD * +struct u_sparse_bitset * nir_get_live_defs(nir_cursor cursor, void *mem_ctx) { nir_block *block = nir_cursor_current_block(cursor); @@ -206,26 +196,25 @@ nir_get_live_defs(nir_cursor cursor, void *mem_ctx) switch (cursor.option) { case nir_cursor_before_block: - return cursor.block->live_in; + return &cursor.block->live_in; case nir_cursor_after_block: - return cursor.block->live_out; + return &cursor.block->live_out; case nir_cursor_before_instr: if (cursor.instr == nir_block_first_instr(cursor.instr->block)) - return cursor.instr->block->live_in; + return &cursor.instr->block->live_in; break; case nir_cursor_after_instr: if (cursor.instr == nir_block_last_instr(cursor.instr->block)) - return cursor.instr->block->live_out; + return &cursor.instr->block->live_out; break; } /* If we got here, we're an instruction cursor mid-block */ - const unsigned bitset_words = BITSET_WORDS(impl->ssa_alloc); - BITSET_WORD *live = ralloc_array(mem_ctx, BITSET_WORD, bitset_words); - memcpy(live, block->live_out, bitset_words * sizeof(BITSET_WORD)); + struct u_sparse_bitset *live = rzalloc_size(mem_ctx, sizeof(struct u_sparse_bitset)); + u_sparse_bitset_dup_with_ctx(live, &block->live_out, mem_ctx); nir_foreach_instr_reverse(instr, block) { if (cursor.option == nir_cursor_after_instr && instr == cursor.instr) @@ -283,13 +272,13 @@ search_for_use_after_instr(nir_instr *start, nir_def *def) static bool nir_def_is_live_at(nir_def *def, nir_instr *instr) { - if (BITSET_TEST(instr->block->live_out, def->index)) { + if (u_sparse_bitset_test(&instr->block->live_out, def->index)) { /* Since def dominates instr, if def is in the liveout of the block, * it's live at instr */ return true; } else { - if (BITSET_TEST(instr->block->live_in, def->index) || + if (u_sparse_bitset_test(&instr->block->live_in, def->index) || nir_def_block(def) == instr->block) { /* In this case it is either live coming into instr's block or it * is defined in the same block. In this case, we simply need to diff --git a/src/compiler/nir/nir_lower_shader_calls.c b/src/compiler/nir/nir_lower_shader_calls.c index 2374b3bb022..816c39c6fd1 100644 --- a/src/compiler/nir/nir_lower_shader_calls.c +++ b/src/compiler/nir/nir_lower_shader_calls.c @@ -72,51 +72,16 @@ instr_is_shader_call(nir_instr *instr) intrin->intrinsic == nir_intrinsic_execute_callable; } -/* Previously named bitset, it had to be renamed as FreeBSD defines a struct - * named bitset in sys/_bitset.h required by pthread_np.h which is included - * from src/util/u_thread.h that is indirectly included by this file. - */ -struct sized_bitset { - BITSET_WORD *set; - unsigned size; -}; - -static struct sized_bitset -bitset_create(void *mem_ctx, unsigned size) -{ - return (struct sized_bitset){ - .set = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(size)), - .size = size, - }; -} - static bool src_is_in_bitset(nir_src *src, void *_set) { - struct sized_bitset *set = _set; - - /* Any SSA values which were added after we generated liveness information - * are things generated by this pass and, while most of it is arithmetic - * which we could re-materialize, we don't need to because it's only used - * for a single load/store and so shouldn't cross any shader calls. - */ - if (src->ssa->index >= set->size) - return false; - - return BITSET_TEST(set->set, src->ssa->index); -} - -static void -add_ssa_def_to_bitset(nir_def *def, struct sized_bitset *set) -{ - if (def->index >= set->size) - return; + struct u_sparse_bitset *set = _set; - BITSET_SET(set->set, def->index); + return u_sparse_bitset_test(set, src->ssa->index); } static bool -can_remat_instr(nir_instr *instr, struct sized_bitset *remat) +can_remat_instr(nir_instr *instr, struct u_sparse_bitset *remat) { /* Set of all values which are trivially re-materializable and we shouldn't * ever spill them. This includes: @@ -203,21 +168,21 @@ can_remat_instr(nir_instr *instr, struct sized_bitset *remat) } static bool -can_remat_ssa_def(nir_def *def, struct sized_bitset *remat) +can_remat_ssa_def(nir_def *def, struct u_sparse_bitset *remat) { return can_remat_instr(def->parent_instr, remat); } struct add_instr_data { struct util_dynarray *buf; - struct sized_bitset *remat; + struct u_sparse_bitset *remat; }; static bool add_src_instr(nir_src *src, void *state) { struct add_instr_data *data = state; - if (BITSET_TEST(data->remat->set, src->ssa->index)) + if (u_sparse_bitset_test(data->remat, src->ssa->index)) return true; util_dynarray_foreach(data->buf, nir_instr *, instr_ptr) { @@ -243,7 +208,7 @@ compare_instr_indexes(const void *_inst1, const void *_inst2) } static bool -can_remat_chain_ssa_def(nir_def *def, struct sized_bitset *remat, struct util_dynarray *buf) +can_remat_chain_ssa_def(nir_def *def, struct u_sparse_bitset *remat, struct util_dynarray *buf) { assert(util_dynarray_num_elements(buf, nir_instr *) == 0); @@ -274,14 +239,14 @@ can_remat_chain_ssa_def(nir_def *def, struct sized_bitset *remat, struct util_dy * through values that might not be in that set but that we can * rematerialize. */ - struct sized_bitset potential_remat = bitset_create(mem_ctx, remat->size); - memcpy(potential_remat.set, remat->set, BITSET_WORDS(remat->size) * sizeof(BITSET_WORD)); + struct u_sparse_bitset potential_remat; + u_sparse_bitset_dup(&potential_remat, remat); util_dynarray_foreach(buf, nir_instr *, instr_ptr) { nir_def *instr_ssa_def = nir_instr_def(*instr_ptr); /* If already in the potential rematerializable, nothing to do. */ - if (BITSET_TEST(potential_remat.set, instr_ssa_def->index)) + if (u_sparse_bitset_test(&potential_remat, instr_ssa_def->index)) continue; if (!can_remat_instr(*instr_ptr, &potential_remat)) @@ -290,7 +255,7 @@ can_remat_chain_ssa_def(nir_def *def, struct sized_bitset *remat, struct util_dy /* All the sources are rematerializable and the instruction is also * rematerializable, mark it as rematerializable too. */ - BITSET_SET(potential_remat.set, instr_ssa_def->index); + u_sparse_bitset_set(&potential_remat, instr_ssa_def->index); } ralloc_free(mem_ctx); @@ -313,8 +278,9 @@ remat_ssa_def(nir_builder *b, nir_def *def, struct hash_table *remap_table) static nir_def * remat_chain_ssa_def(nir_builder *b, struct util_dynarray *buf, - struct sized_bitset *remat, nir_def ***fill_defs, - unsigned call_idx, struct hash_table *remap_table) + struct u_sparse_bitset *remat, nir_def ***fill_defs, + unsigned call_idx, unsigned num_ssa_defs, + struct hash_table *remap_table) { nir_def *last_def = NULL; @@ -331,14 +297,14 @@ remat_chain_ssa_def(nir_builder *b, struct util_dynarray *buf, if (fill_defs[ssa_index] == NULL) { fill_defs[ssa_index] = - rzalloc_array(fill_defs, nir_def *, remat->size); + rzalloc_array(fill_defs, nir_def *, num_ssa_defs); } /* Add the new ssa_def to the list fill_defs and flag it as * rematerialized */ fill_defs[ssa_index][call_idx] = last_def = clone_ssa_def; - BITSET_SET(remat->set, ssa_index); + u_sparse_bitset_set(remat, ssa_index); _mesa_hash_table_insert(remap_table, instr_ssa_def, last_def); } @@ -414,9 +380,9 @@ spill_fill(nir_builder *before, nir_builder *after, nir_def *def, static bool add_src_to_call_live_bitset(nir_src *src, void *state) { - BITSET_WORD *call_live = state; + struct u_sparse_bitset *call_live = state; - BITSET_SET(call_live, src->ssa->index); + u_sparse_bitset_set(call_live, src->ssa->index); return true; } @@ -453,8 +419,8 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls, void *mem_ctx = ralloc_context(shader); const unsigned num_ssa_defs = impl->ssa_alloc; - const unsigned live_words = BITSET_WORDS(num_ssa_defs); - struct sized_bitset trivial_remat = bitset_create(mem_ctx, num_ssa_defs); + struct u_sparse_bitset trivial_remat; + u_sparse_bitset_init(&trivial_remat, num_ssa_defs, mem_ctx); /* Array of all live SSA defs which are spill candidates */ nir_def **spill_defs = @@ -467,8 +433,8 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls, rzalloc_array(mem_ctx, nir_def **, num_ssa_defs); /* For each call instruction, the liveness set at the call */ - const BITSET_WORD **call_live = - rzalloc_array(mem_ctx, const BITSET_WORD *, num_calls); + struct u_sparse_bitset **call_live = + rzalloc_array(mem_ctx, struct u_sparse_bitset *, num_calls); /* For each call instruction, the block index of the block it lives in */ uint32_t *call_block_indices = rzalloc_array(mem_ctx, uint32_t, num_calls); @@ -512,8 +478,8 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls, * spilled/filled sources. */ if (options->should_remat_callback) { - BITSET_WORD **updated_call_live = - rzalloc_array(mem_ctx, BITSET_WORD *, num_calls); + struct u_sparse_bitset **updated_call_live = + rzalloc_array(mem_ctx, struct u_sparse_bitset *, num_calls); nir_foreach_block(block, impl) { nir_foreach_instr(instr, block) { @@ -522,7 +488,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls, continue; for (unsigned c = 0; c < num_calls; c++) { - if (!BITSET_TEST(call_live[c], def->index)) + if (!u_sparse_bitset_test(call_live[c], def->index)) continue; if (!options->should_remat_callback(def->parent_instr, @@ -530,9 +496,8 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls, continue; if (updated_call_live[c] == NULL) { - const unsigned bitset_words = BITSET_WORDS(impl->ssa_alloc); - updated_call_live[c] = ralloc_array(mem_ctx, BITSET_WORD, bitset_words); - memcpy(updated_call_live[c], call_live[c], bitset_words * sizeof(BITSET_WORD)); + updated_call_live[c] = ralloc_size(mem_ctx, sizeof(struct u_sparse_bitset)); + u_sparse_bitset_dup(updated_call_live[c], call_live[c]); } nir_foreach_src(instr, add_src_to_call_live_bitset, updated_call_live[c]); @@ -557,7 +522,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls, nir_def *def = nir_instr_def(instr); if (def != NULL) { if (can_remat_ssa_def(def, &trivial_remat)) { - add_ssa_def_to_bitset(def, &trivial_remat); + u_sparse_bitset_set(&trivial_remat, def->index); _mesa_hash_table_insert(trivial_remap_table, def, def); } else { spill_defs[def->index] = def; @@ -567,7 +532,7 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls, if (!instr_is_shader_call(instr)) continue; - const BITSET_WORD *live = call_live[call_idx]; + struct u_sparse_bitset *live = call_live[call_idx]; struct hash_table *remap_table = _mesa_hash_table_clone(trivial_remap_table, mem_ctx); @@ -575,8 +540,8 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls, /* Make a copy of trivial_remat that we'll update as we crawl through * the live SSA defs and unspill them. */ - struct sized_bitset remat = bitset_create(mem_ctx, num_ssa_defs); - memcpy(remat.set, trivial_remat.set, live_words * sizeof(BITSET_WORD)); + struct u_sparse_bitset remat; + u_sparse_bitset_dup(&remat, &trivial_remat); /* Before the two builders are always separated by the call * instruction, it won't break anything to have two of them. @@ -596,63 +561,58 @@ spill_ssa_defs_and_lower_shader_calls(nir_shader *shader, uint32_t num_calls, util_dynarray_init_from_stack(&remat_chain, remat_chain_instrs, sizeof(remat_chain_instrs)); unsigned offset = shader->scratch_size; - for (unsigned w = 0; w < live_words; w++) { - BITSET_WORD spill_mask = live[w] & ~trivial_remat.set[w]; - while (spill_mask) { - int i = u_bit_scan(&spill_mask); - assert(i >= 0); - unsigned index = w * BITSET_WORDBITS + i; - assert(index < num_ssa_defs); - - def = spill_defs[index]; - nir_def *original_def = def, *new_def; - if (can_remat_ssa_def(def, &remat)) { - /* If this SSA def is re-materializable or based on other - * things we've already spilled, re-materialize it rather - * than spilling and filling. Anything which is trivially - * re-materializable won't even get here because we take - * those into account in spill_mask above. - */ - new_def = remat_ssa_def(&after, def, remap_table); - } else if (can_remat_chain_ssa_def(def, &remat, &remat_chain)) { - new_def = remat_chain_ssa_def(&after, &remat_chain, &remat, - fill_defs, call_idx, - remap_table); - util_dynarray_clear(&remat_chain); - } else { - bool is_bool = def->bit_size == 1; - if (is_bool) - def = nir_b2b32(&before, def); - - const unsigned comp_size = def->bit_size / 8; - offset = ALIGN(offset, comp_size); - - new_def = spill_fill(&before, &after, def, - index, call_idx, - offset, options->stack_alignment); - - if (is_bool) - new_def = nir_b2b1(&after, new_def); - - offset += def->num_components * comp_size; - } + U_SPARSE_BITSET_FOREACH_SET(live, index) { + if (u_sparse_bitset_test(&trivial_remat, index)) + continue; - /* Mark this SSA def as available in the remat set so that, if - * some other SSA def we need is computed based on it, we can - * just re-compute instead of fetching from memory. + def = spill_defs[index]; + nir_def *original_def = def, *new_def; + if (can_remat_ssa_def(def, &remat)) { + /* If this SSA def is re-materializable or based on other + * things we've already spilled, re-materialize it rather + * than spilling and filling. Anything which is trivially + * re-materializable won't even get here because we take + * those into account in spill_mask above. */ - BITSET_SET(remat.set, index); + new_def = remat_ssa_def(&after, def, remap_table); + } else if (can_remat_chain_ssa_def(def, &remat, &remat_chain)) { + new_def = remat_chain_ssa_def(&after, &remat_chain, &remat, + fill_defs, call_idx, num_ssa_defs, + remap_table); + util_dynarray_clear(&remat_chain); + } else { + bool is_bool = def->bit_size == 1; + if (is_bool) + def = nir_b2b32(&before, def); - /* For now, we just make a note of this new SSA def. We'll - * fix things up with the phi builder as a second pass. - */ - if (fill_defs[index] == NULL) { - fill_defs[index] = - rzalloc_array(fill_defs, nir_def *, num_calls); - } - fill_defs[index][call_idx] = new_def; - _mesa_hash_table_insert(remap_table, original_def, new_def); + const unsigned comp_size = def->bit_size / 8; + offset = ALIGN(offset, comp_size); + + new_def = spill_fill(&before, &after, def, + index, call_idx, + offset, options->stack_alignment); + + if (is_bool) + new_def = nir_b2b1(&after, new_def); + + offset += def->num_components * comp_size; + } + + /* Mark this SSA def as available in the remat set so that, if + * some other SSA def we need is computed based on it, we can + * just re-compute instead of fetching from memory. + */ + u_sparse_bitset_set(&remat, index); + + /* For now, we just make a note of this new SSA def. We'll + * fix things up with the phi builder as a second pass. + */ + if (fill_defs[index] == NULL) { + fill_defs[index] = + rzalloc_array(fill_defs, nir_def *, num_calls); } + fill_defs[index][call_idx] = new_def; + _mesa_hash_table_insert(remap_table, original_def, new_def); } nir_builder *b = &before; @@ -1005,7 +965,7 @@ flatten_resume_if_ladder(nir_builder *b, struct exec_list *child_list, bool child_list_contains_cursor, nir_instr *resume_instr, - struct sized_bitset *remat) + struct u_sparse_bitset *remat) { nir_cf_list cf_list; @@ -1044,7 +1004,7 @@ flatten_resume_if_ladder(nir_builder *b, b->cursor = nir_after_instr(instr); nir_def *def = nir_instr_def(instr); - BITSET_SET(remat->set, def->index); + u_sparse_bitset_set(remat, def->index); } } if (b->cursor.option == nir_cursor_after_block && @@ -1276,7 +1236,8 @@ lower_resume(nir_shader *shader, int call_idx) /* Used to track which things may have been assumed to be re-materialized * by the spilling pass and which we shouldn't delete. */ - struct sized_bitset remat = bitset_create(mem_ctx, impl->ssa_alloc); + struct u_sparse_bitset remat; + u_sparse_bitset_init(&remat, impl->ssa_alloc, mem_ctx); /* Create a nop instruction to use as a cursor as we extract and re-insert * stuff into the CFG. diff --git a/src/compiler/nir/nir_metadata.c b/src/compiler/nir/nir_metadata.c index 811d582ef42..f661444a81c 100644 --- a/src/compiler/nir/nir_metadata.c +++ b/src/compiler/nir/nir_metadata.c @@ -85,9 +85,8 @@ nir_progress(bool progress, nir_function_impl *impl, nir_metadata preserved) */ if ((impl->valid_metadata & ~preserved) & nir_metadata_live_defs) { nir_foreach_block(block, impl) { - ralloc_free(block->live_in); - ralloc_free(block->live_out); - block->live_in = block->live_out = NULL; + u_sparse_bitset_free(&block->live_in); + u_sparse_bitset_free(&block->live_out); } } @@ -115,10 +114,11 @@ nir_metadata_invalidate(nir_shader *shader) block->index = (block_idx-- & 0xf) + 0xfffffff0; if (impl->valid_metadata & nir_metadata_live_defs) { - ralloc_free(block->live_in); - ralloc_free(block->live_out); + u_sparse_bitset_free(&block->live_in); + u_sparse_bitset_free(&block->live_out); } - block->live_in = block->live_out = NULL; + u_sparse_bitset_init(&block->live_in, impl->ssa_alloc, NULL); + u_sparse_bitset_init(&block->live_out, impl->ssa_alloc, NULL); if (impl->valid_metadata & nir_metadata_dominance && block->dom_children != block->_dom_children_storage) diff --git a/src/compiler/nir/nir_opt_call.c b/src/compiler/nir/nir_opt_call.c index 7b8747d3b48..45dda349d0a 100644 --- a/src/compiler/nir/nir_opt_call.c +++ b/src/compiler/nir/nir_opt_call.c @@ -10,7 +10,7 @@ struct call_liveness_entry { struct list_head list; nir_call_instr *instr; - const BITSET_WORD *live_set; + struct u_sparse_bitset *live_set; }; static bool @@ -188,8 +188,6 @@ nir_minimize_call_live_states_impl(nir_function_impl *impl) BITSET_WORD *def_blocks = ralloc_array(mem_ctx, BITSET_WORD, block_words); list_for_each_entry(struct call_liveness_entry, entry, &call_list, list) { - unsigned i; - nir_builder b = nir_builder_at(nir_after_instr(&entry->instr->instr)); struct nir_phi_builder *builder = nir_phi_builder_create(impl); @@ -198,7 +196,7 @@ nir_minimize_call_live_states_impl(nir_function_impl *impl) struct hash_table *remap_table = _mesa_pointer_hash_table_create(mem_ctx); - BITSET_FOREACH_SET(i, entry->live_set, num_defs) { + U_SPARSE_BITSET_FOREACH_SET(entry->live_set, i) { if (!rematerializable[i] || _mesa_hash_table_search(remap_table, rematerializable[i])) continue; diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index d2769a796d2..54dd8d6bcbe 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -1853,8 +1853,8 @@ validate_dominance(nir_function_impl *impl, validate_state *state) } typedef struct { - BITSET_WORD *live_in; - BITSET_WORD *live_out; + struct u_sparse_bitset live_in; + struct u_sparse_bitset live_out; } block_liveness_metadata; static void @@ -1871,8 +1871,8 @@ validate_live_defs(nir_function_impl *impl, validate_state *state) md->live_in = block->live_in; md->live_out = block->live_out; - block->live_in = NULL; - block->live_out = NULL; + u_sparse_bitset_init(&block->live_in, impl->ssa_alloc, NULL); + u_sparse_bitset_init(&block->live_out, impl->ssa_alloc, NULL); } /* Call metadata passes and compare it against the preserved metadata */ @@ -1890,10 +1890,8 @@ validate_live_defs(nir_function_impl *impl, validate_state *state) size_t bitset_words = BITSET_WORDS(impl->ssa_alloc); if (bitset_words) { - validate_assert(state, !memcmp(md->live_in, block->live_in, - sizeof(BITSET_WORD) * bitset_words)); - validate_assert(state, !memcmp(md->live_out, block->live_out, - sizeof(BITSET_WORD) * bitset_words)); + validate_assert(state, !u_sparse_bitset_cmp(&md->live_in, &block->live_in)); + validate_assert(state, !u_sparse_bitset_cmp(&md->live_out, &block->live_out)); } } state->block = NULL; @@ -1903,8 +1901,8 @@ validate_live_defs(nir_function_impl *impl, validate_state *state) nir_block *block = (nir_block *)entry->key; block_liveness_metadata *md = &blocks[entry - state->blocks->table]; - ralloc_free(block->live_in); - ralloc_free(block->live_out); + u_sparse_bitset_free(&block->live_in); + u_sparse_bitset_free(&block->live_out); block->live_in = md->live_in; block->live_out = md->live_out; diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_liveness.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_liveness.c index ac9ef49a4d7..8f16fa56c13 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_liveness.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_liveness.c @@ -26,6 +26,7 @@ #include "etnaviv_compiler_nir.h" #include "compiler/nir/nir_worklist.h" +#include "util/sparse_bitset.h" static void range_include(struct live_def *def, unsigned index) @@ -38,7 +39,6 @@ range_include(struct live_def *def, unsigned index) struct live_defs_state { unsigned num_defs; - unsigned bitset_words; nir_function_impl *impl; nir_block *block; /* current block pointer */ @@ -54,13 +54,8 @@ static bool init_liveness_block(nir_block *block, struct live_defs_state *state) { - block->live_in = reralloc(block, block->live_in, BITSET_WORD, - state->bitset_words); - memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD)); - - block->live_out = reralloc(block, block->live_out, BITSET_WORD, - state->bitset_words); - memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD)); + u_sparse_bitset_init(&block->live_in, state->num_defs, block); + u_sparse_bitset_init(&block->live_out, state->num_defs, block); nir_block_worklist_push_head(&state->worklist, block); @@ -98,7 +93,7 @@ set_src_live(nir_src *src, void *void_state) unsigned i = state->live_map[src_index(state->impl, src)]; assert(i != ~0u); - BITSET_SET(state->block->live_in, i); + u_sparse_bitset_set(&state->block->live_in, i); range_include(&state->defs[i], state->index); return true; @@ -108,12 +103,7 @@ static bool propagate_across_edge(nir_block *pred, nir_block *succ, struct live_defs_state *state) { - BITSET_WORD progress = 0; - for (unsigned i = 0; i < state->bitset_words; ++i) { - progress |= succ->live_in[i] & ~pred->live_out[i]; - pred->live_out[i] |= succ->live_in[i]; - } - return progress != 0; + return u_sparse_bitset_merge(&pred->live_out, &succ->live_in); } unsigned @@ -162,7 +152,6 @@ etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_ma * ahead and allocate live_in and live_out sets and add all of the * blocks to the worklist. */ - state.bitset_words = BITSET_WORDS(state.num_defs); nir_foreach_block(block, impl) { init_liveness_block(block, &state); } @@ -181,8 +170,7 @@ etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_ma nir_block *block = nir_block_worklist_pop_head(&state.worklist); state.block = block; - memcpy(block->live_in, block->live_out, - state.bitset_words * sizeof(BITSET_WORD)); + u_sparse_bitset_dup(&block->live_in, &block->live_out); state.index = block_live_index[block->index + 1]; @@ -198,7 +186,7 @@ etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_ma * we don't expect any partial write_mask alus * so clearing live_in here is OK */ - BITSET_CLEAR(block->live_in, state.index); + u_sparse_bitset_clear(&block->live_in, state.index); } /* don't set_src_live for not-emitted instructions */ @@ -247,12 +235,10 @@ etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_ma /* apply live_in/live_out to ranges */ nir_foreach_block(block, impl) { - int i; - - BITSET_FOREACH_SET(i, block->live_in, state.num_defs) + U_SPARSE_BITSET_FOREACH_SET(&block->live_in, i) range_include(&state.defs[i], block_live_index[block->index]); - BITSET_FOREACH_SET(i, block->live_out, state.num_defs) + U_SPARSE_BITSET_FOREACH_SET(&block->live_out, i) range_include(&state.defs[i], block_live_index[block->index + 1]); } diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index d4f7ae6f554..8f302e81679 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -28,6 +28,7 @@ #include "compiler/glsl_types.h" #include "compiler/nir/nir_builder.h" #include "dev/intel_debug.h" +#include "util/sparse_bitset.h" /** * Returns the minimum number of vec4 elements needed to pack a type. @@ -3036,7 +3037,7 @@ brw_nir_find_complete_variable_with_location(nir_shader *shader, struct brw_quick_pressure_state { uint8_t *convergent_size; uint8_t *divergent_size; - BITSET_WORD *live; + struct u_sparse_bitset live; unsigned curr_convergent_size; unsigned curr_divergent_size; }; @@ -3086,8 +3087,8 @@ set_src_live(nir_src *src, void *v_state) if (nir_src_is_undef(*src)) return true; - if (!BITSET_TEST(state->live, src->ssa->index)) { - BITSET_SET(state->live, src->ssa->index); + if (!u_sparse_bitset_test(&state->live, src->ssa->index)) { + u_sparse_bitset_set(&state->live, src->ssa->index); /* This value just became live, add its size */ state->curr_convergent_size += state->convergent_size[src->ssa->index]; @@ -3101,8 +3102,8 @@ static bool set_def_dead(nir_def *def, void *v_state) { struct brw_quick_pressure_state *state = v_state; - if (BITSET_TEST(state->live, def->index)) { - BITSET_CLEAR(state->live, def->index); + if (u_sparse_bitset_test(&state->live, def->index)) { + u_sparse_bitset_clear(&state->live, def->index); /* This value just became dead, subtract its size */ state->curr_convergent_size -= state->convergent_size[def->index]; @@ -3121,14 +3122,12 @@ quick_pressure_estimate(nir_shader *nir, nir_metadata_require(impl, nir_metadata_divergence | nir_metadata_live_defs); - const unsigned bitset_words = BITSET_WORDS(impl->ssa_alloc); - struct brw_quick_pressure_state state = { .convergent_size = calloc(impl->ssa_alloc, sizeof(uint8_t)), .divergent_size = calloc(impl->ssa_alloc, sizeof(uint8_t)), - .live = calloc(bitset_words, sizeof(BITSET_WORD)), }; + u_sparse_bitset_init(&state.live, impl->ssa_alloc, NULL); unsigned max_convergent_size = 0, max_divergent_size = 0; nir_foreach_block(block, impl) { @@ -3140,16 +3139,13 @@ quick_pressure_estimate(nir_shader *nir, state.curr_divergent_size = 0; /* Start with sizes for anything live-out from the block */ - - unsigned i; - BITSET_FOREACH_SET(i, block->live_out, impl->ssa_alloc) { + U_SPARSE_BITSET_FOREACH_SET(&block->live_out, i) { state.curr_convergent_size += state.convergent_size[i]; state.curr_divergent_size += state.divergent_size[i]; } /* Walk backwards, add source sizes on first sight, subtract on def */ - for (i = 0; i < bitset_words; i++) - state.live[i] = block->live_out[i]; + u_sparse_bitset_dup(&state.live, &block->live_out); nir_foreach_instr_reverse(instr, block) { if (instr->type == nir_instr_type_phi) @@ -3170,7 +3166,7 @@ quick_pressure_estimate(nir_shader *nir, free(state.convergent_size); free(state.divergent_size); - free(state.live); + u_sparse_bitset_free(&state.live); } /** |