About Social Code
aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlyssa Rosenzweig <alyssa.rosenzweig@intel.com>2025-11-04 17:51:33 -0500
committerMarge Bot <marge-bot@fdo.invalid>2025-11-06 21:34:33 +0000
commitaf872180e197a9e2a1e73f742d2902ab5ed61dab (patch)
tree164a9f356fc5b5362a9bafec36ff8c9161504b91 /src
parent0cb1fca8fafe43e6ef87f585e2da79936997cb85 (diff)
agx: use sparse live-sets
fixes O(N^2) memory usage and runtime around liveness/scheduling/spilling/RA, and proves out the design for the common code sparse bitsets (I did need to make an adjustment for this - worth the effort). Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37908>
Diffstat (limited to 'src')
-rw-r--r--src/asahi/compiler/agx_compiler.h6
-rw-r--r--src/asahi/compiler/agx_liveness.c43
-rw-r--r--src/asahi/compiler/agx_pressure_schedule.c31
-rw-r--r--src/asahi/compiler/agx_register_allocate.c16
-rw-r--r--src/asahi/compiler/agx_spill.c17
5 files changed, 47 insertions, 66 deletions
diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h
index e7dc9cd1aac..6df0c50e5a6 100644
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -457,8 +457,8 @@ typedef struct agx_block {
bool divergent;
/* Liveness analysis results */
- BITSET_WORD *live_in;
- BITSET_WORD *live_out;
+ struct u_sparse_bitset live_in;
+ struct u_sparse_bitset live_out;
BITSET_DECLARE(reg_live_in, AGX_NUM_REGS);
BITSET_DECLARE(reg_live_out, AGX_NUM_REGS);
@@ -1094,7 +1094,7 @@ void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
unsigned n);
void agx_compute_liveness(agx_context *ctx);
-void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
+void agx_liveness_ins_update(struct u_sparse_bitset *live, agx_instr *I);
bool agx_nir_opt_preamble(nir_shader *s, unsigned *sizes);
bool agx_nir_lower_load_mask(nir_shader *shader);
diff --git a/src/asahi/compiler/agx_liveness.c b/src/asahi/compiler/agx_liveness.c
index dc84ee370c4..79c535e6d77 100644
--- a/src/asahi/compiler/agx_liveness.c
+++ b/src/asahi/compiler/agx_liveness.c
@@ -4,9 +4,7 @@
* SPDX-License-Identifier: MIT
*/
-#include "util/list.h"
-#include "util/set.h"
-#include "util/u_memory.h"
+#include "util/sparse_bitset.h"
#include "agx_compiler.h"
/* Liveness analysis is a backwards-may dataflow analysis pass. Within a block,
@@ -16,17 +14,17 @@
/* live_in[s] = GEN[s] + (live_out[s] - KILL[s]) */
void
-agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I)
+agx_liveness_ins_update(struct u_sparse_bitset *live, agx_instr *I)
{
agx_foreach_ssa_dest(I, d)
- BITSET_CLEAR(live, I->dest[d].value);
+ u_sparse_bitset_clear(live, I->dest[d].value);
agx_foreach_ssa_src(I, s) {
/* If the source is not live after this instruction, but becomes live
* at this instruction, this is the use that kills the source
*/
- I->src[s].kill = !BITSET_TEST(live, I->src[s].value);
- BITSET_SET(live, I->src[s].value);
+ I->src[s].kill = !u_sparse_bitset_test(live, I->src[s].value);
+ u_sparse_bitset_set(live, I->src[s].value);
}
}
@@ -43,14 +41,12 @@ agx_compute_liveness(agx_context *ctx)
u_worklist_init(&worklist, ctx->num_blocks, NULL);
/* Free any previous liveness, and allocate */
- unsigned words = BITSET_WORDS(ctx->alloc);
-
agx_foreach_block(ctx, block) {
- ralloc_free(block->live_in);
- ralloc_free(block->live_out);
+ u_sparse_bitset_free(&block->live_in);
+ u_sparse_bitset_free(&block->live_out);
- block->live_in = rzalloc_array(block, BITSET_WORD, words);
- block->live_out = rzalloc_array(block, BITSET_WORD, words);
+ u_sparse_bitset_init(&block->live_in, ctx->alloc, block);
+ u_sparse_bitset_init(&block->live_out, ctx->alloc, block);
agx_worklist_push_head(&worklist, block);
}
@@ -61,11 +57,11 @@ agx_compute_liveness(agx_context *ctx)
agx_block *blk = agx_worklist_pop_head(&worklist);
/* Update its liveness information */
- memcpy(blk->live_in, blk->live_out, words * sizeof(BITSET_WORD));
+ u_sparse_bitset_dup(&blk->live_in, &blk->live_out);
agx_foreach_instr_in_block_rev(blk, I) {
if (I->op != AGX_OPCODE_PHI)
- agx_liveness_ins_update(blk->live_in, I);
+ agx_liveness_ins_update(&blk->live_in, I);
}
/* Propagate the live in of the successor (blk) to the live out of
@@ -76,32 +72,25 @@ agx_compute_liveness(agx_context *ctx)
* corresponding sources.
*/
agx_foreach_predecessor(blk, pred) {
- BITSET_WORD *live = ralloc_array(blk, BITSET_WORD, words);
- memcpy(live, blk->live_in, words * sizeof(BITSET_WORD));
+ struct u_sparse_bitset live;
+ u_sparse_bitset_dup(&live, &blk->live_in);
/* Kill write */
agx_foreach_phi_in_block(blk, phi) {
assert(phi->dest[0].type == AGX_INDEX_NORMAL);
- BITSET_CLEAR(live, phi->dest[0].value);
+ u_sparse_bitset_clear(&live, phi->dest[0].value);
}
/* Make live the corresponding source */
agx_foreach_phi_in_block(blk, phi) {
agx_index operand = phi->src[agx_predecessor_index(blk, *pred)];
if (operand.type == AGX_INDEX_NORMAL) {
- BITSET_SET(live, operand.value);
+ u_sparse_bitset_set(&live, operand.value);
phi->src[agx_predecessor_index(blk, *pred)].kill = false;
}
}
- bool progress = false;
-
- for (unsigned i = 0; i < words; ++i) {
- progress |= live[i] & ~((*pred)->live_out[i]);
- (*pred)->live_out[i] |= live[i];
- }
-
- if (progress)
+ if (u_sparse_bitset_merge(&(*pred)->live_out, &live))
agx_worklist_push_tail(&worklist, *pred);
}
}
diff --git a/src/asahi/compiler/agx_pressure_schedule.c b/src/asahi/compiler/agx_pressure_schedule.c
index 35961bee876..57849fd6e83 100644
--- a/src/asahi/compiler/agx_pressure_schedule.c
+++ b/src/asahi/compiler/agx_pressure_schedule.c
@@ -7,6 +7,7 @@
/* Bottom-up local scheduler to reduce register pressure */
#include "util/dag.h"
+#include "util/sparse_bitset.h"
#include "agx_compiler.h"
#include "agx_opcodes.h"
@@ -15,7 +16,7 @@ struct sched_ctx {
struct dag *dag;
/* Live set */
- BITSET_WORD *live;
+ struct u_sparse_bitset live;
};
struct sched_node {
@@ -121,13 +122,13 @@ create_dag(agx_context *ctx, agx_block *block, void *memctx)
* live_in = (live_out - KILL) + GEN
*/
static signed
-calculate_pressure_delta(agx_instr *I, BITSET_WORD *live)
+calculate_pressure_delta(agx_instr *I, struct u_sparse_bitset *live)
{
signed delta = 0;
/* Destinations must be unique */
agx_foreach_ssa_dest(I, d) {
- if (BITSET_TEST(live, I->dest[d].value))
+ if (u_sparse_bitset_test(live, I->dest[d].value))
delta -= agx_index_size_16(I->dest[d]);
}
@@ -142,7 +143,7 @@ calculate_pressure_delta(agx_instr *I, BITSET_WORD *live)
}
}
- if (!dupe && !BITSET_TEST(live, I->src[src].value))
+ if (!dupe && !u_sparse_bitset_test(live, I->src[src].value))
delta += agx_index_size_16(I->src[src]);
}
@@ -185,7 +186,7 @@ choose_instr(struct sched_ctx *s)
if (n->instr->op == AGX_OPCODE_WAIT_PIX)
return n;
- int32_t delta = calculate_pressure_delta(n->instr, s->live);
+ int32_t delta = calculate_pressure_delta(n->instr, &s->live);
if (delta < min_delta) {
best = n;
@@ -204,16 +205,16 @@ pressure_schedule_block(agx_context *ctx, agx_block *block, struct sched_ctx *s)
signed orig_max_pressure = 0;
unsigned nr_ins = 0;
- memcpy(s->live, block->live_out, BITSET_BYTES(ctx->alloc));
+ u_sparse_bitset_dup(&s->live, &block->live_out);
agx_foreach_instr_in_block_rev(block, I) {
- pressure += calculate_pressure_delta(I, s->live);
+ pressure += calculate_pressure_delta(I, &s->live);
orig_max_pressure = MAX2(pressure, orig_max_pressure);
- agx_liveness_ins_update(s->live, I);
+ agx_liveness_ins_update(&s->live, I);
nr_ins++;
}
- memcpy(s->live, block->live_out, BITSET_BYTES(ctx->alloc));
+ u_sparse_bitset_dup(&s->live, &block->live_out);
/* off by a constant, that's ok */
signed max_pressure = 0;
@@ -224,12 +225,12 @@ pressure_schedule_block(agx_context *ctx, agx_block *block, struct sched_ctx *s)
while (!list_is_empty(&s->dag->heads)) {
struct sched_node *node = choose_instr(s);
- pressure += calculate_pressure_delta(node->instr, s->live);
+ pressure += calculate_pressure_delta(node->instr, &s->live);
max_pressure = MAX2(pressure, max_pressure);
dag_prune_head(s->dag, &node->dag);
schedule[nr_ins++] = node;
- agx_liveness_ins_update(s->live, node->instr);
+ agx_liveness_ins_update(&s->live, node->instr);
}
/* Bail if it looks like it's worse */
@@ -252,14 +253,10 @@ agx_pressure_schedule(agx_context *ctx)
{
agx_compute_liveness(ctx);
void *memctx = ralloc_context(ctx);
- BITSET_WORD *live =
- ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->alloc));
agx_foreach_block(ctx, block) {
- struct sched_ctx sctx = {
- .dag = create_dag(ctx, block, memctx),
- .live = live,
- };
+ struct sched_ctx sctx = {.dag = create_dag(ctx, block, memctx)};
+ u_sparse_bitset_init(&sctx.live, ctx->alloc, memctx);
pressure_schedule_block(ctx, block, &sctx);
}
diff --git a/src/asahi/compiler/agx_register_allocate.c b/src/asahi/compiler/agx_register_allocate.c
index eff2a97d6f8..53bc47c6d76 100644
--- a/src/asahi/compiler/agx_register_allocate.c
+++ b/src/asahi/compiler/agx_register_allocate.c
@@ -5,6 +5,7 @@
#include "util/bitset.h"
#include "util/macros.h"
+#include "util/sparse_bitset.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -233,12 +234,9 @@ agx_calc_register_demand(agx_context *ctx, bool remat)
unsigned demand = reserved_size(ctx);
/* Everything live-in */
- {
- int i;
- BITSET_FOREACH_SET(i, block->live_in, ctx->alloc) {
- if (classes[i] == RA_GPR)
- demand += widths[i];
- }
+ U_SPARSE_BITSET_FOREACH_SET(&block->live_in, i) {
+ if (classes[i] == RA_GPR)
+ demand += widths[i];
}
max_demand = MAX2(demand, max_demand);
@@ -674,8 +672,7 @@ reserve_live_in(struct ra_ctx *rctx)
agx_builder b =
agx_init_builder(rctx->shader, agx_before_block(rctx->block));
- int i;
- BITSET_FOREACH_SET(i, rctx->block->live_in, rctx->shader->alloc) {
+ U_SPARSE_BITSET_FOREACH_SET(&rctx->block->live_in, i) {
/* Skip values defined in loops when processing the loop header */
if (!BITSET_TEST(rctx->visited, i))
continue;
@@ -1191,8 +1188,7 @@ agx_ra_assign_local(struct ra_ctx *rctx)
rctx->bound[i] * sizeof(*block->reg_to_ssa_out[i]));
}
- int i;
- BITSET_FOREACH_SET(i, block->live_out, rctx->shader->alloc) {
+ U_SPARSE_BITSET_FOREACH_SET(&block->live_out, i) {
block->reg_to_ssa_out[rctx->classes[i]][rctx->ssa_to_reg[i]] = i;
}
diff --git a/src/asahi/compiler/agx_spill.c b/src/asahi/compiler/agx_spill.c
index 98c318aa8a3..2ea287ddc7e 100644
--- a/src/asahi/compiler/agx_spill.c
+++ b/src/asahi/compiler/agx_spill.c
@@ -8,6 +8,7 @@
#include "util/bitset.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
+#include "util/sparse_bitset.h"
#include "util/u_dynarray.h"
#include "util/u_qsort.h"
#include "agx_builder.h"
@@ -849,7 +850,7 @@ compute_w_entry_loop_header(struct spill_ctx *ctx)
agx_block *block = ctx->block;
struct spill_block *sb = spill_block(ctx, block);
- unsigned nP = __bitset_count(block->live_in, BITSET_WORDS(ctx->n));
+ unsigned nP = u_sparse_bitset_count(&block->live_in);
struct candidate *candidates = calloc(nP, sizeof(struct candidate));
unsigned j = 0;
@@ -996,12 +997,12 @@ compute_s_entry(struct spill_ctx *ctx)
for (unsigned i = 0; i < sp->nS_exit; ++i) {
v = sp->S_exit[i];
- if (BITSET_TEST(ctx->block->live_in, v))
+ if (u_sparse_bitset_test(&ctx->block->live_in, v))
BITSET_SET(ctx->S, v);
}
}
- BITSET_FOREACH_SET(v, ctx->block->live_in, ctx->n) {
+ U_SPARSE_BITSET_FOREACH_SET(&ctx->block->live_in, v) {
if (!BITSET_TEST(ctx->W, v))
BITSET_SET(ctx->S, v);
}
@@ -1142,23 +1143,21 @@ validate_next_use_info(UNUSED agx_context *ctx,
UNUSED struct spill_block *blocks)
{
#ifndef NDEBUG
- int i;
-
agx_foreach_block(ctx, blk) {
struct spill_block *sb = &blocks[blk->index];
/* Invariant: next-use distance is finite iff the node is live */
- BITSET_FOREACH_SET(i, blk->live_in, ctx->alloc)
+ U_SPARSE_BITSET_FOREACH_SET(&blk->live_in, i)
assert(search_next_uses(&sb->next_use_in, i) < DIST_INFINITY);
- BITSET_FOREACH_SET(i, blk->live_out, ctx->alloc)
+ U_SPARSE_BITSET_FOREACH_SET(&blk->live_out, i)
assert(search_next_uses(&sb->next_use_out, i) < DIST_INFINITY);
foreach_next_use(&sb->next_use_in, i, _)
- assert(BITSET_TEST(blk->live_in, i));
+ assert(u_sparse_bitset_test(&blk->live_in, i));
foreach_next_use(&sb->next_use_out, i, _)
- assert(BITSET_TEST(blk->live_out, i));
+ assert(u_sparse_bitset_test(&blk->live_out, i));
}
#endif
}