diff options
| author | Lionel Landwerlin <lionel.g.landwerlin@intel.com> | 2025-07-29 09:56:33 +0300 |
|---|---|---|
| committer | Marge Bot <marge-bot@fdo.invalid> | 2025-11-06 15:27:18 +0000 |
| commit | f56e118ecdb926c85282d90b178c60d220539ea4 (patch) | |
| tree | 717058d8c02d2251b02137a3a6e77b9820d940a0 | |
| parent | 69d7fcd6135a7713c02fffe644c09a6d738e767a (diff) | |
vulkan/runtime: split out partitioning logic
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36647>
| -rw-r--r-- | src/vulkan/runtime/vk_pipeline.c | 136 |
1 file changed, 85 insertions, 51 deletions
diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c index cc19badd3c2..993dee1581a 100644 --- a/src/vulkan/runtime/vk_pipeline.c +++ b/src/vulkan/runtime/vk_pipeline.c @@ -1132,13 +1132,73 @@ vk_pipeline_to_shader_flags(VkPipelineCreateFlags2KHR pipeline_flags, return shader_flags; } +/* Specify how linking should be done for graphics stages */ +struct vk_graphics_pipeline_link_info { + bool optimize; + + uint32_t part_count; + uint32_t partition[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES + 1]; + + VkShaderStageFlags part_stages[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES]; +}; + +static void +vk_graphics_pipeline_compute_link_info(struct vk_graphics_pipeline_link_info *link_info, + bool link_time_optimize, + uint32_t stage_count, + const struct vk_pipeline_stage *stages) +{ + memset(link_info, 0, sizeof(*link_info)); + + link_info->optimize = link_time_optimize; + + /* No shader, must be a pipeline library with vertex-input/color-output */ + if (stage_count == 0) + return; + + /* Partition the shaders. Whenever pipelines are used, + * vertex/geometry/fragment stages are always specified together, so should + * always be linked. That doesn't break the fast link since the relevant + * link happens at pipeline library create time. + * + * We don't gate this behind an option since linking shaders is beneficial + * on all hardware, to clean up the I/O mess that applications regularly + * leave. 
+ */ + if (link_time_optimize) { + link_info->partition[1] = stage_count; + link_info->part_count = 1; + } else if (stages[0].stage == MESA_SHADER_FRAGMENT) { + assert(stage_count == 1); + link_info->partition[1] = stage_count; + link_info->part_count = 1; + } else if (stages[stage_count - 1].stage == MESA_SHADER_FRAGMENT) { + /* In this case we have both geometry stages and fragment */ + assert(stage_count > 1); + link_info->partition[1] = stage_count - 1; + link_info->partition[2] = stage_count; + link_info->part_count = 2; + } else { + /* In this case we only have geometry stages */ + link_info->partition[1] = stage_count; + link_info->part_count = 1; + } + + for (uint32_t i = 0; i < link_info->part_count; i++) { + for (uint32_t j = link_info->partition[i]; j < link_info->partition[i + 1]; j++) { + const struct vk_pipeline_stage *stage = &stages[j]; + link_info->part_stages[i] |= mesa_to_vk_shader_stage(stage->stage); + } + } +} + static VkResult vk_graphics_pipeline_compile_shaders(struct vk_device *device, struct vk_pipeline_cache *cache, struct vk_graphics_pipeline *pipeline, struct vk_pipeline_layout *pipeline_layout, const struct vk_graphics_pipeline_state *state, - bool link_time_optimize, + const struct vk_graphics_pipeline_link_info *link_info, uint32_t stage_count, struct vk_pipeline_stage *stages, VkPipelineCreationFeedback *stage_feedbacks) @@ -1153,7 +1213,7 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device, * likely haven't been properly linked. We keep the precompiled shaders * and we still look it up in the cache so it may still be fast. */ - if (link_time_optimize) { + if (link_info->optimize) { for (uint32_t i = 0; i < stage_count; i++) { if (stages[i].shader != NULL) { vk_shader_unref(device, stages[i].shader); @@ -1204,51 +1264,20 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device, blake3_hash layout_blake3; _mesa_blake3_final(&blake3_ctx, layout_blake3); - /* Partition the shaders. 
Whenever pipelines are used, - * vertex/geometry/fragment stages are always specified together, so should - * always be linked. That doesn't break the fast link since the relevant link - * happens at pipeline library create time. - * - * We don't gate this behind an option since linking shaders is beneficial on - * all hardware, to clean up the I/O mess that applications regularly leave. - */ - uint32_t part_count; - uint32_t partition[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES + 1] = { 0 }; - if (link_time_optimize) { - partition[1] = stage_count; - part_count = 1; - } else if (stages[0].stage == MESA_SHADER_FRAGMENT) { - assert(stage_count == 1); - partition[1] = stage_count; - part_count = 1; - } else if (stages[stage_count - 1].stage == MESA_SHADER_FRAGMENT) { - /* In this case we have both geometry stages and fragment */ - assert(stage_count > 1); - partition[1] = stage_count - 1; - partition[2] = stage_count; - part_count = 2; - } else { - /* In this case we only have geometry stages */ - partition[1] = stage_count; - part_count = 1; - } - - for (uint32_t p = 0; p < part_count; p++) { + for (uint32_t p = 0; p < link_info->part_count; p++) { const int64_t part_start = os_time_get_nano(); /* Don't try to re-compile any fast-link shaders */ - if (!link_time_optimize && stages[partition[p]].shader != NULL) + if (!link_info->optimize && stages[link_info->partition[p]].shader != NULL) continue; struct vk_shader_pipeline_cache_key shader_key = { 0 }; _mesa_blake3_init(&blake3_ctx); - VkShaderStageFlags part_stages = 0; - for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) { const struct vk_pipeline_stage *stage = &stages[i]; - part_stages |= mesa_to_vk_shader_stage(stage->stage); _mesa_blake3_update(&blake3_ctx, stage->precomp->blake3, sizeof(stage->precomp->blake3)); @@ -1259,13 +1288,13 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device, blake3_hash state_blake3; 
ops->hash_state(device->physical, state, &device->enabled_features, - part_stages, state_blake3); + link_info->part_stages[p], state_blake3); _mesa_blake3_update(&blake3_ctx, state_blake3, sizeof(state_blake3)); _mesa_blake3_update(&blake3_ctx, layout_blake3, sizeof(layout_blake3)); - if (part_stages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | - VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) + if (link_info->part_stages[p] & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) _mesa_blake3_update(&blake3_ctx, &tess_info, sizeof(tess_info)); /* The set of geometry stages used together is used to generate the @@ -1305,7 +1334,7 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device, */ bool all_shaders_found = true; bool all_cache_hits = true; - for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) { struct vk_pipeline_stage *stage = &stages[i]; shader_key.stage = stage->stage; @@ -1345,7 +1374,7 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device, * in the partition. Otherwise, we have to go re-compile it all * anyway. 
*/ - for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) { struct vk_pipeline_stage *stage = &stages[i]; stage_feedbacks[stage->stage].flags |= @@ -1356,7 +1385,7 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device, if (all_shaders_found) { /* Update duration to take cache lookups into account */ const int64_t part_end = os_time_get_nano(); - for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) { struct vk_pipeline_stage *stage = &stages[i]; stage_feedbacks[stage->stage].duration += part_end - part_start; } @@ -1369,16 +1398,16 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device, return VK_PIPELINE_COMPILE_REQUIRED; struct vk_shader_compile_info infos[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES]; - for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) { struct vk_pipeline_stage *stage = &stages[i]; VkShaderCreateFlagsEXT shader_flags = vk_pipeline_to_shader_flags(pipeline->base.flags, stage->stage); - if (partition[p + 1] - partition[p] > 1) + if (link_info->partition[p + 1] - link_info->partition[p] > 1) shader_flags |= VK_SHADER_CREATE_LINK_STAGE_BIT_EXT; - if ((part_stages & VK_SHADER_STAGE_MESH_BIT_EXT) && + if ((link_info->part_stages[p] & VK_SHADER_STAGE_MESH_BIT_EXT) && !(geom_stages & VK_SHADER_STAGE_TASK_BIT_EXT)) shader_flags = VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT; @@ -1400,7 +1429,7 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device, nir_shader *nir = vk_pipeline_precomp_shader_get_nir(stage->precomp, nir_options); if (nir == NULL) { - for (uint32_t j = partition[p]; j < i; j++) + for (uint32_t j = link_info->partition[p]; j < i; j++) ralloc_free(infos[i].nir); return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1439,16 +1468,16 @@ 
vk_graphics_pipeline_compile_shaders(struct vk_device *device, * returns, we own the shaders but not the NIR in infos. */ struct vk_shader *shaders[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES]; - result = ops->compile(device, partition[p + 1] - partition[p], - &infos[partition[p]], + result = ops->compile(device, link_info->partition[p + 1] - link_info->partition[p], + &infos[link_info->partition[p]], state, &device->enabled_features, &device->alloc, - &shaders[partition[p]]); + &shaders[link_info->partition[p]]); if (result != VK_SUCCESS) return result; const int64_t part_end = os_time_get_nano(); - for (uint32_t i = partition[p]; i < partition[p + 1]; i++) { + for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) { struct vk_pipeline_stage *stage = &stages[i]; shader_key.stage = stage->stage; @@ -1831,8 +1860,13 @@ vk_create_graphics_pipeline(struct vk_device *device, (pipeline->base.flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT); + struct vk_graphics_pipeline_link_info link_info; + vk_graphics_pipeline_compute_link_info(&link_info, lto, + stage_count, stages); + result = vk_graphics_pipeline_compile_shaders(device, cache, pipeline, - pipeline_layout, state, lto, + pipeline_layout, state, + &link_info, stage_count, stages, stage_feedbacks); if (result != VK_SUCCESS) |