author    Lionel Landwerlin <lionel.g.landwerlin@intel.com>  2025-07-29 12:45:39 +0300
committer Marge Bot <marge-bot@fdo.invalid>                  2025-11-06 15:27:25 +0000
commit    08ed1c3da2c1ad07cb7180311f83ad0ae672209f
tree      9d8a1a7662771edf263e1b810ae0851608a3385c
parent    b2d6ead1ee2e3f63ec2d24b770c89997fe0c9f4e
vulkan/runtime: split graphics shaders hashing from compile
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36647>
-rw-r--r--  src/vulkan/runtime/vk_pipeline.c | 671
1 file changed, 362 insertions(+), 309 deletions(-)
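Condensed from the hunks below, the call flow this patch leaves in vk_create_graphics_pipeline() is roughly the following sketch. Every name comes from the diff itself; error handling and creation-feedback bookkeeping are elided:

   /* Phase 1: gather everything compilation needs and hash it up front:
    * per-stage precomp keys, the BLAKE3 layout/state hashes and one
    * shader key per linked partition. */
   struct vk_graphics_pipeline_compile_info compile_info;
   vk_get_graphics_pipeline_compile_info(&compile_info, device,
                                         is_library ? &pipeline->lib.state : &state_tmp,
                                         is_library ? &pipeline->lib.all_state : &all_state_tmp,
                                         pCreateInfo);

   /* Phase 2: look up or compile shaders using the precomputed
    * stage->shader_key values; no hashing happens in here anymore. */
   result = vk_graphics_pipeline_compile_shaders(device, cache, pipeline_flags,
                                                 pipeline_layout, &compile_info,
                                                 stage_feedbacks);

   /* The compile info owns cloned stage references; release them on both
    * the success and the failure path. */
   vk_release_graphics_pipeline_compile_info(&compile_info, device, pAllocator);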
diff --git a/src/vulkan/runtime/vk_pipeline.c b/src/vulkan/runtime/vk_pipeline.c
index eba21a72294..477e3f826e7 100644
--- a/src/vulkan/runtime/vk_pipeline.c
+++ b/src/vulkan/runtime/vk_pipeline.c
@@ -41,6 +41,8 @@
#include "nir_serialize.h"
#include "nir.h"
+#include "shader_enums.h"
+
#include "util/mesa-sha1.h"
bool
@@ -1143,8 +1145,22 @@ vk_pipeline_to_shader_flags(VkPipelineCreateFlags2KHR pipeline_flags,
return shader_flags;
}
-/* Specify how linking should be done for graphics stages */
-struct vk_graphics_pipeline_link_info {
+struct vk_graphics_pipeline_compile_info {
+ /* Compacted array of stages */
+ struct vk_pipeline_stage stages[MESA_SHADER_MESH_STAGES];
+ uint32_t stage_count;
+
+ /* Maps gl_shader_stage to the matching index in stages[] */
+ uint32_t stage_to_index[MESA_SHADER_MESH_STAGES];
+
+ /* Imported stages from pipeline libraries */
+ VkShaderStageFlags imported_stages;
+
+ uint32_t set_layout_count;
+ struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS];
+
+ struct vk_graphics_pipeline_state *state;
+
bool optimize;
uint32_t part_count;
@@ -1153,19 +1169,178 @@ struct vk_graphics_pipeline_link_info {
VkShaderStageFlags part_stages[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES];
};
+/* Compute all the state necessary for compilation: precomp shader hashes,
+ * final shader hashes and the merged graphics pipeline state.
+ */
static void
-vk_graphics_pipeline_compute_link_info(struct vk_graphics_pipeline_link_info *link_info,
- bool link_time_optimize,
- uint32_t stage_count,
- const struct vk_pipeline_stage *stages)
+vk_get_graphics_pipeline_compile_info(struct vk_graphics_pipeline_compile_info *info,
+ struct vk_device *device,
+ struct vk_graphics_pipeline_state *state,
+ struct vk_graphics_pipeline_all_state *all_state,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- memset(link_info, 0, sizeof(*link_info));
+ VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
- link_info->optimize = link_time_optimize;
+ memset(info, 0, sizeof(*info));
- /* No shader, must be a pipeline library with vertex-input/color-output */
- if (stage_count == 0)
- return;
+ info->state = state;
+
+ const VkPipelineCreateFlags2KHR pipeline_flags =
+ vk_graphics_pipeline_create_flags(pCreateInfo);
+
+ const VkPipelineLibraryCreateInfoKHR *libs_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_LIBRARY_CREATE_INFO_KHR);
+
+ if (libs_info) {
+ for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
+ VK_FROM_HANDLE(vk_pipeline, lib_pipeline, libs_info->pLibraries[i]);
+ assert(lib_pipeline->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
+ assert(lib_pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
+ struct vk_graphics_pipeline *lib_gfx_pipeline =
+ container_of(lib_pipeline, struct vk_graphics_pipeline, base);
+
+ vk_graphics_pipeline_state_merge(info->state, &lib_gfx_pipeline->lib.state);
+
+ info->set_layout_count = MAX2(info->set_layout_count,
+ lib_gfx_pipeline->set_layout_count);
+ for (uint32_t i = 0; i < lib_gfx_pipeline->set_layout_count; i++) {
+ if (lib_gfx_pipeline->set_layouts[i] == NULL)
+ continue;
+
+ if (info->set_layouts[i] == NULL)
+ info->set_layouts[i] = lib_gfx_pipeline->set_layouts[i];
+ }
+
+ for (uint32_t i = 0; i < lib_gfx_pipeline->stage_count; i++) {
+ const struct vk_pipeline_stage *lib_stage =
+ &lib_gfx_pipeline->stages[i];
+
+ /* We shouldn't have duplicated stages in the imported pipeline
+ * but it's cheap enough to protect against it so we may as well.
+ */
+ assert(lib_stage->stage < ARRAY_SIZE(info->stages));
+ assert(vk_pipeline_stage_is_null(&info->stages[lib_stage->stage]));
+ if (!vk_pipeline_stage_is_null(&info->stages[lib_stage->stage]))
+ continue;
+
+ info->stages[lib_stage->stage] = vk_pipeline_stage_clone(lib_stage);
+ info->imported_stages |= mesa_to_vk_shader_stage(lib_stage->stage);
+ }
+ }
+ }
+
+ if (pipeline_layout != NULL) {
+ info->set_layout_count = MAX2(info->set_layout_count,
+ pipeline_layout->set_count);
+ for (uint32_t i = 0; i < pipeline_layout->set_count; i++) {
+ if (pipeline_layout->set_layouts[i] == NULL)
+ continue;
+
+ if (info->set_layouts[i] == NULL)
+ info->set_layouts[i] = pipeline_layout->set_layouts[i];
+ }
+ }
+
+ VkResult result = vk_graphics_pipeline_state_fill(device, info->state,
+ pCreateInfo,
+ NULL /* driver_rp */,
+ 0 /* driver_rp_flags */,
+ all_state,
+ NULL, 0, NULL);
+ /* We provide an all_state, so there should not be any allocation and hence no failure. */
+ assert(result == VK_SUCCESS);
+
+ VkShaderStageFlags all_stages = info->imported_stages;
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+ const VkPipelineShaderStageCreateInfo *stage_info =
+ &pCreateInfo->pStages[i];
+
+ assert(util_bitcount(stage_info->stage) == 1);
+ if (!(info->state->shader_stages & stage_info->stage))
+ continue;
+
+ mesa_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
+ assert(vk_device_supports_stage(device, stage));
+
+ /* We don't need to load anything for imported stages; the precomp
+ * should be included if
+ * VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT was
+ * provided, and the shader should obviously be there.
+ */
+ if (info->imported_stages & stage_info->stage)
+ continue;
+
+ info->stages[stage] = (struct vk_pipeline_stage) {
+ .stage = stage,
+ };
+ all_stages |= stage_info->stage;
+
+ vk_pipeline_hash_precomp_shader_stage(device, pipeline_flags,
+ pCreateInfo->pNext,
+ stage_info, &info->stages[stage]);
+ }
+
+ /* Compact the array of stages */
+ info->stage_count = 0;
+ for (uint32_t s = 0; s < ARRAY_SIZE(info->stages); s++) {
+ assert(s >= info->stage_count);
+ if (all_stages & mesa_to_vk_shader_stage(s))
+ info->stages[info->stage_count++] = info->stages[s];
+ }
+ for (uint32_t s = info->stage_count; s < ARRAY_SIZE(info->stages); s++)
+ memset(&info->stages[s], 0, sizeof(info->stages[s]));
+
+ /* Sort so we always give the driver shaders in order.
+ *
+ * This makes everything easier for everyone. This also helps stabilize
+ * shader keys so that we get a cache hit even if the client gives us the
+ * stages in a different order.
+ */
+ qsort(info->stages, info->stage_count,
+ sizeof(info->stages[0]), cmp_vk_pipeline_stages);
+
+ for (uint32_t s = 0; s < info->stage_count; s++)
+ info->stage_to_index[info->stages[s].stage] = s;
+
+ /* Decide whether we should apply link-time optimizations. The spec says:
+ *
+ * VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT specifies that
+ * pipeline libraries being linked into this library should have link time
+ * optimizations applied. If this bit is omitted, implementations should
+ * instead perform linking as rapidly as possible.
+ *
+ * ...
+ *
+ * Using VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT (or not) when
+ * linking pipeline libraries is intended as a performance tradeoff
+ * between host and device. If the bit is omitted, linking should be
+ * faster and produce a pipeline more rapidly, but performance of the
+ * pipeline on the target device may be reduced. If the bit is included,
+ * linking may be slower but should produce a pipeline with device
+ * performance comparable to a monolithically created pipeline.
+ *
+ * The key phrase here is "pipeline libraries". When we are linking pipeline
+ * libraries, we look at this bit to determine whether to apply link-time
+ * optimizations. When there are no pipeline libraries, however, we are
+ * compiling a monolithic pipeline, which the last sentence implies should
+ * always have link-time optimizations applied.
+ *
+ * Summarizing, we want to link-time optimize monolithic pipelines and
+ * non-monolithic pipelines with LINK_TIME_OPTIMIZATION_BIT.
+ *
+ * (Strictly speaking, there's a corner case here, where a pipeline without
+ * LINK_TIME_OPTIMIZATION_BIT links pipeline libraries for graphics state but
+ * supplies shaders directly outside of the pipeline library. This logic does
+ * not link those shaders, which is a conservative choice. GPL is a disaster
+ * of combinatoric complexity, and this simplified approach gets good
+ * performance for the cases that actually matter: monolithic, GPL fast link,
+ * GPL optimized link.)
+ */
+ info->optimize =
+ libs_info == NULL ||
+ (pipeline_flags &
+ VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT);
/* Partition the shaders. Whenever pipelines are used,
* vertex/geometry/fragment stages are always specified together, so should
@@ -1176,61 +1351,121 @@ vk_graphics_pipeline_compute_link_info(struct vk_graphics_pipeline_link_info *link_info,
* on all hardware, to clean up the I/O mess that applications regularly
* leave.
*/
- if (link_time_optimize) {
- link_info->partition[1] = stage_count;
- link_info->part_count = 1;
- } else if (stages[0].stage == MESA_SHADER_FRAGMENT) {
- assert(stage_count == 1);
- link_info->partition[1] = stage_count;
- link_info->part_count = 1;
- } else if (stages[stage_count - 1].stage == MESA_SHADER_FRAGMENT) {
+ if (info->stage_count == 0) {
+ info->part_count = 0;
+ } else if (info->optimize) {
+ info->partition[1] = info->stage_count;
+ info->part_count = 1;
+ } else if (info->stages[0].stage == MESA_SHADER_FRAGMENT) {
+ assert(info->stage_count == 1);
+ info->partition[1] = info->stage_count;
+ info->part_count = 1;
+ } else if (info->stages[info->stage_count - 1].stage == MESA_SHADER_FRAGMENT) {
/* In this case we have both geometry stages and fragment */
- assert(stage_count > 1);
- link_info->partition[1] = stage_count - 1;
- link_info->partition[2] = stage_count;
- link_info->part_count = 2;
+ assert(info->stage_count > 1);
+ info->partition[1] = info->stage_count - 1;
+ info->partition[2] = info->stage_count;
+ info->part_count = 2;
} else {
/* In this case we only have geometry stages */
- link_info->partition[1] = stage_count;
- link_info->part_count = 1;
+ info->partition[1] = info->stage_count;
+ info->part_count = 1;
+ }
+
+ for (uint32_t i = 0; i < info->part_count; i++) {
+ for (uint32_t j = info->partition[i]; j < info->partition[i + 1]; j++) {
+ const struct vk_pipeline_stage *stage = &info->stages[j];
+ info->part_stages[i] |= mesa_to_vk_shader_stage(stage->stage);
+ }
+ }
+
+ struct mesa_blake3 blake3_ctx;
+ _mesa_blake3_init(&blake3_ctx);
+ for (uint32_t i = 0; i < info->set_layout_count; i++) {
+ if (info->set_layouts[i] != NULL) {
+ _mesa_blake3_update(&blake3_ctx, info->set_layouts[i]->blake3,
+ sizeof(info->set_layouts[i]->blake3));
+ }
}
+ if (pipeline_layout != NULL) {
+ _mesa_blake3_update(&blake3_ctx, &pipeline_layout->push_ranges,
+ sizeof(pipeline_layout->push_ranges[0]) *
+ pipeline_layout->push_range_count);
+ }
+ blake3_hash layout_blake3;
+ _mesa_blake3_final(&blake3_ctx, layout_blake3);
+
+ const struct vk_device_shader_ops *ops = device->shader_ops;
+ for (uint32_t p = 0; p < info->part_count; p++) {
+ /* Don't try to re-compile any fast-link shaders */
+ if (!info->optimize && info->stages[info->partition[p]].shader != NULL)
+ continue;
+
+ _mesa_blake3_init(&blake3_ctx);
+
+ for (uint32_t i = info->partition[p]; i < info->partition[p + 1]; i++) {
+ const struct vk_pipeline_stage *stage = &info->stages[i];
+
+ _mesa_blake3_update(&blake3_ctx, stage->precomp_key,
+ sizeof(stage->precomp_key));
+
+ VkShaderCreateFlagsEXT shader_flags =
+ vk_pipeline_to_shader_flags(pipeline_flags, stage->stage);
+ _mesa_blake3_update(&blake3_ctx, &shader_flags, sizeof(shader_flags));
+ }
+
+ blake3_hash state_blake3;
+ ops->hash_state(device->physical, info->state,
+ &device->enabled_features, info->part_stages[p],
+ state_blake3);
- for (uint32_t i = 0; i < link_info->part_count; i++) {
- for (uint32_t j = link_info->partition[i]; j < link_info->partition[i + 1]; j++) {
- const struct vk_pipeline_stage *stage = &stages[j];
- link_info->part_stages[i] |= mesa_to_vk_shader_stage(stage->stage);
+ _mesa_blake3_update(&blake3_ctx, state_blake3, sizeof(state_blake3));
+ _mesa_blake3_update(&blake3_ctx, layout_blake3, sizeof(layout_blake3));
+
+ blake3_hash linked_blake3;
+ _mesa_blake3_final(&blake3_ctx, linked_blake3);
+
+ for (uint32_t i = info->partition[p]; i < info->partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &info->stages[i];
+
+ stage->shader_key.stage = stage->stage;
+ memcpy(stage->shader_key.blake3, linked_blake3, sizeof(blake3_hash));
}
}
}
+static void
+vk_release_graphics_pipeline_compile_info(struct vk_graphics_pipeline_compile_info *info,
+ struct vk_device *device,
+ const VkAllocationCallbacks *pAllocator)
+{
+ for (uint32_t i = 0; i < ARRAY_SIZE(info->stages); i++)
+ vk_pipeline_stage_finish(device, &info->stages[i]);
+}
+
static VkResult
vk_graphics_pipeline_compile_shaders(struct vk_device *device,
struct vk_pipeline_cache *cache,
- struct vk_graphics_pipeline *pipeline,
+ VkPipelineCreateFlags2KHR pipeline_flags,
struct vk_pipeline_layout *pipeline_layout,
- const struct vk_graphics_pipeline_state *state,
- const struct vk_graphics_pipeline_link_info *link_info,
- uint32_t stage_count,
- struct vk_pipeline_stage *stages,
- uint32_t set_layout_count,
- struct vk_descriptor_set_layout **set_layouts,
+ struct vk_graphics_pipeline_compile_info *compile_info,
VkPipelineCreationFeedback *stage_feedbacks)
{
const struct vk_device_shader_ops *ops = device->shader_ops;
VkResult result;
- if (stage_count == 0)
+ if (compile_info->stage_count == 0)
return VK_SUCCESS;
/* If we're linking, throw away any previously compiled shaders as they
* likely haven't been properly linked. We keep the precompiled shaders
* and we still look them up in the cache so it may still be fast.
*/
- if (link_info->optimize) {
- for (uint32_t i = 0; i < stage_count; i++) {
- if (stages[i].shader != NULL) {
- vk_shader_unref(device, stages[i].shader);
- stages[i].shader = NULL;
+ if (compile_info->optimize) {
+ for (uint32_t i = 0; i < compile_info->stage_count; i++) {
+ if (compile_info->stages[i].shader != NULL) {
+ vk_shader_unref(device, compile_info->stages[i].shader);
+ compile_info->stages[i].shader = NULL;
}
}
}
@@ -1238,17 +1473,17 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device,
bool have_all_shaders = true;
VkShaderStageFlags all_stages = 0;
struct vk_pipeline_precomp_shader *tcs_precomp = NULL, *tes_precomp = NULL;
- for (uint32_t i = 0; i < stage_count; i++) {
- all_stages |= mesa_to_vk_shader_stage(stages[i].stage);
+ for (uint32_t i = 0; i < compile_info->stage_count; i++) {
+ all_stages |= mesa_to_vk_shader_stage(compile_info->stages[i].stage);
- if (stages[i].shader == NULL)
+ if (compile_info->stages[i].shader == NULL)
have_all_shaders = false;
- if (stages[i].stage == MESA_SHADER_TESS_CTRL)
- tcs_precomp = stages[i].precomp;
+ if (compile_info->stages[i].stage == MESA_SHADER_TESS_CTRL)
+ tcs_precomp = compile_info->stages[i].precomp;
- if (stages[i].stage == MESA_SHADER_TESS_EVAL)
- tes_precomp = stages[i].precomp;
+ if (compile_info->stages[i].stage == MESA_SHADER_TESS_EVAL)
+ tes_precomp = compile_info->stages[i].precomp;
}
/* If we already have a shader for each stage, there's nothing to do. */
@@ -1261,53 +1496,14 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device,
vk_pipeline_tess_info_merge(&tess_info, &tes_precomp->tess);
}
- struct mesa_blake3 blake3_ctx;
- _mesa_blake3_init(&blake3_ctx);
- for (uint32_t i = 0; i < set_layout_count; i++) {
- if (set_layouts[i] != NULL) {
- _mesa_blake3_update(&blake3_ctx, set_layouts[i]->blake3,
- sizeof(set_layouts[i]->blake3));
- }
- }
- if (pipeline_layout != NULL) {
- _mesa_blake3_update(&blake3_ctx, &pipeline_layout->push_ranges,
- sizeof(pipeline_layout->push_ranges[0]) *
- pipeline_layout->push_range_count);
- }
- blake3_hash layout_blake3;
- _mesa_blake3_final(&blake3_ctx, layout_blake3);
-
- for (uint32_t p = 0; p < link_info->part_count; p++) {
+ for (uint32_t p = 0; p < compile_info->part_count; p++) {
const int64_t part_start = os_time_get_nano();
/* Don't try to re-compile any fast-link shaders */
- if (!link_info->optimize && stages[link_info->partition[p]].shader != NULL)
+ if (!compile_info->optimize &&
+ compile_info->stages[compile_info->partition[p]].shader != NULL)
continue;
- struct vk_shader_pipeline_cache_key shader_key = { 0 };
-
- _mesa_blake3_init(&blake3_ctx);
-
- for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) {
- const struct vk_pipeline_stage *stage = &stages[i];
-
- _mesa_blake3_update(&blake3_ctx, stage->precomp->cache_key,
- sizeof(stage->precomp->cache_key));
-
- VkShaderCreateFlagsEXT shader_flags =
- vk_pipeline_to_shader_flags(pipeline->base.flags, stage->stage);
- _mesa_blake3_update(&blake3_ctx, &shader_flags, sizeof(shader_flags));
- }
-
- blake3_hash state_blake3;
- ops->hash_state(device->physical, state, &device->enabled_features,
- link_info->part_stages[p], state_blake3);
-
- _mesa_blake3_update(&blake3_ctx, state_blake3, sizeof(state_blake3));
- _mesa_blake3_update(&blake3_ctx, layout_blake3, sizeof(layout_blake3));
-
- _mesa_blake3_final(&blake3_ctx, shader_key.blake3);
-
if (cache != NULL) {
/* From the Vulkan 1.3.278 spec:
*
@@ -1336,17 +1532,15 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device,
*/
bool all_shaders_found = true;
bool all_cache_hits = true;
- for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) {
- struct vk_pipeline_stage *stage = &stages[i];
-
- shader_key.stage = stage->stage;
+ for (uint32_t i = compile_info->partition[p]; i < compile_info->partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &compile_info->stages[i];
if (stage->shader) {
/* If we have a shader from some library pipeline and the key
* matches, just use that.
*/
if (memcmp(&stage->shader->pipeline.cache_key,
- &shader_key, sizeof(shader_key)) == 0)
+ &stage->shader_key, sizeof(stage->shader_key)) == 0)
continue;
/* Otherwise, throw it away */
@@ -1356,8 +1550,8 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device,
bool cache_hit = false;
struct vk_pipeline_cache_object *cache_obj =
- vk_pipeline_cache_lookup_object(cache, &shader_key,
- sizeof(shader_key),
+ vk_pipeline_cache_lookup_object(cache, &stage->shader_key,
+ sizeof(stage->shader_key),
&pipeline_shader_cache_ops,
&cache_hit);
if (cache_obj != NULL) {
@@ -1376,8 +1570,8 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device,
* in the partition. Otherwise, we have to go re-compile it all
* anyway.
*/
- for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) {
- struct vk_pipeline_stage *stage = &stages[i];
+ for (uint32_t i = compile_info->partition[p]; i < compile_info->partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &compile_info->stages[i];
stage_feedbacks[stage->stage].flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
@@ -1387,40 +1581,40 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device,
if (all_shaders_found) {
/* Update duration to take cache lookups into account */
const int64_t part_end = os_time_get_nano();
- for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) {
- struct vk_pipeline_stage *stage = &stages[i];
+ for (uint32_t i = compile_info->partition[p]; i < compile_info->partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &compile_info->stages[i];
stage_feedbacks[stage->stage].duration += part_end - part_start;
}
continue;
}
}
- if (pipeline->base.flags &
+ if (pipeline_flags &
VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
return VK_PIPELINE_COMPILE_REQUIRED;
struct vk_shader_compile_info infos[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES];
- for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) {
- struct vk_pipeline_stage *stage = &stages[i];
+ for (uint32_t i = compile_info->partition[p]; i < compile_info->partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &compile_info->stages[i];
VkShaderCreateFlagsEXT shader_flags =
- vk_pipeline_to_shader_flags(pipeline->base.flags, stage->stage);
+ vk_pipeline_to_shader_flags(pipeline_flags, stage->stage);
- if (link_info->partition[p + 1] - link_info->partition[p] > 1)
+ if (compile_info->partition[p + 1] - compile_info->partition[p] > 1)
shader_flags |= VK_SHADER_CREATE_LINK_STAGE_BIT_EXT;
- if ((link_info->part_stages[p] & VK_SHADER_STAGE_MESH_BIT_EXT) &&
+ if ((compile_info->part_stages[p] & VK_SHADER_STAGE_MESH_BIT_EXT) &&
!(all_stages & VK_SHADER_STAGE_TASK_BIT_EXT))
shader_flags |= VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT;
VkShaderStageFlags next_stage;
if (stage->stage == MESA_SHADER_FRAGMENT) {
next_stage = 0;
- } else if (i + 1 < stage_count) {
+ } else if (i + 1 < compile_info->stage_count) {
/* We're always linking all the geometry shaders and hashing their
* hashes together, so this is safe.
*/
- next_stage = mesa_to_vk_shader_stage(stages[i + 1].stage);
+ next_stage = mesa_to_vk_shader_stage(compile_info->stages[i + 1].stage);
} else {
/* We're the last geometry stage */
next_stage = VK_SHADER_STAGE_FRAGMENT_BIT;
@@ -1433,7 +1627,7 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device,
nir_shader *nir =
vk_pipeline_precomp_shader_get_nir(stage->precomp, nir_options);
if (nir == NULL) {
- for (uint32_t j = link_info->partition[p]; j < i; j++)
+ for (uint32_t j = compile_info->partition[p]; j < i; j++)
ralloc_free(infos[j].nir);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -1452,8 +1646,8 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device,
.next_stage_mask = next_stage,
.nir = nir,
.robustness = &stage->precomp->rs,
- .set_layout_count = set_layout_count,
- .set_layouts = set_layouts,
+ .set_layout_count = compile_info->set_layout_count,
+ .set_layouts = compile_info->set_layouts,
.push_constant_range_count = push_range != NULL,
.push_constant_ranges = push_range != NULL ? push_range : NULL,
};
@@ -1464,21 +1658,21 @@ vk_graphics_pipeline_compile_shaders(struct vk_device *device,
* returns, we own the shaders but not the NIR in infos.
*/
struct vk_shader *shaders[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES];
- result = ops->compile(device, link_info->partition[p + 1] - link_info->partition[p],
- &infos[link_info->partition[p]],
- state, &device->enabled_features,
+ result = ops->compile(device,
+ compile_info->partition[p + 1] - compile_info->partition[p],
+ &infos[compile_info->partition[p]],
+ compile_info->state, &device->enabled_features,
&device->alloc,
- &shaders[link_info->partition[p]]);
+ &shaders[compile_info->partition[p]]);
if (result != VK_SUCCESS)
return result;
const int64_t part_end = os_time_get_nano();
- for (uint32_t i = link_info->partition[p]; i < link_info->partition[p + 1]; i++) {
- struct vk_pipeline_stage *stage = &stages[i];
+ for (uint32_t i = compile_info->partition[p]; i < compile_info->partition[p + 1]; i++) {
+ struct vk_pipeline_stage *stage = &compile_info->stages[i];
- shader_key.stage = stage->stage;
- vk_shader_init_cache_obj(device, shaders[i], &shader_key,
- sizeof(shader_key));
+ vk_shader_init_cache_obj(device, shaders[i], &stage->shader_key,
+ sizeof(stage->shader_key));
if (stage->shader == NULL) {
struct vk_pipeline_cache_object *cache_obj =
@@ -1662,10 +1856,6 @@ vk_create_graphics_pipeline(struct vk_device *device,
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
- const VkPipelineLibraryCreateInfoKHR *libs_info =
- vk_find_struct_const(pCreateInfo->pNext,
- PIPELINE_LIBRARY_CREATE_INFO_KHR);
-
struct vk_graphics_pipeline *pipeline =
vk_pipeline_zalloc(device, &vk_graphics_pipeline_ops,
VK_PIPELINE_BIND_POINT_GRAPHICS,
@@ -1673,80 +1863,24 @@ vk_create_graphics_pipeline(struct vk_device *device,
if (pipeline == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- struct vk_pipeline_stage stages[MESA_SHADER_MESH_STAGES];
- memset(stages, 0, sizeof(stages));
-
VkPipelineCreationFeedback stage_feedbacks[MESA_SHADER_MESH_STAGES];
memset(stage_feedbacks, 0, sizeof(stage_feedbacks));
- struct vk_graphics_pipeline_state state_tmp, *state;
- struct vk_graphics_pipeline_all_state all_state_tmp, *all_state;
- if (pipeline->base.flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) {
- /* For pipeline libraries, the state is stored in the pipeline */
- state = &pipeline->lib.state;
- all_state = &pipeline->lib.all_state;
- } else {
- /* For linked pipelines, we throw the state away at the end of pipeline
- * creation and only keep the dynamic state.
- */
- memset(&state_tmp, 0, sizeof(state_tmp));
- state = &state_tmp;
- all_state = &all_state_tmp;
- }
-
- VkShaderStageFlags imported_stages = 0;
-
- uint32_t set_layout_count = 0;
- struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS] = { 0 };
-
- /* If we have libraries, import them first. */
- if (libs_info) {
- for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
- VK_FROM_HANDLE(vk_pipeline, lib_pipeline, libs_info->pLibraries[i]);
- assert(lib_pipeline->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
- assert(lib_pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
- struct vk_graphics_pipeline *lib_gfx_pipeline =
- container_of(lib_pipeline, struct vk_graphics_pipeline, base);
-
- vk_graphics_pipeline_state_merge(state, &lib_gfx_pipeline->lib.state);
-
- set_layout_count = MAX2(set_layout_count,
- lib_gfx_pipeline->set_layout_count);
- for (uint32_t i = 0; i < lib_gfx_pipeline->set_layout_count; i++) {
- if (lib_gfx_pipeline->set_layouts[i] == NULL)
- continue;
-
- if (set_layouts[i] == NULL)
- set_layouts[i] = lib_gfx_pipeline->set_layouts[i];
- }
+ const bool is_library = pipeline_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR;
- for (uint32_t i = 0; i < lib_gfx_pipeline->stage_count; i++) {
- const struct vk_pipeline_stage *lib_stage =
- &lib_gfx_pipeline->stages[i];
-
- /* We shouldn't have duplicated stages in the imported pipeline
- * but it's cheap enough to protect against it so we may as well.
- */
- assert(lib_stage->stage < ARRAY_SIZE(stages));
- assert(vk_pipeline_stage_is_null(&stages[lib_stage->stage]));
- if (!vk_pipeline_stage_is_null(&stages[lib_stage->stage]))
- continue;
+ struct vk_graphics_pipeline_state state_tmp;
+ struct vk_graphics_pipeline_all_state all_state_tmp;
+ if (!is_library)
+ memset(&state_tmp, 0, sizeof(state_tmp));
- stages[lib_stage->stage] = vk_pipeline_stage_clone(lib_stage);
- imported_stages |= mesa_to_vk_shader_stage(lib_stage->stage);
- }
- }
- }
-
- result = vk_graphics_pipeline_state_fill(device, state,
- pCreateInfo,
- NULL /* driver_rp */,
- 0 /* driver_rp_flags */,
- all_state,
- NULL, 0, NULL);
- if (result != VK_SUCCESS)
- goto fail_stages;
+ struct vk_graphics_pipeline_compile_info compile_info;
+ vk_get_graphics_pipeline_compile_info(
+ &compile_info, device,
+ is_library ? &pipeline->lib.state : &state_tmp,
+ is_library ? &pipeline->lib.all_state : &all_state_tmp,
+ pCreateInfo);
+ /* For pipeline libraries, the state is stored in the pipeline */
if (!(pipeline->base.flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) {
pipeline->linked.dynamic.vi = &pipeline->linked._dynamic_vi;
pipeline->linked.dynamic.ms.sample_locations =
@@ -1754,18 +1888,6 @@ vk_create_graphics_pipeline(struct vk_device *device,
vk_dynamic_graphics_state_fill(&pipeline->linked.dynamic, &state_tmp);
}
- if (pipeline_layout != NULL) {
- set_layout_count = MAX2(set_layout_count, pipeline_layout->set_count);
- for (uint32_t i = 0; i < pipeline_layout->set_count; i++) {
- if (pipeline_layout->set_layouts[i] == NULL)
- continue;
-
- if (set_layouts[i] == NULL)
- set_layouts[i] = pipeline_layout->set_layouts[i];
- }
- }
-
- VkShaderStageFlags all_stages = imported_stages;
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *stage_info =
&pCreateInfo->pStages[i];
@@ -1773,119 +1895,56 @@ vk_create_graphics_pipeline(struct vk_device *device,
const int64_t stage_start = os_time_get_nano();
assert(util_bitcount(stage_info->stage) == 1);
- if (!(state->shader_stages & stage_info->stage))
- continue;
-
- mesa_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
- assert(vk_device_supports_stage(device, stage));
-
- stage_feedbacks[stage].flags |=
- VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
/* We don't need to load anything for imported stages; the precomp
* should be included if
* VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT was
* provided, and the shader should obviously be there.
*/
- if (imported_stages & stage_info->stage)
+ if (compile_info.imported_stages & stage_info->stage)
continue;
- stages[stage] = (struct vk_pipeline_stage) {
- .stage = stage,
- };
+ mesa_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
- vk_pipeline_hash_precomp_shader_stage(device, pipeline_flags,
- pCreateInfo->pNext,
- stage_info, &stages[stage]);
+ stage_feedbacks[stage].flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
+
+ struct vk_pipeline_stage *pipeline_stage =
+ &compile_info.stages[compile_info.stage_to_index[stage]];
result = vk_pipeline_precompile_shader(device, cache, pipeline_flags,
pCreateInfo->pNext,
- stage_info, &stages[stage]);
+ stage_info, pipeline_stage);
if (result != VK_SUCCESS)
goto fail_stages;
- all_stages |= stage_info->stage;
-
const int64_t stage_end = os_time_get_nano();
stage_feedbacks[stage].duration += stage_end - stage_start;
}
- /* Compact the array of stages */
- uint32_t stage_count = 0;
- for (uint32_t s = 0; s < ARRAY_SIZE(stages); s++) {
- assert(s >= stage_count);
- if (all_stages & mesa_to_vk_shader_stage(s))
- stages[stage_count++] = stages[s];
- }
- for (uint32_t s = stage_count; s < ARRAY_SIZE(stages); s++)
- memset(&stages[s], 0, sizeof(stages[s]));
-
- /* Sort so we always give the driver shaders in order.
- *
- * This makes everything easier for everyone. This also helps stabilize
- * shader keys so that we get a cache hit even if the client gives us
- * the stages in a different order.
- */
- qsort(stages, stage_count, sizeof(*stages), cmp_vk_pipeline_stages);
-
- /* Decide whether we should apply link-time optimizations. The spec says:
- *
- * VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT specifies that
- * pipeline libraries being linked into this library should have link time
- * optimizations applied. If this bit is omitted, implementations should
- * instead perform linking as rapidly as possible.
- *
- * ...
- *
- * Using VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT (or not) when
- * linking pipeline libraries is intended as a performance tradeoff
- * between host and device. If the bit is omitted, linking should be
- * faster and produce a pipeline more rapidly, but performance of the
- * pipeline on the target device may be reduced. If the bit is included,
- * linking may be slower but should produce a pipeline with device
- * performance comparable to a monolithically created pipeline.
- *
- * The key phrase here is "pipeline libraries". When we are linking pipeline
- * libraries, we look at this bit to determine whether to apply link-time
- * optimizations. When there are no pipeline libraries, however, we are
- * compiling a monolithic pipeline, which the last sentence implies should
- * always have link-time optimizations applied.
- *
- * Summarizing, we want to link-time optimize monolithic pipelines and
- * non-monolithic pipelines with LINK_TIME_OPTIMIZATION_BIT.
- *
- * (Strictly speaking, there's a corner case here, where a pipeline without
- * LINK_TIME_OPTIMIZATION_BIT links pipeline libraries for graphics state but
- * supplies shaders directly outside of the pipeline library. This logic does
- * not link those shaders, which is a conservative choice. GPL is a disaster
- * of combinatoric complexity, and this simplified approach gets good
- * performance for the cases that actually matter: monolithic, GPL fast link,
- * GPL optimized link.)
- */
- bool lto = libs_info == NULL ||
- (pipeline->base.flags &
- VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT);
-
- struct vk_graphics_pipeline_link_info link_info;
- vk_graphics_pipeline_compute_link_info(&link_info, lto,
- stage_count, stages);
-
- result = vk_graphics_pipeline_compile_shaders(device, cache, pipeline,
- pipeline_layout, state,
- &link_info,
- stage_count, stages,
- set_layout_count, set_layouts,
+ result = vk_graphics_pipeline_compile_shaders(device, cache,
+ pipeline_flags,
+ pipeline_layout,
+ &compile_info,
stage_feedbacks);
if (result != VK_SUCCESS)
goto fail_stages;
/* Keep a reference on the set layouts */
- pipeline->set_layout_count = set_layout_count;
- for (uint32_t i = 0; i < set_layout_count; i++) {
- if (set_layouts[i] == NULL)
+ pipeline->set_layout_count = compile_info.set_layout_count;
+ for (uint32_t i = 0; i < compile_info.set_layout_count; i++) {
+ if (compile_info.set_layouts[i] == NULL)
continue;
- pipeline->set_layouts[i] = vk_descriptor_set_layout_ref(set_layouts[i]);
+ pipeline->set_layouts[i] =
+ vk_descriptor_set_layout_ref(compile_info.set_layouts[i]);
+ }
+
+ pipeline->stage_count = compile_info.stage_count;
+ for (uint32_t i = 0; i < compile_info.stage_count; i++) {
+ pipeline->base.stages |= mesa_to_vk_shader_stage(compile_info.stages[i].stage);
+ pipeline->stages[i] = vk_pipeline_stage_clone(&compile_info.stages[i]);
}
/* Throw away precompiled shaders unless the client explicitly asks us to
@@ -1893,20 +1952,14 @@ vk_create_graphics_pipeline(struct vk_device *device,
*/
if (!(pipeline_flags &
VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT)) {
- for (uint32_t i = 0; i < stage_count; i++) {
- if (stages[i].precomp != NULL) {
- vk_pipeline_precomp_shader_unref(device, stages[i].precomp);
- stages[i].precomp = NULL;
+ for (uint32_t i = 0; i < compile_info.stage_count; i++) {
+ if (pipeline->stages[i].precomp != NULL) {
+ vk_pipeline_precomp_shader_unref(device, pipeline->stages[i].precomp);
+ pipeline->stages[i].precomp = NULL;
}
}
}
- pipeline->stage_count = stage_count;
- for (uint32_t i = 0; i < stage_count; i++) {
- pipeline->base.stages |= mesa_to_vk_shader_stage(stages[i].stage);
- pipeline->stages[i] = stages[i];
- }
-
const int64_t pipeline_end = os_time_get_nano();
if (feedback_info != NULL) {
VkPipelineCreationFeedback pipeline_feedback = {
@@ -1925,13 +1978,13 @@ vk_create_graphics_pipeline(struct vk_device *device,
* cache.
*/
uint32_t cache_hit_count = 0;
- for (uint32_t i = 0; i < stage_count; i++) {
- const mesa_shader_stage stage = stages[i].stage;
+ for (uint32_t i = 0; i < compile_info.stage_count; i++) {
+ const mesa_shader_stage stage = compile_info.stages[i].stage;
if (stage_feedbacks[stage].flags &
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT)
cache_hit_count++;
}
- if (cache_hit_count > 0 && cache_hit_count == stage_count) {
+ if (cache_hit_count > 0 && cache_hit_count == compile_info.stage_count) {
pipeline_feedback.flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
}
@@ -1952,15 +2005,15 @@ vk_create_graphics_pipeline(struct vk_device *device,
}
}
+ vk_release_graphics_pipeline_compile_info(&compile_info, device, pAllocator);
+
*pPipeline = vk_pipeline_to_handle(&pipeline->base);
return VK_SUCCESS;
fail_stages:
- for (uint32_t i = 0; i < ARRAY_SIZE(stages); i++)
- vk_pipeline_stage_finish(device, &stages[i]);
-
vk_graphics_pipeline_destroy(device, &pipeline->base, pAllocator);
+ vk_release_graphics_pipeline_compile_info(&compile_info, device, pAllocator);
return result;
}
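As a worked example of the partitioning that feeds these shader keys (the logic is unchanged by this patch, only moved into vk_get_graphics_pipeline_compile_info()), the branches above yield the following for a few representative stage sets; the stage sets themselves are illustrative:

/* optimize == true (monolithic or LTO link), stages = {VS, FS}:
 *    part_count = 1, partition = {0, 2}    -> one linked compile, one key
 * fast link, stages = {VS, FS}:
 *    part_count = 2, partition = {0, 1, 2} -> VS and FS keyed separately
 * fast link, fragment-only library, stages = {FS}:
 *    part_count = 1, partition = {0, 1}
 * vertex-input/color-output-only library, stages = {}:
 *    part_count = 0                        -> nothing to hash or compile
 *
 * Each partition then gets a single BLAKE3 key, built from its stages'
 * precomp keys and shader-create flags plus the state and layout hashes,
 * and shared by every stage in the partition. */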