Diffstat (limited to 'src/kosmickrisp/vulkan/kk_shader.c')
 -rw-r--r--  src/kosmickrisp/vulkan/kk_shader.c | 1278
 1 file changed, 1278 insertions(+), 0 deletions(-)
diff --git a/src/kosmickrisp/vulkan/kk_shader.c b/src/kosmickrisp/vulkan/kk_shader.c
new file mode 100644
index 00000000000..2f723599655
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_shader.c
@@ -0,0 +1,1278 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_shader.h"
+
+#include "kk_cmd_buffer.h"
+#include "kk_descriptor_set_layout.h"
+#include "kk_debug.h"
+#include "kk_device.h"
+#include "kk_format.h"
+#include "kk_nir_lower_vbo.h"
+#include "kk_physical_device.h"
+#include "kk_sampler.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+#include "kosmickrisp/bridge/vk_to_mtl_map.h"
+#include "kosmickrisp/compiler/nir_to_msl.h"
+
+#include "nir_builder.h"
+#include "nir_lower_blend.h"
+
+#include "vk_blend.h"
+#include "vk_format.h"
+#include "vk_graphics_state.h"
+#include "vk_nir_convert_ycbcr.h"
+#include "vk_pipeline.h"
+
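+/* NIR compiler options for the MSL backend. Metal does not support double, so
+ * all double lowering is enabled; 64-bit ufind_msb and subgroup shuffles are
+ * lowered as well.
+ */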
+static const nir_shader_compiler_options *
+kk_get_nir_options(struct vk_physical_device *vk_pdev, mesa_shader_stage stage,
+ UNUSED const struct vk_pipeline_robustness_state *rs)
+{
+ static nir_shader_compiler_options options = {
+ .lower_fdph = true,
+ .has_fsub = true,
+ .has_isub = true,
+ .lower_extract_word = true,
+ .lower_extract_byte = true,
+ .lower_insert_word = true,
+ .lower_insert_byte = true,
+ .lower_fmod = true,
+ .discard_is_demote = true,
+ .instance_id_includes_base_index = true,
+ .lower_device_index_to_zero = true,
+ .lower_pack_64_2x32_split = true,
+ .lower_unpack_64_2x32_split = true,
+ .lower_pack_64_2x32 = true,
+ .lower_pack_half_2x16 = true,
+ .lower_pack_split = true,
+ .lower_unpack_half_2x16 = true,
+ .has_cs_global_id = true,
+ .lower_vector_cmp = true,
+ .lower_fquantize2f16 = true,
+ .lower_scmp = true,
+ .lower_ifind_msb = true,
+ .lower_ufind_msb = true,
+ .lower_find_lsb = true,
+ .has_uclz = true,
+ .lower_mul_2x32_64 = true,
+ .lower_uadd_carry = true,
+ .lower_usub_borrow = true,
+ /* Metal does not support double. */
+ .lower_doubles_options = (nir_lower_doubles_options)(~0),
+ .lower_int64_options =
+ nir_lower_ufind_msb64 | nir_lower_subgroup_shuffle64,
+ };
+ return &options;
+}
+
+static struct spirv_to_nir_options
+kk_get_spirv_options(struct vk_physical_device *vk_pdev,
+ UNUSED mesa_shader_stage stage,
+ const struct vk_pipeline_robustness_state *rs)
+{
+ return (struct spirv_to_nir_options){
+ .environment = NIR_SPIRV_VULKAN,
+ .ssbo_addr_format = nir_address_format_64bit_bounded_global,
+ .phys_ssbo_addr_format = nir_address_format_64bit_global,
+ .ubo_addr_format = nir_address_format_64bit_bounded_global,
+ .shared_addr_format = nir_address_format_32bit_offset,
+ .min_ssbo_alignment = KK_MIN_SSBO_ALIGNMENT,
+ .min_ubo_alignment = KK_MIN_UBO_ALIGNMENT,
+ };
+}
+
+static void
+kk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev, nir_shader *nir,
+ UNUSED const struct vk_pipeline_robustness_state *rs)
+{
+ /* Gather info before preprocess_nir but after some general lowering, so
+ * inputs_read and system_values_read are accurately set.
+ */
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   /* nir_lower_io_vars_to_temporaries is required before nir_lower_blend since
+    * the blending pass sinks writes to the end of the block where we may have a
+    * jump, which is illegal.
+    */
+ NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries,
+ nir_shader_get_entrypoint(nir), true, false);
+
+ msl_preprocess_nir(nir);
+}
+
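+/* Shader keys hash the graphics state that gets baked into the generated MSL
+ * so the shader cache can distinguish the resulting variants.
+ */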
+struct kk_vs_key {
+ bool is_points;
+};
+
+static void
+kk_populate_vs_key(struct kk_vs_key *key,
+ const struct vk_graphics_pipeline_state *state)
+{
+ memset(key, 0, sizeof(*key));
+ key->is_points =
+ (state->ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
+}
+
+struct kk_fs_key {
+ VkFormat color_formats[MESA_VK_MAX_COLOR_ATTACHMENTS];
+ struct vk_color_blend_state color_blend;
+ uint32_t rasterization_samples;
+ uint16_t static_sample_mask;
+ bool has_depth;
+};
+
+static void
+kk_populate_fs_key(struct kk_fs_key *key,
+ const struct vk_graphics_pipeline_state *state)
+{
+ memset(key, 0, sizeof(*key));
+
+   /* Required since blend lowering and the render-target swizzle for
+    * non-native formats are baked into the shader and depend on these
+    * formats. */
+ memcpy(key->color_formats, state->rp->color_attachment_formats,
+ sizeof(key->color_formats));
+
+ /* Blend state gets [de]serialized, so we need to hash it */
+ if (state->cb)
+ key->color_blend = *(state->cb);
+
+ if (state->ms) {
+ key->rasterization_samples = state->ms->rasterization_samples;
+ key->static_sample_mask = state->ms->sample_mask;
+ }
+
+ /* Depth writes are removed unless there's an actual attachment */
+ key->has_depth = state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED;
+}
+
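+/* vk_device_shader_ops::hash_state: hashes the per-stage keys above, plus the
+ * view mask for fragment shaders, into the graphics state hash used by the
+ * shader cache.
+ */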
+static void
+kk_hash_graphics_state(struct vk_physical_device *device,
+ const struct vk_graphics_pipeline_state *state,
+ const struct vk_features *enabled_features,
+ VkShaderStageFlags stages, blake3_hash blake3_out)
+{
+ struct mesa_blake3 blake3_ctx;
+ _mesa_blake3_init(&blake3_ctx);
+
+ if (stages & VK_SHADER_STAGE_VERTEX_BIT) {
+ struct kk_vs_key key;
+ kk_populate_vs_key(&key, state);
+ _mesa_blake3_update(&blake3_ctx, &key, sizeof(key));
+ }
+
+ if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
+ struct kk_fs_key key;
+ kk_populate_fs_key(&key, state);
+ _mesa_blake3_update(&blake3_ctx, &key, sizeof(key));
+
+ _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask,
+ sizeof(state->rp->view_mask));
+ }
+
+ _mesa_blake3_final(&blake3_ctx, blake3_out);
+}
+
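+/* Size/alignment callback for lowering shared memory to explicit types;
+ * booleans take 4 bytes, everything else its natural component size.
+ */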
+static void
+shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+ assert(glsl_type_is_vector_or_scalar(type));
+
+ uint32_t comp_size =
+ glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
+ unsigned length = glsl_get_vector_elements(type);
+   *size = comp_size * length;
+   *align = comp_size;
+}
+
+struct lower_ycbcr_state {
+ uint32_t set_layout_count;
+ struct vk_descriptor_set_layout *const *set_layouts;
+};
+
+static const struct vk_ycbcr_conversion_state *
+lookup_ycbcr_conversion(const void *_state, uint32_t set, uint32_t binding,
+ uint32_t array_index)
+{
+ const struct lower_ycbcr_state *state = _state;
+ assert(set < state->set_layout_count);
+ assert(state->set_layouts[set] != NULL);
+ const struct kk_descriptor_set_layout *set_layout =
+ vk_to_kk_descriptor_set_layout(state->set_layouts[set]);
+ assert(binding < set_layout->binding_count);
+
+ const struct kk_descriptor_set_binding_layout *bind_layout =
+ &set_layout->binding[binding];
+
+ if (bind_layout->immutable_samplers == NULL)
+ return NULL;
+
+ array_index = MIN2(array_index, bind_layout->array_size - 1);
+
+ const struct kk_sampler *sampler =
+ bind_layout->immutable_samplers[array_index];
+
+ return sampler && sampler->vk.ycbcr_conversion
+ ? &sampler->vk.ycbcr_conversion->state
+ : NULL;
+}
+
+static int
+type_size_vec4(const struct glsl_type *type, bool bindless)
+{
+ return glsl_count_attribute_slots(type, false);
+}
+
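+/* Render targets with non-native formats are emulated with a swizzled native
+ * format, so fragment output stores get the format's swizzle applied and
+ * output loads get the inverse swizzle.
+ */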
+static bool
+kk_nir_swizzle_fragment_output(nir_builder *b, nir_intrinsic_instr *intrin,
+ void *data)
+{
+ if (intrin->intrinsic != nir_intrinsic_store_output &&
+ intrin->intrinsic != nir_intrinsic_load_output)
+ return false;
+
+ unsigned slot = nir_intrinsic_io_semantics(intrin).location;
+ if (slot < FRAG_RESULT_DATA0)
+ return false;
+
+ const struct vk_graphics_pipeline_state *state =
+ (const struct vk_graphics_pipeline_state *)data;
+ VkFormat vk_format =
+ state->rp->color_attachment_formats[slot - FRAG_RESULT_DATA0];
+ if (vk_format == VK_FORMAT_UNDEFINED)
+ return false;
+
+ enum pipe_format format = vk_format_to_pipe_format(vk_format);
+ const struct kk_va_format *supported_format = kk_get_va_format(format);
+
+ /* Check if we have to apply any swizzle */
+ if (!supported_format->is_native) {
+ unsigned channel_swizzle[] = {
+ supported_format->swizzle.red, supported_format->swizzle.green,
+ supported_format->swizzle.blue, supported_format->swizzle.alpha};
+
+ if (intrin->intrinsic == nir_intrinsic_store_output) {
+ b->cursor = nir_before_instr(&intrin->instr);
+ nir_def *to_replace = intrin->src[0].ssa;
+ nir_def *swizzled = nir_swizzle(b, to_replace, channel_swizzle,
+ to_replace->num_components);
+ nir_src_rewrite(&intrin->src[0], swizzled);
+ } else {
+ unsigned channel_unswizzle[4] = {0u};
+ for (uint32_t i = 0u; i < 4; ++i)
+ channel_unswizzle[channel_swizzle[i]] = i;
+
+ b->cursor = nir_after_instr(&intrin->instr);
+ nir_def *to_replace = &intrin->def;
+ nir_def *swizzled = nir_swizzle(b, to_replace, channel_unswizzle,
+ to_replace->num_components);
+ nir_def_rewrite_uses_after(to_replace, swizzled);
+ }
+ return true;
+ }
+
+ return false;
+}
+
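+/* Builds the kk_attribute table from the pipeline's vertex input state and
+ * lowers vertex attribute loads with kk_nir_lower_vbo, since vertex fetch is
+ * done in the shader rather than by fixed-function hardware.
+ */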
+static void
+kk_lower_vs_vbo(nir_shader *nir, const struct vk_graphics_pipeline_state *state)
+{
+ assert(!(nir->info.inputs_read & BITFIELD64_MASK(VERT_ATTRIB_GENERIC0)) &&
+ "Fixed-function attributes not used in Vulkan");
+ NIR_PASS(_, nir, nir_recompute_io_bases, nir_var_shader_in);
+ /* the shader_out portion of this is load-bearing even for tess eval */
+ NIR_PASS(_, nir, nir_io_add_const_offset_to_base,
+ nir_var_shader_in | nir_var_shader_out);
+
+ struct kk_attribute attributes[KK_MAX_ATTRIBS] = {};
+ uint64_t attribs_read = nir->info.inputs_read >> VERT_ATTRIB_GENERIC0;
+ u_foreach_bit(i, state->vi->attributes_valid) {
+ const struct vk_vertex_attribute_state *attr = &state->vi->attributes[i];
+ assert(state->vi->bindings_valid & BITFIELD_BIT(attr->binding));
+ const struct vk_vertex_binding_state *binding =
+ &state->vi->bindings[attr->binding];
+
+ /* nir_assign_io_var_locations compacts vertex inputs, eliminating
+ * unused inputs. We need to do the same here to match the locations.
+ */
+ unsigned slot = util_bitcount64(attribs_read & BITFIELD_MASK(i));
+ attributes[slot].divisor = binding->divisor;
+ attributes[slot].binding = attr->binding;
+ attributes[slot].format = vk_format_to_pipe_format(attr->format);
+ attributes[slot].buf = attr->binding;
+ attributes[slot].instanced =
+ binding->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
+ }
+ NIR_PASS(_, nir, kk_nir_lower_vbo, attributes);
+}
+
+static void
+kk_lower_vs(nir_shader *nir, const struct vk_graphics_pipeline_state *state)
+{
+ if (state->ia->primitive_topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
+ nir_shader_intrinsics_pass(nir, msl_nir_vs_remove_point_size_write,
+ nir_metadata_control_flow, NULL);
+
+ NIR_PASS(_, nir, msl_nir_layer_id_type);
+}
+
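+/* Lowers blending and logic ops into the fragment shader with nir_lower_blend.
+ * Attachments with blending disabled still go through the pass with a
+ * pass-through ONE/ZERO ADD configuration so the color write mask is honored.
+ */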
+static void
+kk_lower_fs_blend(nir_shader *nir,
+ const struct vk_graphics_pipeline_state *state)
+{
+ nir_lower_blend_options opts = {
+ .scalar_blend_const = false,
+ .logicop_enable = state->cb->logic_op_enable,
+ .logicop_func = state->cb->logic_op,
+ };
+
+ static_assert(ARRAY_SIZE(opts.format) == 8, "max RTs out of sync");
+
+ for (unsigned i = 0; i < ARRAY_SIZE(opts.format); ++i) {
+ opts.format[i] =
+ vk_format_to_pipe_format(state->rp->color_attachment_formats[i]);
+ if (state->cb->attachments[i].blend_enable) {
+ opts.rt[i] = (nir_lower_blend_rt){
+ .rgb.src_factor = vk_blend_factor_to_pipe(
+ state->cb->attachments[i].src_color_blend_factor),
+ .rgb.dst_factor = vk_blend_factor_to_pipe(
+ state->cb->attachments[i].dst_color_blend_factor),
+ .rgb.func =
+ vk_blend_op_to_pipe(state->cb->attachments[i].color_blend_op),
+
+ .alpha.src_factor = vk_blend_factor_to_pipe(
+ state->cb->attachments[i].src_alpha_blend_factor),
+ .alpha.dst_factor = vk_blend_factor_to_pipe(
+ state->cb->attachments[i].dst_alpha_blend_factor),
+ .alpha.func =
+ vk_blend_op_to_pipe(state->cb->attachments[i].alpha_blend_op),
+
+ .colormask = state->cb->attachments[i].write_mask,
+ };
+ } else {
+ opts.rt[i] = (nir_lower_blend_rt){
+ .rgb.src_factor = PIPE_BLENDFACTOR_ONE,
+ .rgb.dst_factor = PIPE_BLENDFACTOR_ZERO,
+ .rgb.func = PIPE_BLEND_ADD,
+
+ .alpha.src_factor = PIPE_BLENDFACTOR_ONE,
+ .alpha.dst_factor = PIPE_BLENDFACTOR_ZERO,
+ .alpha.func = PIPE_BLEND_ADD,
+
+ .colormask = state->cb->attachments[i].write_mask,
+ };
+ }
+ }
+ NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_out);
+ NIR_PASS(_, nir, nir_lower_blend, &opts);
+}
+
+static bool
+lower_subpass_dim(nir_builder *b, nir_tex_instr *tex, UNUSED void *_data)
+{
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS)
+ tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ else if (tex->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ else
+ return false;
+
+ return true;
+}
+
+static void
+kk_lower_fs(nir_shader *nir, const struct vk_graphics_pipeline_state *state)
+{
+ if (state->cb)
+ kk_lower_fs_blend(nir, state);
+
+ if (state->rp->depth_attachment_format == VK_FORMAT_UNDEFINED ||
+ nir->info.fs.early_fragment_tests)
+ NIR_PASS(_, nir, nir_shader_intrinsics_pass,
+ msl_nir_fs_remove_depth_write, nir_metadata_control_flow, NULL);
+
+   /* Input attachments are treated as 2D textures; fix up the sampler
+    * dimension. */
+ NIR_PASS(_, nir, nir_shader_tex_pass, lower_subpass_dim, nir_metadata_all,
+ NULL);
+
+ /* Swizzle non-native formats' outputs */
+ NIR_PASS(_, nir, nir_shader_intrinsics_pass, kk_nir_swizzle_fragment_output,
+ nir_metadata_control_flow, (void *)state);
+
+ /* Metal's sample mask is uint. */
+ NIR_PASS(_, nir, msl_nir_sample_mask_type);
+
+ if (state->ms && state->ms->rasterization_samples &&
+ state->ms->sample_mask != UINT16_MAX)
+ NIR_PASS(_, nir, msl_lower_static_sample_mask, state->ms->sample_mask);
+   /* See https://github.com/KhronosGroup/Vulkan-Portability/issues/54 for an
+    * explanation of why we need this. */
+ else if (nir->info.fs.needs_full_quad_helper_invocations ||
+ nir->info.fs.needs_coarse_quad_helper_invocations)
+ NIR_PASS(_, nir, msl_lower_static_sample_mask, 0xFFFFFFFF);
+}
+
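+/* Stage-agnostic lowering shared by the graphics and compute paths: multiview,
+ * YCbCr conversion, texture lowering, explicit IO, shared memory and, last,
+ * descriptors. Called once per shader before inter-stage varying linking.
+ */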
+static void
+kk_lower_nir(struct kk_device *dev, nir_shader *nir,
+ const struct vk_pipeline_robustness_state *rs,
+ uint32_t set_layout_count,
+ struct vk_descriptor_set_layout *const *set_layouts,
+ const struct vk_graphics_pipeline_state *state)
+{
+ /* Massage IO related variables to please Metal */
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ NIR_PASS(_, nir, kk_nir_lower_vs_multiview, state->rp->view_mask);
+
+ /* kk_nir_lower_vs_multiview may create a temporary array to assign the
+ * correct view index. Since we don't handle derefs, we need to get rid of
+ * them. */
+ NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 0,
+ glsl_get_natural_size_align_bytes,
+ glsl_get_natural_size_align_bytes);
+
+ NIR_PASS(_, nir, msl_ensure_vertex_position_output);
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ enum pipe_format rts[MAX_DRAW_BUFFERS] = {PIPE_FORMAT_NONE};
+ const struct vk_render_pass_state *rp = state->rp;
+ for (uint32_t i = 0u; i < MAX_DRAW_BUFFERS; ++i)
+ rts[i] = vk_format_to_pipe_format(rp->color_attachment_formats[i]);
+
+ NIR_PASS(_, nir, msl_nir_fs_force_output_signedness, rts);
+
+ NIR_PASS(_, nir, kk_nir_lower_fs_multiview, state->rp->view_mask);
+
+ if (state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED &&
+ state->ial && state->ial->depth_att != MESA_VK_ATTACHMENT_NO_INDEX) {
+ NIR_PASS(_, nir, msl_ensure_depth_write);
+ }
+ }
+
+ const struct lower_ycbcr_state ycbcr_state = {
+ .set_layout_count = set_layout_count,
+ .set_layouts = set_layouts,
+ };
+ NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lookup_ycbcr_conversion,
+ &ycbcr_state);
+
+ /* Common msl texture lowering needs to happen after ycbcr lowering and
+ * before descriptor lowering. */
+ NIR_PASS(_, nir, msl_lower_textures);
+
+ /* Lower push constants before lower_descriptors */
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
+ nir_address_format_32bit_offset);
+
+ NIR_PASS(_, nir, nir_lower_memory_model);
+
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
+ nir_address_format_64bit_global);
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
+ nir_address_format_64bit_bounded_global);
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
+ nir_address_format_64bit_bounded_global);
+
+ NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
+ type_size_vec4,
+ nir_lower_io_lower_64bit_to_32 |
+ nir_lower_io_use_interpolated_input_intrinsics);
+
+ if (!nir->info.shared_memory_explicit_layout) {
+ NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
+ shared_var_info);
+ }
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
+ nir_address_format_32bit_offset);
+
+ if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
+      /* QMD::SHARED_MEMORY_SIZE requires an alignment of 256B, so it's safe to
+       * align everything up to 16B and write whole vec4s.
+       */
+ nir->info.shared_size = align(nir->info.shared_size, 16);
+ NIR_PASS(_, nir, nir_zero_initialize_shared_memory, nir->info.shared_size,
+ 16);
+
+ /* We need to call lower_compute_system_values again because
+ * nir_zero_initialize_shared_memory generates load_invocation_id which
+ * has to be lowered to load_invocation_index.
+ */
+ NIR_PASS(_, nir, nir_lower_compute_system_values, NULL);
+ }
+
+ NIR_PASS(_, nir, nir_opt_dce);
+ NIR_PASS(_, nir, nir_lower_variable_initializers, ~nir_var_function_temp);
+ NIR_PASS(_, nir, nir_remove_dead_variables,
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value,
+ NULL);
+ nir->info.io_lowered = true;
+
+   /* Required before kk_nir_lower_vbo so load_input intrinsics' parents are
+    * load_const; otherwise the pass will complain. */
+ NIR_PASS(_, nir, nir_opt_constant_folding);
+
+ /* These passes operate on lowered IO. */
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ kk_lower_vs(nir, state);
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ kk_lower_fs(nir, state);
+ }
+
+ /* Descriptor lowering needs to happen after lowering blend since we will
+ * generate a nir_intrinsic_load_blend_const_color_rgba which gets lowered by
+ * the lower descriptor pass
+ */
+ NIR_PASS(_, nir, kk_nir_lower_descriptors, rs, set_layout_count,
+ set_layouts);
+ NIR_PASS(_, nir, kk_nir_lower_textures);
+
+ NIR_PASS(_, nir, nir_lower_global_vars_to_local);
+}
+
+static const struct vk_shader_ops kk_shader_ops;
+
+static void
+kk_shader_destroy(struct vk_device *vk_dev, struct vk_shader *vk_shader,
+ const VkAllocationCallbacks *pAllocator)
+{
+ struct kk_device *dev = container_of(vk_dev, struct kk_device, vk);
+ struct kk_shader *shader = container_of(vk_shader, struct kk_shader, vk);
+
+ if (shader->pipeline.cs) {
+ mtl_release(shader->pipeline.cs);
+ } else if (shader->pipeline.gfx.handle) {
+ mtl_release(shader->pipeline.gfx.handle);
+ if (shader->pipeline.gfx.mtl_depth_stencil_state_handle)
+ mtl_release(shader->pipeline.gfx.mtl_depth_stencil_state_handle);
+ shader->pipeline.gfx.handle = NULL;
+ shader->pipeline.gfx.mtl_depth_stencil_state_handle = NULL;
+ }
+
+ ralloc_free((void *)shader->msl_code);
+ ralloc_free((void *)shader->entrypoint_name);
+
+ vk_shader_free(&dev->vk, pAllocator, &shader->vk);
+}
+
+static bool
+gather_vs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ if (intr->intrinsic != nir_intrinsic_load_input)
+ return false;
+
+ struct nir_io_semantics io = nir_intrinsic_io_semantics(intr);
+ BITSET_WORD *attribs_read = data;
+ BITSET_SET(attribs_read, (io.location - VERT_ATTRIB_GENERIC0));
+ return false;
+}
+
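+/* Records what the command buffer needs at bind time: the vertex attributes
+ * actually read, a valid fragment depth layout, and the compute local size.
+ */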
+static void
+gather_shader_info(struct kk_shader *shader, nir_shader *nir,
+ const struct vk_graphics_pipeline_state *state)
+{
+ shader->info.stage = nir->info.stage;
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ nir_shader_intrinsics_pass(nir, gather_vs_inputs, nir_metadata_all,
+ &shader->info.vs.attribs_read);
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ /* Some meta shaders like vk-meta-resolve will have depth_layout as NONE
+ * which is not a valid Metal layout */
+ if (nir->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE)
+ nir->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
+ } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
+ shader->info.cs.local_size.x = nir->info.workgroup_size[0];
+ shader->info.cs.local_size.y = nir->info.workgroup_size[1];
+ shader->info.cs.local_size.z = nir->info.workgroup_size[2];
+ }
+}
+
+static void
+modify_nir_info(nir_shader *nir)
+{
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ /* Vertex attribute fetch is done in shader through argument buffers. */
+ nir->info.inputs_read = 0u;
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ /* Some meta shaders like vk-meta-resolve will have depth_layout as NONE
+ * which is not a valid Metal layout */
+ if (nir->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE)
+ nir->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
+
+      /* These values are part of the declaration and go with IO. We only
+       * require the instructions to understand the interpolation mode. */
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
+ }
+}
+
+static VkResult
+kk_compile_shader(struct kk_device *dev, struct vk_shader_compile_info *info,
+ const struct vk_graphics_pipeline_state *state,
+ const VkAllocationCallbacks *pAllocator,
+ struct vk_shader **shader_out)
+{
+ struct kk_shader *shader;
+ VkResult result = VK_SUCCESS;
+
+ /* We consume the NIR, regardless of success or failure */
+ nir_shader *nir = info->nir;
+
+ shader = vk_shader_zalloc(&dev->vk, &kk_shader_ops, info->stage, pAllocator,
+ sizeof(*shader));
+ if (shader == NULL) {
+ ralloc_free(nir);
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ if (nir->info.io_lowered == false)
+ kk_lower_nir(dev, nir, info->robustness, info->set_layout_count,
+ info->set_layouts, state);
+
+ gather_shader_info(shader, nir, state);
+
+   /* VBO lowering needs to go here; otherwise the linking step removes all
+    * inputs since we read vertex attributes from UBOs. */
+ if (info->stage == MESA_SHADER_VERTEX) {
+ kk_lower_vs_vbo(nir, state);
+ }
+ msl_optimize_nir(nir);
+ modify_nir_info(nir);
+ shader->msl_code = nir_to_msl(nir, NULL);
+ const char *entrypoint_name = nir_shader_get_entrypoint(nir)->function->name;
+
+   /* We need to steal the entrypoint name so it doesn't get destroyed with the
+    * nir. This needs to happen after nir_to_msl since that's where we rename
+    * the entrypoint.
+    */
+ ralloc_steal(NULL, (void *)entrypoint_name);
+ shader->entrypoint_name = entrypoint_name;
+
+ if (KK_DEBUG(MSL))
+ mesa_logi("%s\n", shader->msl_code);
+
+ ralloc_free(nir);
+
+ *shader_out = &shader->vk;
+
+ return result;
+}
+
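+/* Robustness state used for internally generated shaders such as the
+ * pass-through fragment shader and shaders built via kk_compile_nir_shader.
+ */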
+static const struct vk_pipeline_robustness_state rs_none = {
+ .uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
+ .storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
+ .images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
+};
+
+VkResult
+kk_compile_nir_shader(struct kk_device *dev, nir_shader *nir,
+ const VkAllocationCallbacks *alloc,
+ struct kk_shader **shader_out)
+{
+ const struct kk_physical_device *pdev = kk_device_physical(dev);
+
+ assert(nir->info.stage == MESA_SHADER_COMPUTE);
+ if (nir->options == NULL)
+ nir->options = kk_get_nir_options((struct vk_physical_device *)&pdev->vk,
+ nir->info.stage, &rs_none);
+
+ struct vk_shader_compile_info info = {
+ .stage = nir->info.stage,
+ .nir = nir,
+ .robustness = &rs_none,
+ };
+
+ struct vk_shader *shader = NULL;
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ VkResult result = kk_compile_shader(dev, &info, NULL, alloc, &shader);
+ if (result != VK_SUCCESS)
+ return result;
+
+ *shader_out = container_of(shader, struct kk_shader, vk);
+
+ return VK_SUCCESS;
+}
+
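+/* Optimization loop handed to nir_opt_varyings_bulk while linking varyings
+ * between stages.
+ */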
+static void
+nir_opts(nir_shader *nir)
+{
+ bool progress;
+
+ do {
+ progress = false;
+
+ NIR_PASS(progress, nir, nir_opt_loop);
+ NIR_PASS(progress, nir, nir_copy_prop);
+ NIR_PASS(progress, nir, nir_opt_remove_phis);
+ NIR_PASS(progress, nir, nir_opt_dce);
+
+ NIR_PASS(progress, nir, nir_opt_if, 0);
+ NIR_PASS(progress, nir, nir_opt_dead_cf);
+ NIR_PASS(progress, nir, nir_opt_cse);
+
+ NIR_PASS(progress, nir, nir_opt_peephole_select,
+ &(nir_opt_peephole_select_options){
+ .limit = 8,
+ .expensive_alu_ok = true,
+ .discard_ok = true,
+ });
+
+ NIR_PASS(progress, nir, nir_opt_phi_precision);
+ NIR_PASS(progress, nir, nir_opt_algebraic);
+ NIR_PASS(progress, nir, nir_opt_constant_folding);
+ NIR_PASS(progress, nir, nir_io_add_const_offset_to_base,
+ nir_var_shader_in | nir_var_shader_out);
+
+ NIR_PASS(progress, nir, nir_opt_undef);
+ NIR_PASS(progress, nir, nir_opt_loop_unroll);
+ } while (progress);
+}
+
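+/* Creates an empty pass-through shader for the given stage; used to provide
+ * the fragment function Metal render pipelines require when the application
+ * did not supply one.
+ */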
+static nir_shader *
+get_empty_nir(struct kk_device *dev, mesa_shader_stage stage,
+ const struct vk_graphics_pipeline_state *state)
+{
+ nir_shader *nir = nir_shader_create(
+ NULL, stage,
+ kk_get_nir_options(&kk_device_physical(dev)->vk, stage, NULL));
+
+ nir_function *function = nir_function_create(nir, "main_entrypoint");
+ function->is_entrypoint = true;
+ nir_function_impl_create(function);
+
+ const struct vk_pipeline_robustness_state no_robustness = {
+ .storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED,
+ .uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED,
+ .vertex_inputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED,
+ .images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED,
+ .null_uniform_buffer_descriptor = false,
+ .null_storage_buffer_descriptor = false,
+ };
+ kk_lower_nir(dev, nir, &no_robustness, 0u, NULL, state);
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+ return nir;
+}
+
+static VkResult
+kk_compile_compute_pipeline(struct kk_device *device, struct kk_shader *shader)
+{
+ uint32_t local_size_threads = shader->info.cs.local_size.x *
+ shader->info.cs.local_size.y *
+ shader->info.cs.local_size.z;
+ mtl_library *library = mtl_new_library(device->mtl_handle, shader->msl_code);
+ if (library == NULL)
+ return VK_ERROR_INVALID_SHADER_NV;
+
+ mtl_function *function =
+ mtl_new_function_with_name(library, shader->entrypoint_name);
+ shader->pipeline.cs = mtl_new_compute_pipeline_state(
+ device->mtl_handle, function, local_size_threads);
+ mtl_release(function);
+ mtl_release(library);
+
+ if (shader->pipeline.cs == NULL)
+ return VK_ERROR_INVALID_SHADER_NV;
+
+ return VK_SUCCESS;
+}
+
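+/* Depth/stencil state is a separate baked object in Metal, so we can only
+ * pre-build it with the pipeline when none of the relevant Vulkan state is
+ * dynamic (see kk_cmd_bind_graphics_shader).
+ */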
+static bool
+has_static_depth_stencil_state(const struct vk_graphics_pipeline_state *state)
+{
+ if (!state->ds)
+ return false;
+
+ return !(
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_OP) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK));
+}
+
+mtl_depth_stencil_state *
+kk_compile_depth_stencil_state(struct kk_device *device,
+ const struct vk_depth_stencil_state *ds,
+ bool has_depth, bool has_stencil)
+{
+ mtl_stencil_descriptor *front = NULL;
+ mtl_stencil_descriptor *back = NULL;
+ mtl_depth_stencil_descriptor *descriptor =
+ mtl_new_depth_stencil_descriptor();
+ if (has_depth && ds->depth.test_enable) {
+ mtl_depth_stencil_descriptor_set_depth_write_enabled(
+ descriptor, ds->depth.write_enable);
+ mtl_depth_stencil_descriptor_set_depth_compare_function(
+ descriptor, ds->depth.compare_op);
+ } else {
+ /* Only way to disable is to always pass */
+ mtl_depth_stencil_descriptor_set_depth_write_enabled(descriptor, false);
+ mtl_depth_stencil_descriptor_set_depth_compare_function(
+ descriptor, VK_COMPARE_OP_ALWAYS);
+ }
+
+ if (has_stencil && ds->stencil.test_enable) {
+ back = mtl_new_stencil_descriptor();
+ mtl_stencil_descriptor_set_depth_failure_operation(
+ back, ds->stencil.back.op.depth_fail);
+ mtl_stencil_descriptor_set_stencil_failure_operation(
+ back, ds->stencil.back.op.fail);
+ mtl_stencil_descriptor_set_depth_stencil_pass_operation(
+ back, ds->stencil.back.op.pass);
+ mtl_stencil_descriptor_set_stencil_compare_function(
+ back, ds->stencil.back.op.compare);
+ mtl_stencil_descriptor_set_read_mask(back, ds->stencil.back.compare_mask);
+ mtl_stencil_descriptor_set_write_mask(back, ds->stencil.back.write_mask);
+ mtl_depth_stencil_descriptor_set_back_face_stencil(descriptor, back);
+
+ front = mtl_new_stencil_descriptor();
+ mtl_stencil_descriptor_set_depth_failure_operation(
+ front, ds->stencil.front.op.depth_fail);
+ mtl_stencil_descriptor_set_stencil_failure_operation(
+ front, ds->stencil.front.op.fail);
+ mtl_stencil_descriptor_set_depth_stencil_pass_operation(
+ front, ds->stencil.front.op.pass);
+ mtl_stencil_descriptor_set_stencil_compare_function(
+ front, ds->stencil.front.op.compare);
+ mtl_stencil_descriptor_set_read_mask(front,
+ ds->stencil.front.compare_mask);
+ mtl_stencil_descriptor_set_write_mask(front,
+ ds->stencil.front.write_mask);
+ mtl_depth_stencil_descriptor_set_front_face_stencil(descriptor, front);
+ }
+
+ mtl_depth_stencil_state *state =
+ mtl_new_depth_stencil_state(device->mtl_handle, descriptor);
+
+ if (front)
+ mtl_release(front);
+ if (back)
+ mtl_release(back);
+ mtl_release(descriptor);
+
+ return state;
+}
+
+/* TODO_KOSMICKRISP For now we just support vertex and fragment */
+static VkResult
+kk_compile_graphics_pipeline(struct kk_device *device,
+ struct kk_shader *vertex_shader,
+ struct kk_shader *fragment_shader,
+ const struct vk_graphics_pipeline_state *state)
+{
+ VkResult result = VK_SUCCESS;
+
+ assert(vertex_shader->info.stage == MESA_SHADER_VERTEX &&
+ fragment_shader->info.stage == MESA_SHADER_FRAGMENT);
+
+ mtl_library *vertex_library =
+ mtl_new_library(device->mtl_handle, vertex_shader->msl_code);
+ if (vertex_library == NULL)
+ return VK_ERROR_INVALID_SHADER_NV;
+
+ mtl_function *vertex_function = mtl_new_function_with_name(
+ vertex_library, vertex_shader->entrypoint_name);
+
+ mtl_library *fragment_library =
+ mtl_new_library(device->mtl_handle, fragment_shader->msl_code);
+ if (fragment_library == NULL) {
+ result = VK_ERROR_INVALID_SHADER_NV;
+ goto destroy_vertex;
+ }
+ mtl_function *fragment_function = mtl_new_function_with_name(
+ fragment_library, fragment_shader->entrypoint_name);
+
+ mtl_render_pipeline_descriptor *pipeline_descriptor =
+ mtl_new_render_pipeline_descriptor();
+ mtl_render_pipeline_descriptor_set_vertex_shader(pipeline_descriptor,
+ vertex_function);
+ if (fragment_function)
+ mtl_render_pipeline_descriptor_set_fragment_shader(pipeline_descriptor,
+ fragment_function);
+ /* Layered rendering in Metal requires setting primitive topology class */
+ mtl_render_pipeline_descriptor_set_input_primitive_topology(
+ pipeline_descriptor,
+ vk_primitive_topology_to_mtl_primitive_topology_class(
+ state->ia->primitive_topology));
+
+ for (uint8_t i = 0; i < state->rp->color_attachment_count; ++i) {
+ if (state->rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED)
+ mtl_render_pipeline_descriptor_set_color_attachment_format(
+ pipeline_descriptor, i,
+ vk_format_to_mtl_pixel_format(
+ state->rp->color_attachment_formats[i]));
+ }
+
+ if (state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED)
+ mtl_render_pipeline_descriptor_set_depth_attachment_format(
+ pipeline_descriptor,
+ vk_format_to_mtl_pixel_format(state->rp->depth_attachment_format));
+
+ if (state->rp->stencil_attachment_format != VK_FORMAT_UNDEFINED)
+ mtl_render_pipeline_descriptor_set_stencil_attachment_format(
+ pipeline_descriptor,
+ vk_format_to_mtl_pixel_format(state->rp->stencil_attachment_format));
+
+ if (has_static_depth_stencil_state(state)) {
+ bool has_depth =
+ state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED;
+ bool has_stencil =
+ state->rp->stencil_attachment_format != VK_FORMAT_UNDEFINED;
+ vertex_shader->pipeline.gfx.mtl_depth_stencil_state_handle =
+ kk_compile_depth_stencil_state(device, state->ds, has_depth,
+ has_stencil);
+ }
+
+ if (state->rp->view_mask) {
+ uint32_t max_amplification = util_bitcount(state->rp->view_mask);
+ mtl_render_pipeline_descriptor_set_max_vertex_amplification_count(
+ pipeline_descriptor, max_amplification);
+ }
+
+ if (state->ms) {
+ mtl_render_pipeline_descriptor_set_raster_sample_count(
+ pipeline_descriptor, state->ms->rasterization_samples);
+ mtl_render_pipeline_descriptor_set_alpha_to_coverage(
+ pipeline_descriptor, state->ms->alpha_to_coverage_enable);
+ mtl_render_pipeline_descriptor_set_alpha_to_one(
+ pipeline_descriptor, state->ms->alpha_to_one_enable);
+ }
+
+ vertex_shader->pipeline.gfx.handle =
+ mtl_new_render_pipeline(device->mtl_handle, pipeline_descriptor);
+ if (vertex_shader->pipeline.gfx.handle == NULL)
+ result = VK_ERROR_INVALID_SHADER_NV;
+ vertex_shader->pipeline.gfx.primitive_type =
+ vk_primitive_topology_to_mtl_primitive_type(
+ state->ia->primitive_topology);
+
+ mtl_release(pipeline_descriptor);
+ mtl_release(fragment_function);
+ mtl_release(fragment_library);
+destroy_vertex:
+ mtl_release(vertex_function);
+ mtl_release(vertex_library);
+
+ return result;
+}
+
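+/* vk_device_shader_ops::compile: lowers every stage, links varyings across
+ * stages, emits MSL per shader, and then builds the Metal compute or render
+ * pipeline objects that will be bound at draw/dispatch time.
+ */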
+static VkResult
+kk_compile_shaders(struct vk_device *device, uint32_t shader_count,
+ struct vk_shader_compile_info *infos,
+ const struct vk_graphics_pipeline_state *state,
+ const struct vk_features *enabled_features,
+ const VkAllocationCallbacks *pAllocator,
+ struct vk_shader **shaders_out)
+{
+ VkResult result = VK_SUCCESS;
+ struct kk_device *dev = container_of(device, struct kk_device, vk);
+
+   /* Vulkan doesn't require a fragment shader to build a pipeline, but Metal
+    * does, so we may need to create one. */
+ nir_shader *null_fs = NULL;
+ nir_shader *shaders[shader_count + 1u];
+
+ /* Lower shaders, notably lowering IO. This is a prerequisite for intershader
+ * optimization. */
+ for (uint32_t i = 0u; i < shader_count; ++i) {
+ const struct vk_shader_compile_info *info = &infos[i];
+ nir_shader *nir = info->nir;
+
+ kk_lower_nir(dev, nir, info->robustness, info->set_layout_count,
+ info->set_layouts, state);
+
+ shaders[i] = nir;
+ }
+
+   /* Since we support neither GPL nor shader objects, and Metal render
+    * pipelines require both vertex and fragment stages, we may need to provide
+    * a pass-through fragment shader. */
+ if (state &&
+ shaders[shader_count - 1u]->info.stage != MESA_SHADER_FRAGMENT) {
+ null_fs = get_empty_nir(dev, MESA_SHADER_FRAGMENT, state);
+ shaders[shader_count] = null_fs;
+ }
+
+ uint32_t total_shaders = null_fs ? shader_count + 1 : shader_count;
+ nir_opt_varyings_bulk(shaders, total_shaders, true, UINT32_MAX, UINT32_MAX,
+ nir_opts);
+   /* A second pass is required because some dEQP-VK.glsl.matrix.sub.dynamic.*
+    * tests would otherwise fail: the vertex shader outputs a vec4 while the
+    * fragment shader reads a vec3, when in reality only a vec3 is needed. */
+ nir_opt_varyings_bulk(shaders, total_shaders, true, UINT32_MAX, UINT32_MAX,
+ nir_opts);
+
+ for (uint32_t i = 0; i < shader_count; i++) {
+ result =
+ kk_compile_shader(dev, &infos[i], state, pAllocator, &shaders_out[i]);
+ if (result != VK_SUCCESS) {
+ /* Clean up all the shaders before this point */
+ for (uint32_t j = 0; j < i; j++)
+ kk_shader_destroy(&dev->vk, shaders_out[j], pAllocator);
+
+ /* Clean up all the NIR after this point */
+ for (uint32_t j = i + 1; j < shader_count; j++)
+ ralloc_free(shaders[j]);
+
+ if (null_fs)
+ ralloc_free(null_fs);
+
+ /* Memset the output array */
+ memset(shaders_out, 0, shader_count * sizeof(*shaders_out));
+
+ return result;
+ }
+ }
+
+ /* Compile pipeline:
+ * 1. Compute pipeline
+ * 2. Graphics with all stages (since we don't support GPL nor shader
+ * objects for now). This will be addressed later.
+ */
+ if (shaders_out[0]->stage == MESA_SHADER_COMPUTE) {
+ result = kk_compile_compute_pipeline(
+ dev, container_of(shaders_out[0], struct kk_shader, vk));
+ } else {
+ struct kk_shader *vs = container_of(shaders_out[0], struct kk_shader, vk);
+ struct kk_shader *fs =
+ container_of(shaders_out[shader_count - 1u], struct kk_shader, vk);
+ if (null_fs) {
+ struct vk_shader_compile_info info = {
+ .stage = MESA_SHADER_FRAGMENT,
+ .nir = null_fs,
+ .robustness = &rs_none,
+ };
+ struct vk_shader *frag_shader;
+ result =
+ kk_compile_shader(dev, &info, state, &dev->vk.alloc, &frag_shader);
+
+ if (result != VK_SUCCESS) {
+ for (uint32_t i = 0; i < shader_count; i++)
+ kk_shader_destroy(&dev->vk, shaders_out[i], pAllocator);
+
+ /* Memset the output array */
+ memset(shaders_out, 0, shader_count * sizeof(*shaders_out));
+
+ return result;
+ }
+ fs = container_of(frag_shader, struct kk_shader, vk);
+ }
+
+ result = kk_compile_graphics_pipeline(dev, vs, fs, state);
+
+ if (null_fs)
+ kk_shader_destroy(&dev->vk, &fs->vk, pAllocator);
+ }
+
+ return result;
+}
+
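+/* Cache serialization: the blob stores kk_shader_info, the entrypoint-name and
+ * MSL-source lengths, both strings, and the raw pipeline handles. The Metal
+ * objects are reference counted, so serialization and deserialization each
+ * retain them and kk_shader_destroy releases them again.
+ */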
+static bool
+kk_shader_serialize(struct vk_device *vk_dev, const struct vk_shader *vk_shader,
+ struct blob *blob)
+{
+ struct kk_shader *shader = container_of(vk_shader, struct kk_shader, vk);
+
+ blob_write_bytes(blob, &shader->info, sizeof(shader->info));
+ uint32_t entrypoint_length = strlen(shader->entrypoint_name) + 1;
+ blob_write_bytes(blob, &entrypoint_length, sizeof(entrypoint_length));
+ uint32_t code_length = strlen(shader->msl_code) + 1;
+ blob_write_bytes(blob, &code_length, sizeof(code_length));
+ blob_write_bytes(blob, shader->entrypoint_name, entrypoint_length);
+ blob_write_bytes(blob, shader->msl_code, code_length);
+ blob_write_bytes(blob, &shader->pipeline, sizeof(shader->pipeline));
+
+   /* We are building a new shader into the cache, so we need to retain its
+    * Metal resources.
+    */
+ if (shader->info.stage == MESA_SHADER_COMPUTE)
+ mtl_retain(shader->pipeline.cs);
+ else if (shader->info.stage == MESA_SHADER_VERTEX) {
+ mtl_retain(shader->pipeline.gfx.handle);
+ if (shader->pipeline.gfx.mtl_depth_stencil_state_handle)
+ mtl_retain(shader->pipeline.gfx.mtl_depth_stencil_state_handle);
+ }
+
+ return !blob->out_of_memory;
+}
+
+static VkResult
+kk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
+ uint32_t binary_version,
+ const VkAllocationCallbacks *pAllocator,
+ struct vk_shader **shader_out)
+{
+ struct kk_device *dev = container_of(vk_dev, struct kk_device, vk);
+ struct kk_shader *shader;
+
+ struct kk_shader_info info;
+ blob_copy_bytes(blob, &info, sizeof(info));
+
+ const uint32_t entrypoint_length = blob_read_uint32(blob);
+ const uint32_t code_length = blob_read_uint32(blob);
+ if (blob->overrun)
+ return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
+
+ shader = vk_shader_zalloc(&dev->vk, &kk_shader_ops, info.stage, pAllocator,
+ sizeof(*shader));
+ if (shader == NULL)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ shader->info = info;
+
+ shader->entrypoint_name = ralloc_array(NULL, char, entrypoint_length);
+ if (shader->entrypoint_name == NULL) {
+ kk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ shader->msl_code = ralloc_array(NULL, char, code_length);
+ if (shader->msl_code == NULL) {
+ kk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ blob_copy_bytes(blob, (void *)shader->entrypoint_name, entrypoint_length);
+ blob_copy_bytes(blob, (void *)shader->msl_code, code_length);
+ blob_copy_bytes(blob, &shader->pipeline, sizeof(shader->pipeline));
+ if (blob->overrun) {
+ kk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+ return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
+ }
+
+   /* We are building a new shader, so we need to retain its Metal resources. */
+ if (info.stage == MESA_SHADER_COMPUTE)
+ mtl_retain(shader->pipeline.cs);
+ else if (info.stage == MESA_SHADER_VERTEX) {
+ mtl_retain(shader->pipeline.gfx.handle);
+ if (shader->pipeline.gfx.mtl_depth_stencil_state_handle)
+ mtl_retain(shader->pipeline.gfx.mtl_depth_stencil_state_handle);
+ }
+
+ *shader_out = &shader->vk;
+
+ return VK_SUCCESS;
+}
+
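+/* Binding a shader just copies its prebuilt Metal pipeline objects and related
+ * info into the command buffer state and flags the pipeline dirty.
+ */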
+static void
+kk_cmd_bind_compute_shader(struct kk_cmd_buffer *cmd, struct kk_shader *shader)
+{
+ cmd->state.cs.pipeline_state = shader->pipeline.cs;
+ cmd->state.cs.dirty |= KK_DIRTY_PIPELINE;
+ cmd->state.cs.local_size = shader->info.cs.local_size;
+}
+
+static void
+kk_cmd_bind_graphics_shader(struct kk_cmd_buffer *cmd,
+ const mesa_shader_stage stage,
+ struct kk_shader *shader)
+{
+ /* Relevant pipeline data is only stored in vertex shaders */
+ if (stage != MESA_SHADER_VERTEX)
+ return;
+
+ cmd->state.gfx.primitive_type = shader->pipeline.gfx.primitive_type;
+ cmd->state.gfx.pipeline_state = shader->pipeline.gfx.handle;
+ cmd->state.gfx.vb.attribs_read = shader->info.vs.attribs_read;
+
+ bool requires_dynamic_depth_stencil =
+ shader->pipeline.gfx.mtl_depth_stencil_state_handle == NULL;
+ if (cmd->state.gfx.is_depth_stencil_dynamic) {
+ /* If we are switching from dynamic to static, we need to clean up
+ * temporary state. Otherwise, leave the existing dynamic state
+ * untouched.
+ */
+ if (!requires_dynamic_depth_stencil) {
+ mtl_release(cmd->state.gfx.depth_stencil_state);
+ cmd->state.gfx.depth_stencil_state =
+ shader->pipeline.gfx.mtl_depth_stencil_state_handle;
+ }
+ } else
+ cmd->state.gfx.depth_stencil_state =
+ shader->pipeline.gfx.mtl_depth_stencil_state_handle;
+ cmd->state.gfx.is_depth_stencil_dynamic = requires_dynamic_depth_stencil;
+ cmd->state.gfx.dirty |= KK_DIRTY_PIPELINE;
+}
+
+static void
+kk_cmd_bind_shaders(struct vk_command_buffer *cmd_buffer, uint32_t stage_count,
+ const mesa_shader_stage *stages,
+ struct vk_shader **const shaders)
+{
+ struct kk_cmd_buffer *cmd =
+ container_of(cmd_buffer, struct kk_cmd_buffer, vk);
+
+ for (uint32_t i = 0; i < stage_count; i++) {
+ struct kk_shader *shader = container_of(shaders[i], struct kk_shader, vk);
+
+ if (stages[i] == MESA_SHADER_COMPUTE || stages[i] == MESA_SHADER_KERNEL)
+ kk_cmd_bind_compute_shader(cmd, shader);
+ else
+ kk_cmd_bind_graphics_shader(cmd, stages[i], shader);
+ }
+}
+
+static VkResult
+kk_shader_get_executable_properties(
+ UNUSED struct vk_device *device, const struct vk_shader *vk_shader,
+ uint32_t *executable_count, VkPipelineExecutablePropertiesKHR *properties)
+{
+ VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out, properties,
+ executable_count);
+
+ return vk_outarray_status(&out);
+}
+
+static VkResult
+kk_shader_get_executable_statistics(
+ UNUSED struct vk_device *device, const struct vk_shader *vk_shader,
+ uint32_t executable_index, uint32_t *statistic_count,
+ VkPipelineExecutableStatisticKHR *statistics)
+{
+ /* TODO_KOSMICKRISP */
+ return VK_SUCCESS;
+}
+
+static VkResult
+kk_shader_get_executable_internal_representations(
+ UNUSED struct vk_device *device, const struct vk_shader *vk_shader,
+ uint32_t executable_index, uint32_t *internal_representation_count,
+ VkPipelineExecutableInternalRepresentationKHR *internal_representations)
+{
+ /* TODO_KOSMICKRISP */
+ return VK_SUCCESS;
+}
+
+static const struct vk_shader_ops kk_shader_ops = {
+ .destroy = kk_shader_destroy,
+ .serialize = kk_shader_serialize,
+ .get_executable_properties = kk_shader_get_executable_properties,
+ .get_executable_statistics = kk_shader_get_executable_statistics,
+ .get_executable_internal_representations =
+ kk_shader_get_executable_internal_representations,
+};
+
+const struct vk_device_shader_ops kk_device_shader_ops = {
+ .get_nir_options = kk_get_nir_options,
+ .get_spirv_options = kk_get_spirv_options,
+ .preprocess_nir = kk_preprocess_nir,
+ .hash_state = kk_hash_graphics_state,
+   /* This will only generate the MSL string we need to use for actual library
+    * generation. */
+   .compile = kk_compile_shaders,
+ .deserialize = kk_deserialize_shader,
+ .cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
+ .cmd_bind_shaders = kk_cmd_bind_shaders,
+};
\ No newline at end of file